## Workflow for Downloading and Importing CSV Files Into Pandas Dataframes

1. Import packages
    * os
    * pandas
    * earthpy
2. Download data using earthpy
3. Set working directory using os  
4. Use os to define relative path
5. Import data into pandas dataframes
    * use the pandas function for reading CSV files: pd.read_csv

In [None]:
# Import packages
import os
import pandas as pd
import earthpy as et

In [None]:
# Download file from URL with earthpy.
# Only the URL is passed to get_data(), so the save location is left to
# earthpy's defaults (later cells look for the file under
# data/earthpy-downloads relative to the working directory).
avg_monthly_precip_url = "https://ndownloader.figshare.com/files/12710618"
et.data.get_data(url=avg_monthly_precip_url)

In [None]:
# Change into the "earth-analytics" folder located under earthpy's HOME
# path, so that the relative paths used afterwards resolve from there
os.chdir(
    os.path.join(et.io.HOME, "earth-analytics")
)

# Show the current working directory to confirm the change
os.getcwd()

In [None]:
# Build the path to the CSV relative to the working directory;
# os.path.join inserts the separator that is correct for the current OS
f_avg_monthly_precip = os.path.join(
    "data",
    "earthpy-downloads",
    "avg-precip-months-seasons.csv",
)

# True when a file or directory exists at that relative path
os.path.exists(f_avg_monthly_precip)

In [None]:
# Import CSV into pandas dataframe, reading from the relative path
# stored in f_avg_monthly_precip
avg_monthly_precip = pd.read_csv(f_avg_monthly_precip)

# Display the dataframe (last expression in the cell)
avg_monthly_precip

In [None]:
# Check shape attribute: a (number of rows, number of columns) tuple
avg_monthly_precip.shape

In [None]:
# Run summary stats on numeric columns
# (describe() is called with no arguments, so pandas' default selection
# of columns and statistics applies)
avg_monthly_precip.describe()

In [None]:
# See first few rows
# (no row count is passed, so head() falls back to its default)
avg_monthly_precip.head()

In [None]:
# See last few rows
# (no row count is passed, so tail() falls back to its default)
avg_monthly_precip.tail()

In [None]:
# Print a summary of the dataframe: index, column names, non-null counts
# and data types.
# Note data type object for text str columns
avg_monthly_precip.info()

In [None]:
# Order the rows from the largest precip value down to the smallest.
# The sorted result is only displayed; it is not assigned back, so
# avg_monthly_precip itself is left as it was.
avg_monthly_precip.sort_values(
    by="precip",
    ascending=False,
)

In [None]:
# See index for columns (names): the column labels of the dataframe
avg_monthly_precip.columns

In [None]:
# Check type of the object that pd.read_csv returned
type(avg_monthly_precip)

In [None]:
# Precip column as pandas series:
# single brackets with one column name select that column on its own
type(avg_monthly_precip["precip"])

In [None]:
# Precip column as pandas dataframe with one series:
# double brackets pass a list of column names, which keeps the result
# a dataframe even when the list holds a single name
type(avg_monthly_precip[["precip"]])

In [None]:
# Convert values in precip from in to mm (1 in = 25.4 mm).
# The precip column is overwritten with the converted values, so running
# this cell again would scale the already-converted numbers a second time.
avg_monthly_precip["precip"] = avg_monthly_precip["precip"] * 25.4

# Display the dataframe with the converted column
avg_monthly_precip

In [None]:
# Group the rows by season, keep only the precip column, and compute
# summary stats for each unique season
season_stats = (
    avg_monthly_precip
    .groupby(["seasons"])[["precip"]]
    .describe()
)

# Note that seasons became row index
season_stats

In [None]:
# Another check that seasons became row index:
# info() reports the index of season_stats along with its columns
season_stats.info()

In [None]:
# Use row index to query data: .loc selects rows by their index label.
# The label is passed inside a list, which returns a dataframe
# rather than a series.
season_stats.loc[["Fall"]]

In [None]:
# Move seasons out of the row index and back into the columns; the row
# index becomes a range starting at [0] again
season_stats = season_stats.reset_index()

# Display the dataframe with the reset index
season_stats

In [None]:
# Note row index is once again RangeIndex starting at [0]
# (info() states the index type near the top of its output)
season_stats.info()

In [None]:
# Same per-season summary stats as before, but as_index=False asks
# groupby not to use seasons as the row index of the result
season_stats = (
    avg_monthly_precip
    .groupby(["seasons"], as_index=False)[["precip"]]
    .describe()
)

# Note that seasons is not index due to as_index=False
season_stats

In [None]:
# Inspect the index and columns of the as_index=False result.
# NOTE(review): the original note says seasons is not a column in the new
# dataframe; whether groupby(..., as_index=False).describe() keeps the
# grouping column may depend on the installed pandas version — confirm
# against the output of this cell.
season_stats.info()