In [None]:
# Standard library
import warnings

# Third-party
# NOTE: `plt` must be the pyplot module. Binding the top-level `matplotlib`
# package to `plt` only happened to work for `rcParams`; any later
# `plt.subplots()` / `plt.show()` call would have raised an AttributeError.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from vega_datasets import data

# Silence library warnings to keep cell output readable. This also hides
# deprecation notices, so turn it off when debugging or upgrading packages.
warnings.filterwarnings('ignore')

# Default figure size and font size for every plot in the notebook.
plt.rcParams["figure.figsize"] = (16,9)
plt.rcParams["font.size"] = 20

# Importing the SF Temperature Data:

In [None]:
# Load the San Francisco temperature readings and preview the first rows.
sf = data.sf_temps()
sf.head(5)

# <span style = 'color:blue'>San Francisco Exercises:</span>

## 1. Resample by the day and take the average temperature. Visualize the average temperature over time.

In [None]:
# Setting the date as the index, and sorting the dates in order.
# The guard keeps this cell safe to re-run: once 'date' has become the index it
# is no longer a column, and a second set_index('date') would raise a KeyError.
if 'date' in sf.columns:
    sf = sf.set_index('date')
sf = sf.sort_index()
sf

In [None]:
# Plotting every temperature reading against its timestamp:
sf['temp'].plot()

In [None]:
# Average temperature per calendar day, plotted over time:
daily_mean_temp = sf.resample('1D').mean()
daily_mean_temp.plot()

## 2. Write the code necessary to visualize the minimum temperature over time.

In [None]:
# Lowest temperature recorded on each calendar day, plotted over time:
daily_min_temp = sf.resample('1D').min()
daily_min_temp.plot()

## 3. Write the code necessary to visualize the maximum temperature over time.

In [None]:
# Highest temperature recorded on each calendar day, plotted over time:
daily_max_temp = sf.resample('1D').max()
daily_max_temp.plot()

## 4. Which month is the coldest, on average?

In [None]:
# Average temperature for each month; the coldest month is the row with the smallest mean.
monthly = sf.resample(rule='M').mean()
monthly

In [None]:
# Index label (month-end date) of the lowest monthly average, per column:
monthly.idxmin(axis=0)

In [None]:
#My original attempt didn't take into account the *mean* temp for each month. 
#I used 'asfreq', which ultimately only returned the minimum value for each month.
#The issue was resolved once I switched to a monthly resample and specified 'mean'.

In [None]:
# Monthly mean temperature (same frame as computed earlier; rebuilt so this cell runs on its own).
monthly = sf.resample(rule='M').mean()
monthly

In [None]:
# With `monthly` holding the mean temp for each month, keep only the row(s) equal to the minimum:
coldest_mask = monthly['temp'] == monthly['temp'].min()
monthly.loc[coldest_mask]

## 5. Which month has the highest average temperature?

In [None]:
# Monthly mean temperature, plotted to see where the average peaks:
monthly = sf.resample(rule='M').mean()
monthly.plot()

In [None]:
# Keep only the row(s) whose monthly mean equals the maximum:
warmest_mask = monthly['temp'] == monthly['temp'].max()
monthly.loc[warmest_mask]

## 6. Resample by the day and calculate the min and max temp for the day (Hint: .agg(['min', 'max'])). Use this resampled dataframe to calculate the change in temperature for the day. Which month has the highest daily temperature variability?

In [None]:
#Resampling the data by day, and creating two columns using .agg to find the min and max:
daily = sf.resample('D').temp.agg(['min', 'max'])

#Plotting the min and max values by day:
daily.plot();

In [None]:
#Creating a column of the range between min and max for each of the days:
# Daily temperature swing: the day's maximum minus its minimum.
daily['diffr'] = daily['max'].sub(daily['min'])

In [None]:
#Finding the date of max range in temperature:
# Keep only the day(s) whose swing equals the largest swing observed:
widest_swing = daily['diffr'].max()
daily.loc[daily['diffr'] == widest_swing]

In [None]:
#Creating a dataframe that lists the average range of temps by month:
# Monthly average of the daily temperature swing, kept as a one-column dataframe:
monthly_range = daily['diffr'].resample('M').mean().to_frame()

In [None]:
#Creating a column in the dataframe that lists the month by name:
# Add the month's name, derived from the datetime index, as a new column:
monthly_range = monthly_range.assign(month_name=monthly_range.index.month_name())

In [None]:
# Displaying the monthly average of the daily temperature range, with the month names:
monthly_range

In [None]:
#Finding the month that had the highest average range in temperatures:
# Keep only the month(s) whose average daily swing equals the maximum:
largest_avg_swing = monthly_range['diffr'].max()
monthly_range.loc[monthly_range['diffr'] == largest_avg_swing]

## 7. Bonus: Visualize the daily min, average, and max temperature over time on a single line plot, i.e. the min, average, and maximum temperature should be 3 separate lines.

In [None]:
# The `daily` frame holds 'min', 'max' and 'diffr' but no daily average, so
# plotting it would draw the temperature range instead of the mean. The three
# lines the exercise asks for (min, average, max) are aggregated here directly.
daily_summary = sf.resample('D').temp.agg(['min', 'mean', 'max'])
daily_summary.plot();

# <span style = 'color:blue'>Seattle Exercises:</span>

In [None]:
# Load the Seattle weather data and use its 'date' column as the index:
sea = data.seattle_weather().set_index('date')

sea.head()

## 1. Which year and month combination has the highest amount of precipitation?

In [None]:
# Monthly means of the numeric columns (used by the wind cells further down).
# The text column 'weather' is excluded first: averaging it raises a TypeError
# on pandas >= 2.0, and older versions silently dropped it anyway.
monthly_weather = sea.select_dtypes('number').resample('M').mean()

# The question asks for the highest *amount* of precipitation, so each month is
# summed rather than averaged (a daily mean is skewed by the month's length).
monthly_precip_total = sea.precipitation.resample('M').sum().to_frame('total_precipitation')

# Boolean mask that returns the year-month with the highest total precipitation:
monthly_precip_total[
    monthly_precip_total['total_precipitation'] == monthly_precip_total['total_precipitation'].max()
]

## 2. Visualize the amount of monthly precipitation over time.

In [None]:
# Total precipitation per calendar month (the amount that fell, not the daily
# average), computed straight from the daily data so the cell stands on its own:
sea.precipitation.resample('M').sum().plot();

## 3. Visualize the amount of wind over time. Choose a time interval you think is appropriate.

In [None]:
# Average wind per month, plotted over time:
monthly_weather['wind'].plot();

## 4. Which year-month combination is the windiest?

In [None]:
# Keep only the year-month row(s) whose average wind equals the maximum:
strongest_avg_wind = monthly_weather['wind'].max()
monthly_weather.loc[monthly_weather['wind'] == strongest_avg_wind]

## 5. What's the sunniest year? (Hint: which year has the highest number of days where weather == sun?)

In [None]:
# Add a 'year' column taken from the datetime index:
sea = sea.assign(year=sea.index.year)

sea.head()

In [None]:
# Counting the days labelled 'sun' in each year, kept in dataframe format:
sunny_by_year = sea[sea.weather == 'sun'].groupby('year').size().to_frame('sunny_days')

# Boolean mask that isolates the year(s) with the highest count, so the answer
# no longer has to be read by eye out of the full value_counts listing.
# (The original notes recorded 2014 as the year with the most sun.)
sunny_by_year[sunny_by_year['sunny_days'] == sunny_by_year['sunny_days'].max()]

## 6. In which month does it rain the most?

In [None]:
# Add a 'month' column holding the month name taken from the datetime index:
sea = sea.assign(month=sea.index.month_name())
sea

In [None]:
# Interpreting "rains the most" as the number of days labelled 'rain', counted
# per month name across all years. (Total rainfall would be a different reading:
# sea.groupby('month').precipitation.sum().)
rain_by_month = sea[sea.weather == 'rain'].groupby('month').size().to_frame('rain_days')

# Boolean mask that isolates the month(s) with the highest count, so there is no
# need to scroll through the full value_counts listing.
# (The original notes recorded February as the month with the most rain.)
rain_by_month[rain_by_month['rain_days'] == rain_by_month['rain_days'].max()]

## 7. Which month has the most number of days with a non-zero amount of precipitation?

In [None]:
# Quick look at the daily precipitation values:
sea['precipitation']

In [None]:
#Creating a column to show whether it rained on that date:
# Flag each date as True when any precipitation was recorded:
sea['precipitation_true'] = sea['precipitation'].gt(0)

In [None]:
#Grouping by month to find the highest number of rainy days by month:
# Number of days with non-zero precipitation for each month name, across all years:
rainy_days_by_month = sea.groupby('month').precipitation_true.sum()

# Largest count first; keep only the top month:
rainy_days_by_month.sort_values(ascending = False).head(1)

It is worth considering some possible alternate interpretations of this question:
- Could it be asking about an individual year-month rather than, for example, *all* Decembers combined?
- in class, the exercise was completed by finding the highest mean # of rainy days in a month
    - it works out in this case, but that could have been misleading, because not all months have the same number of days. So I went with the actual count of rainy days in my answer. 


# <span style = 'color:blue'>Flight Data Exercises:</span>

In [None]:
# Load the flight data and use its 'date' column as the index:
fly = data.flights_20k().set_index('date')
fly

## 1. Convert any negative delays to 0.

In [None]:
# Raise every delay below 0 up to 0; delays that are already 0 or more are left unchanged:
fly['delay'] = fly['delay'].clip(lower=0)

In [None]:
# Displaying the frame after the negative delays were set to 0:
fly

## 2. Which hour of the day has the highest average delay?

In [None]:
# Add an 'hour' column taken from the datetime index:
fly = fly.assign(hour=fly.index.hour)
fly

In [None]:
# Finding the mean values for delay and distance by hour of the day and creating a dataframe.
# numeric_only=True skips the text columns (origin, destination): averaging them
# raises a TypeError on pandas >= 2.0, and older versions dropped them silently.
hourly_delay = fly.groupby('hour').mean(numeric_only=True)
hourly_delay

In [None]:
# Keep only the hour(s) whose average delay equals the maximum:
longest_avg_delay = hourly_delay['delay'].max()
hourly_delay.loc[hourly_delay['delay'] == longest_avg_delay]

## 3. Does the day of the week make a difference in the delay amount?

In [None]:
# Add a 'day' column holding the weekday name taken from the datetime index:
fly = fly.assign(day=fly.index.day_name())

In [None]:
# Preview the first rows to confirm the new column is present:
fly.head(n=5)

In [None]:
#It appears that Fridays have the highest average delays, so it does appear that weekday affects delays.

# Weekday names sort alphabetically by default, so the rows are reindexed into
# calendar order (a weekday missing from the data would appear as a NaN row).
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# numeric_only=True skips the text columns (origin, destination): averaging them
# raises a TypeError on pandas >= 2.0, and older versions dropped them silently.
daily_mean = fly.groupby('day').mean(numeric_only=True).reindex(weekday_order)
daily_mean

## 4. Does the month make a difference in the delay amount?

In [None]:
# Add a 'month' column holding the month name taken from the datetime index:
fly = fly.assign(month=fly.index.month_name())
fly

In [None]:
# Finding the mean values of the numeric columns by month and creating a dataframe.
# numeric_only=True skips the text columns (origin, destination, day): averaging
# them raises a TypeError on pandas >= 2.0, and older versions dropped them silently.
monthly_delay = fly.groupby('month').mean(numeric_only=True)
monthly_delay

In [None]:
# Keep only the month(s) whose average delay equals the maximum:
longest_monthly_delay = monthly_delay['delay'].max()
monthly_delay.loc[monthly_delay['delay'] == longest_monthly_delay]

In [None]:
# Month names in calendar order (January .. December), used to order the rows;
# without this, groupby sorts the month names alphabetically.
calendar_order = list(pd.date_range('2001-01-01', periods=12, freq='MS').month_name())

# numeric_only=True skips the text columns (origin, destination, day): averaging
# them raises a TypeError on pandas >= 2.0, and older versions dropped them silently.
# Months that do not occur in the data are dropped again after the reindex.
monthly_mean = (
    fly.groupby('month')
    .mean(numeric_only=True)
    .reindex(calendar_order)
    .dropna(how='all')
)
monthly_mean

#It appears that there is indeed a relationship between month and mean delay duration.