In [1]:
# Process daily rainfall to generate monthly rainfall w/storm anomaly column
# Read the daily rainfall CSV and produce rainfall totals by month
#
# Dependencies and Setup
#
import pandas as pd
import os
import datetime
import re
import numpy as np

In [2]:
# Location of the daily rainfall CSV (adjust the path if the data lives elsewhere).
rain = os.path.join("Resources", "daily_rainfall.csv")

# Load the daily rainfall observations into a DataFrame and preview the first five rows.
rain_df = pd.read_csv(filepath_or_buffer=rain)
rain_df.head(n=5)

Unnamed: 0,CITY,LATITUDE,LONGITUDE,ELEVATION,DATE,PRCP,Month,Day,Year
0,Bridgewater,40.626247,-74.653535,32.9,01/01/11,0.0,1,1,2011
1,Bridgewater,40.626247,-74.653535,32.9,01/02/11,0.04,1,2,2011
2,Bridgewater,40.626247,-74.653535,32.9,01/03/11,0.02,1,3,2011
3,Bridgewater,40.626247,-74.653535,32.9,01/04/11,0.0,1,4,2011
4,Bridgewater,40.626247,-74.653535,32.9,01/05/11,0.0,1,5,2011


In [3]:
# Inspect the dtype of each loaded column (note: DATE comes back as a plain object column, not a datetime).
rain_df.dtypes

CITY          object
LATITUDE     float64
LONGITUDE    float64
ELEVATION    float64
DATE          object
PRCP         float64
Month          int64
Day            int64
Year           int64
dtype: object

In [4]:
# Roll the daily observations up to one row per (CITY, Month, Year).
# For every group, derive from PRCP:
#   city_max_day_rain     - the largest single-day rainfall
#   city_month_total_rain - the accumulated rainfall for the month
#   city_avg_daily_rain   - the mean daily rainfall
per_city_rainfall_df = rain_df.groupby(['CITY', 'Month', 'Year']).agg(
    city_max_day_rain=('PRCP', 'max'),
    city_month_total_rain=('PRCP', 'sum'),
    city_avg_daily_rain=('PRCP', 'mean'),
)

In [5]:
# Show the per-city monthly summary; the grouping keys (CITY, Month, Year) form the row index.
per_city_rainfall_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,city_max_day_rain,city_month_total_rain,city_avg_daily_rain
CITY,Month,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bridgewater,1,2011,1.25,3.18,0.106000
Bridgewater,1,2012,1.10,3.05,0.098387
Bridgewater,1,2013,1.16,2.62,0.084516
Bridgewater,1,2014,0.58,2.99,0.096452
Bridgewater,1,2015,1.69,4.26,0.137419
...,...,...,...,...,...
Somerville,12,2017,0.20,0.23,0.115000
Somerville,12,2018,0.00,0.00,0.000000
Somerville,12,2019,1.07,1.13,0.282500
Somerville,12,2020,0.83,0.83,0.276667


In [6]:
# Anomalypct: fraction of the month's accumulated rainfall that fell on its
# single wettest day (used afterwards to decide whether a storm occurred).
# A month with no rain at all yields 0/0 = NaN, which is stored as 0.
per_city_rainfall_df['Anomalypct'] = (
    per_city_rainfall_df['city_max_day_rain']
    .div(per_city_rainfall_df['city_month_total_rain'])
    .fillna(0)
)

In [7]:
# Flag months in which a storm likely occurred: 'Y' when the wettest single day
# accounts for more than STORM_ANOMALY_THRESHOLD of the month's total rainfall,
# otherwise 'N'. The comparison is strict, so a share of exactly 0.70 is 'N'.
STORM_ANOMALY_THRESHOLD = 0.70  # fraction of monthly rain falling on one day
per_city_rainfall_df['Anomaly'] = np.where(
    per_city_rainfall_df['Anomalypct'] > STORM_ANOMALY_THRESHOLD, 'Y', 'N'
)

In [8]:
# Show the summary again, now including the Anomalypct and Anomaly columns.
per_city_rainfall_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,city_max_day_rain,city_month_total_rain,city_avg_daily_rain,Anomalypct,Anomaly
CITY,Month,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Bridgewater,1,2011,1.25,3.18,0.106000,0.393082,N
Bridgewater,1,2012,1.10,3.05,0.098387,0.360656,N
Bridgewater,1,2013,1.16,2.62,0.084516,0.442748,N
Bridgewater,1,2014,0.58,2.99,0.096452,0.193980,N
Bridgewater,1,2015,1.69,4.26,0.137419,0.396714,N
...,...,...,...,...,...,...,...
Somerville,12,2017,0.20,0.23,0.115000,0.869565,Y
Somerville,12,2018,0.00,0.00,0.000000,0.000000,N
Somerville,12,2019,1.07,1.13,0.282500,0.946903,Y
Somerville,12,2020,0.83,0.83,0.276667,1.000000,Y


In [9]:
# Export the per-city monthly summary to CSV. The (CITY, Month, Year) index is
# written as the leading columns (to_csv's default index=True).
# The path is built with os.path.join, matching how the input file is located.
per_city_rainfall_df.to_csv(os.path.join("Resources", "per_city_rainfall.csv"))