**1\. Environment Setup**

- Import the necessary Python libraries
- Ignore warnings
- File path setup

In [3]:
import glob             # File path & directory 
import pandas as pd     # Data manipulation & analysis
import numpy as np      # Multi-dimensional arrays and Numerical computation

import warnings         # Warnings
warnings.filterwarnings("ignore")

In [8]:
# Machine switch: set VM to 1 when the notebook runs from inside the Azure VM,
# any other value selects the local-machine profile path.
VM = 0

# Root folder of the project datasets (no trailing path separator).
if VM == 1:
    filepath = r'C:\Users\ajonnavittula\OneDrive - Highways England\Project Work\Datasets'
else:
    filepath = r'C:\Users\jonnaa\OneDrive - Highways England\Project Work\Datasets'

**2\. Locate the Weather files & store the path+file into a .txt file**

- Create a text file with the list of all the weather-based files stored in .csv format

In [9]:
# Folder where the daily weather files are stored, one sub-folder per year.
# Raw strings keep the backslashes literal; '\W', '\D' and '\*' are invalid
# escape sequences in a normal string literal.
path = filepath + r'\Weather\Daily'

# Collect every .csv file one level below `path`, normalised to forward slashes.
# glob gives no ordering guarantee, so sort to make the listing reproducible.
list1 = sorted(found.replace('\\', '/') for found in glob.glob(path + r'\*\*.csv'))

print('Total files found: ', len(list1))
print(list1[0:5])

# Store the list in a .txt file to use/validate later.
# The context manager closes the file even if a write fails.
with open(path + "/Glob_List.txt", "w") as text_file:
    for item in list1:
        text_file.write(item + '\n')

Total files found:  729
['C:/Users/jonnaa/OneDrive - Highways England/Project Work/Datasets/Weather/Daily/2018/2018_01_01.csv', 'C:/Users/jonnaa/OneDrive - Highways England/Project Work/Datasets/Weather/Daily/2018/2018_01_02.csv', 'C:/Users/jonnaa/OneDrive - Highways England/Project Work/Datasets/Weather/Daily/2018/2018_01_03.csv', 'C:/Users/jonnaa/OneDrive - Highways England/Project Work/Datasets/Weather/Daily/2018/2018_01_04.csv', 'C:/Users/jonnaa/OneDrive - Highways England/Project Work/Datasets/Weather/Daily/2018/2018_01_05.csv']


**3\. Open each file, perform filtering and grouping, and combine the daily summaries into one dataframe**

- For each file in the list, open it and extract the required columns
    - Group by RadarID and by the date part of Timestamp, then aggregate using mean, median, min and max values rounded to 2 decimal places
    - Extract the date from each timestamp and rename the column to Date
    - Replace NaN values with empty strings and reset the index
    - Append the data to a combined dataframe
- Drop the redundant dataframes

In [2]:
# Measurements summarised per radar and per day, and the statistics computed for each
WEATHER_MEASURES = ['Present_Weather', 'Air_Temp', 'Average_Wind_Speed', 'Wind_Direction',
                    'Wind_Gust_Speed', 'Wind_Gust_Direction', 'Rain_Intensity']
DAILY_STATS = ['mean', 'median', 'min', 'max']


def summarise_daily_file(csv_path):
    """Summarise one daily weather .csv file per RadarID and date.

    Parameters
    ----------
    csv_path : str
        Path of a weather file containing 'Timestamp', 'RadarID' and every
        column listed in WEATHER_MEASURES.

    Returns
    -------
    pandas.DataFrame
        One row per (RadarID, Date) with two-level columns
        (measure, statistic). Statistics are rounded to 2 decimal places and
        missing values are replaced by empty strings.

    Raises
    ------
    KeyError
        If a required column is missing from the file.
    """
    raw = pd.read_csv(csv_path)
    raw = raw[['Timestamp'] + WEATHER_MEASURES + ['RadarID']]

    # The date is the text before the first space of the timestamp; naming the
    # key 'Date' gives the grouped column its final name directly.
    date_key = raw['Timestamp'].str.split(' ', expand=True)[0].rename('Date')

    return (raw.groupby(['RadarID', date_key])
               .agg({measure: DAILY_STATS for measure in WEATHER_MEASURES})
               .round(2)
               # Blanks instead of NaN: the later transformation step replaces '' with 0
               .replace(np.nan, '')
               .reset_index())


# Summarise every file, then concatenate once. Growing a dataframe with
# DataFrame.append inside the loop is quadratic and append was removed in pandas 2.0.
daily_frames = [summarise_daily_file(file) for file in list1]

# Initial weather extract (empty dataframe when no files were found)
df_weather_init = pd.concat(daily_frames) if daily_frames else pd.DataFrame()

df_weather_init.head(5)


Unnamed: 0_level_0,RadarID,Date,Present_Weather,Present_Weather,Present_Weather,Present_Weather,Air_Temp,Air_Temp,Air_Temp,Air_Temp,...,Wind_Gust_Speed,Wind_Gust_Speed,Wind_Gust_Direction,Wind_Gust_Direction,Wind_Gust_Direction,Wind_Gust_Direction,Rain_Intensity,Rain_Intensity,Rain_Intensity,Rain_Intensity
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,median,min,max,mean,median,min,max,...,min,max,mean,median,min,max,mean,median,min,max
0,10000,2018-01-01,94.17,100.0,50.0,100.0,4.49,4.6,1.9,6.4,...,2.52,35.64,252.82,295.5,1.0,357.0,0.03,0.0,0.0,0.75
1,10001,2018-01-01,99.65,100.0,50.0,100.0,4.04,4.2,1.7,6.1,...,8.28,48.96,281.7,284.0,7.0,343.0,0.0,0.0,0.0,0.44
2,10002,2018-01-01,,,,,3.1,3.25,1.0,5.6,...,3.96,42.48,,,,,0.0,0.0,0.0,0.0
3,10003,2018-01-01,,,,,4.94,5.0,2.7,7.6,...,2.52,28.44,244.24,301.0,0.0,357.0,0.18,0.0,0.0,7.44
4,10004,2018-01-01,99.1,100.0,50.0,100.0,4.66,4.3,3.1,8.0,...,0.36,19.8,146.19,121.0,1.0,349.0,0.01,0.0,0.0,0.57


**4\. Merge the location names by RadarID and give column aliases**

- Import ESS location names (Radar ID location name) and merge with previously created dataframe
- Drop redundant columns and apply column aliases

In [6]:
# Flat column aliases for the (measure, statistic) columns of df_weather_init, in order
weather_aliases = ['RadarID', 'Date',
                   'Present_Weather_Mean', 'Present_Weather_Median', 'Present_Weather_Min', 'Present_Weather_Max',
                   'Air_Temp_Mean', 'Air_Temp_Median', 'Air_Temp_Min', 'Air_Temp_Max',
                   'Average_Wind_Speed_Mean', 'Average_Wind_Speed_Median', 'Average_Wind_Speed_Min', 'Average_Wind_Speed_Max',
                   'Wind_Direction_Mean', 'Wind_Direction_Median', 'Wind_Direction_Min', 'Wind_Direction_Max',
                   'Wind_Gust_Mean', 'Wind_Gust_Median', 'Wind_Gust_Min', 'Wind_Gust_Max',
                   'Wind_Gust_Direction_Mean', 'Wind_Gust_Direction_Median', 'Wind_Gust_Direction_Min', 'Wind_Gust_Direction_Max',
                   'Rain_Intensity_Mean', 'Rain_Intensity_Median', 'Rain_Intensity_Min', 'Rain_Intensity_Max']

# Raw file to staging file. Work on a copy so df_weather_init keeps its
# two-level columns, and flatten the columns BEFORE merging: pandas 2.x refuses
# to merge a two-level frame with a single-level one.
df_weather = df_weather_init.copy()
df_weather.columns = weather_aliases

# Import Location Names (only the first two columns of the file are used).
# `filepath` has no trailing separator, so the leading '/' is required.
df_Locations = pd.read_csv(filepath + '/SWIS_Locations/ESS locations.csv')
df_Locations = df_Locations.iloc[:, 0:2]

# Merge the location names with Radar IDs; radars without a match keep an empty location
df_weather = pd.merge(df_weather, df_Locations, how='left', left_on=['RadarID'], right_on=['ObsPointESSId'])

# Drop the duplicate join key (keyword form; the positional axis argument was removed in pandas 2.0)
df_weather = df_weather.drop(columns='ObsPointESSId')

# Column aliases: the one remaining location column becomes ESS_Location
df_weather.columns = weather_aliases + ['ESS_Location']

df_weather.head(3)

Unnamed: 0,RadarID,Date,Present_Weather_Mean,Present_Weather_Median,Present_Weather_Min,Present_Weather_Max,Air_Temp_Mean,Air_Temp_Median,Air_Temp_Min,Air_Temp_Max,...,Wind_Gust_Max,Wind_Gust_Direction_Mean,Wind_Gust_Direction_Median,Wind_Gust_Direction_Min,Wind_Gust_Direction_Max,Rain_Intensity_Mean,Rain_Intensity_Median,Rain_Intensity_Min,Rain_Intensity_Max,ESS_Location
0,10000,2018-01-01,94.17,100.0,50.0,100.0,4.49,4.6,1.9,6.4,...,35.64,252.82,295.5,1.0,357.0,0.03,0.0,0.0,0.75,A1 - Stannington
1,10001,2018-01-01,99.65,100.0,50.0,100.0,4.04,4.2,1.7,6.1,...,48.96,281.7,284.0,7.0,343.0,0.0,0.0,0.0,0.44,A1 - Rosebrough
2,10002,2018-01-01,,,,,3.1,3.25,1.0,5.6,...,42.48,,,,,0.0,0.0,0.0,0.0,A696 - Raechester - LHA


**5\. Time Intelligence to aggregate parameters based on last 48 hours, 96 hours, 1 week, and fortnight**

- As part of the transformation, convert the Date field to a datetime type and replace blank values with zeros to avoid errors.
- Sort the dataframe by RadarID and Date
- Group by RadarID and, for each column, average the current and preceding daily rows over windows of 2 rows (48 hr), 4 rows (96 hr), 7 rows (1 week) and 14 rows (2 weeks), rounded to 3 decimal places

In [11]:
# (source column, prefix of the derived columns), in output order
TREND_COLUMNS = [('Rain_Intensity_Mean', 'Rain_Intensity_Avg'),
                 ('Rain_Intensity_Max', 'Rain_Intensity_Max'),
                 ('Average_Wind_Speed_Mean', 'Wind_Speed_Mean'),
                 ('Wind_Direction_Mean', 'Wind_Direction_Mean'),
                 ('Wind_Gust_Mean', 'Wind_Gust_Mean'),
                 ('Wind_Gust_Direction_Mean', 'Wind_Gust_Direction_Mean')]

# (suffix of the derived columns, window length in daily rows), in output order
TREND_WINDOWS = [('Last_48hrs', 2), ('Last_96hrs', 4), ('Last_Week', 7), ('Last_2_Weeks', 14)]


def trailing_window_mean(frame, column, window):
    """Average `column` over the current row and the `window - 1` previous rows of the same RadarID.

    Parameters
    ----------
    frame : pandas.DataFrame
        Data sorted by RadarID and Date, with a numeric `column`.
    column : str
        Name of the column to average.
    window : int
        Number of rows in the window, including the current row.

    Returns
    -------
    pandas.Series
        Window sum divided by `window`, aligned with `frame.index`.

    Notes
    -----
    - Rows missing at the start of a radar's history count as 0 and the
      divisor is always `window`, so early values are scaled down.
    - The window counts rows, not calendar days: a gap in a radar's dates is
      not detected.
    """
    per_radar = frame.groupby('RadarID')[column]
    total = frame[column]
    # Add the lags one by one, nearest first
    for lag in range(1, window):
        total = total + per_radar.shift(lag).fillna(0)
    return total / window


# Necessary Transformations
# pd.to_datetime replaces astype('datetime64'), whose unit-less form pandas 2.x rejects
df_weather = df_weather.assign(Date=pd.to_datetime(df_weather['Date']))

# Blank aggregates (days without readings) become 0; to_numeric guarantees a numeric dtype
for source_col, _ in TREND_COLUMNS:
    df_weather[source_col] = pd.to_numeric(df_weather[source_col].replace('', 0))

# Sort the Weather DF by RadarID and Date
df_weather = df_weather.sort_values(by=['RadarID', 'Date']).reset_index(drop=True)

# Build the history frame on a copy so df_weather itself gains no extra columns
df_weather_hist = df_weather.copy()

# NOTE(review): the 'Rain_Intensity_Max_*' columns are the MEAN of the daily maxima
# over the window, not the maximum -- confirm this is the intended definition.
# NOTE(review): wind directions are averaged arithmetically and missing days count
# as 0 -- confirm this is acceptable for an angular measure.
for source_col, prefix in TREND_COLUMNS:
    for suffix, window in TREND_WINDOWS:
        df_weather_hist[prefix + '_' + suffix] = trailing_window_mean(df_weather, source_col, window).round(3)

df_weather_hist.head(5)

Unnamed: 0,RadarID,Date,Present_Weather_Mean,Present_Weather_Median,Present_Weather_Min,Present_Weather_Max,Air_Temp_Mean,Air_Temp_Median,Air_Temp_Min,Air_Temp_Max,...,Wind_Direction_Mean_Last_Week,Wind_Direction_Mean_Last_2_Weeks,Wind_Gust_Mean_Last_48hrs,Wind_Gust_Mean_Last_96hrs,Wind_Gust_Mean_Last_Week,Wind_Gust_Mean_Last_2_Weeks,Wind_Gust_Direction_Mean_Last_48hrs,Wind_Gust_Direction_Mean_Last_96hrs,Wind_Gust_Direction_Mean_Last_Week,Wind_Gust_Direction_Mean_Last_2_Weeks
0,10000,2018-01-01,94.17,100.0,50.0,100.0,4.49,4.6,1.9,6.4,...,35.976,17.988,8.6,4.3,2.457,1.229,126.41,63.205,36.117,18.059
1,10000,2018-01-02,90.21,100.0,50.0,100.0,4.14,3.8,2.2,6.6,...,64.899,32.449,17.935,8.968,5.124,2.562,256.46,128.23,73.274,36.637
2,10000,2018-01-03,88.47,100.0,50.0,100.0,5.83,6.4,3.2,7.5,...,102.71,51.355,28.285,18.442,10.539,5.269,268.125,197.267,112.724,56.362
3,10000,2018-01-04,77.57,60.0,50.0,100.0,4.66,4.9,3.2,6.1,...,131.776,65.888,25.11,21.522,12.299,6.149,234.825,245.642,140.367,70.184
4,10000,2018-01-05,91.39,100.0,50.0,100.0,2.98,3.4,0.1,4.9,...,165.236,82.618,14.795,21.54,14.766,7.383,163.18,215.652,159.347,79.674


**6\. Save all the weather data into a single .csv file**

- Save the aggregated weather data as a .csv format

In [12]:
# Save the aggregated weather data. `filepath` has no trailing separator, so the
# leading '/' is required; without it the target becomes '...DatasetsWeather/...'.
# The dataframe index is written as the first (unnamed) column.
df_weather_hist.to_csv(filepath + '/Weather/Weather_Aggregated.csv')

# Row count of the staging dataframe, shown as the cell output
len(df_weather)

188956