## Combining 2023 Sensor and Weather Data

### Import the necessary libraries

In [None]:
import pandas as pd

### Read in sensor data (created in sensor_data_concat.ipynb file)

In [None]:
# Load the concatenated sensor readings from Drive and report how many rows
# were read.
sensor_path = '/content/drive/MyDrive/PeMS Data/sensor_data.csv'
sensor_data = pd.read_csv(sensor_path)
print(f'Length of Sensor Data df: {sensor_data.shape[0]}')

### Read in station data (downloaded directly from PeMS website)

In [None]:
# Load the tab-separated station file, keeping only the selected column
# positions, then restrict it to rows whose 'Fwy' value is 280.
station_path = '/content/drive/MyDrive/PeMS Data/stations.txt'
cols_index = [0, 1, 4, 5, 7, 8, 9, 13]
station_info = pd.read_csv(station_path, usecols=cols_index, sep='\t')

# 'Fwy' is constant after the filter, so it is dropped once it has served
# its purpose; the index is renumbered from 0 for the filtered rows.
station_info = (
    station_info[station_info['Fwy'] == 280]
    .reset_index(drop=True)
    .drop(columns='Fwy')
)
station_info.head()

### Read in 2023 weather data (created in weather_data_concat.ipynb file) 

In [None]:
# Load the weather observations and derive the calendar columns
# (Year/Month/Day/Hour) that the join cell uses as merge keys.
weather_path = '/content/drive/MyDrive/PeMS Data/weather_data.csv'
weather_info = pd.read_csv(weather_path)

# utc=True makes the parsed timestamps timezone-aware and expressed in UTC.
weather_info['dt'] = pd.to_datetime(weather_info['dt'], utc=True)

# NOTE(review): these components are UTC-based, while the sensor 'Timestamp'
# is parsed without utc=True in the join cell -- confirm both sources share a
# timezone before relying on the hour-level join.
observed_at = weather_info['dt'].dt
for column, attribute in (
    ('Year', 'year'),
    ('Month', 'month'),
    ('Day', 'day'),
    ('Hour', 'hour'),
):
    weather_info[column] = getattr(observed_at, attribute)

# The 'weather' column is optional; errors='ignore' tolerates its absence.
weather_info = weather_info.drop(columns='weather', errors='ignore')
weather_info.head()

### Joining Sensor Data, Station Data, and Weather Data

In [None]:
# Attach station metadata to every sensor reading (left join keeps readings
# even when no station row matches), then discard readings with no speed.
merged_df = sensor_data.merge(
    station_info, left_on='Station', right_on='ID', how='left'
)
merged_df = merged_df.dropna(subset=['Avg Speed'])

# Break the reading timestamp into calendar/time-of-day features.
merged_df['Timestamp'] = pd.to_datetime(merged_df['Timestamp'])
stamp = merged_df['Timestamp'].dt
merged_df['Year'] = stamp.year
merged_df['Month'] = stamp.month
merged_df['Day'] = stamp.day
merged_df['DayOfWeek'] = stamp.day_name()
merged_df['Hour'] = stamp.hour
merged_df['Minutes'] = stamp.minute
merged_df['minutesOfDay'] = stamp.hour * 60 + stamp.minute

# Weather is matched at hourly resolution on the shared calendar columns.
weather_keys = ['Year', 'Month', 'Day', 'Hour']
merged_df = merged_df.merge(weather_info, on=weather_keys, how='left')

# One-hot encode the categorical columns.
categorical_cols = ['DayOfWeek', 'City', 'County', 'weather.desc']
merged_df = pd.get_dummies(merged_df, columns=categorical_cols)
merged_df.head()

### Splitting into Northbound and Southbound

In [None]:
# Partition the merged frame by the value of its 'Direction' column.
is_north = merged_df['Direction'] == 'N'
is_south = merged_df['Direction'] == 'S'
df_north = merged_df[is_north]
df_south = merged_df[is_south]

# Row counts rendered with thousands separators for readability.
formatted_north_count = f"{len(df_north):,}"
formatted_south_count = f"{len(df_south):,}"

print(f"North: {formatted_north_count}")
print(f"South: {formatted_south_count}")