In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load Excel file
file_path = 'Data_Traffic_weather_Event.xlsx'  # Update if using a different path
excel_data = pd.ExcelFile(file_path)

In [3]:
# Load each sheet
weather_df = excel_data.parse('weather data')
event_df = excel_data.parse('Event data')
traffic_df = excel_data.parse('traffic data')

In [4]:
weather_df.head()

Unnamed: 0,DateTime,temp,humidity,preciptype,windspeed
0,2015-11-01 00:00:00,22.444444,71.7,Clear,12.8
1,2015-11-01 01:00:00,22.444444,71.7,Clouds,12.8
2,2015-11-01 02:00:00,22.444444,71.7,Clouds,12.8
3,2015-11-01 03:00:00,22.444444,71.7,Clouds,12.8
4,2015-11-01 04:00:00,22.444444,71.7,Clouds,12.8


In [5]:
event_df.head()

Unnamed: 0,DateTime,event_name,category,location,expected_traffic_impact
0,2015-11-01 00:00:00,,,,
1,2015-11-01 01:00:00,,,,
2,2015-11-01 02:00:00,,,,
3,2015-11-01 03:00:00,,,,
4,2015-11-01 04:00:00,,,,


In [6]:
traffic_df.head()

Unnamed: 0,DateTime,Junction,Vehicles,ID
0,2015-11-01 00:00:00,1,15,20151101001
1,2015-11-01 01:00:00,1,13,20151101011
2,2015-11-01 02:00:00,1,10,20151101021
3,2015-11-01 03:00:00,1,7,20151101031
4,2015-11-01 04:00:00,1,9,20151101041


In [7]:
event_df.isnull().sum()

DateTime                    0
event_name                 13
category                   13
location                   13
expected_traffic_impact    13
dtype: int64

In [8]:
# Fill missing values in Event data
event_df.fillna({
    'event_name': 'None',
    'category': 'None',
    'location': 'None',
    'expected_traffic_impact': 'None'
}, inplace=True)

In [9]:
# Merge traffic with weather
traffic_weather_df = pd.merge(
    traffic_df,
    weather_df[['DateTime', 'temp', 'humidity', 'preciptype', 'windspeed']],
    on='DateTime',
    how='left'
)

In [10]:
# Merge with event data
unified_df = pd.merge(
    traffic_weather_df,
    event_df[['DateTime', 'event_name', 'category', 'location', 'expected_traffic_impact']],
    on='DateTime',
    how='left')

unified_df.head()



Unnamed: 0,DateTime,Junction,Vehicles,ID,temp,humidity,preciptype,windspeed,event_name,category,location,expected_traffic_impact
0,2015-11-01,1,15,20151101001,22.444444,71.7,Clear,12.8,,,,
1,2015-11-01,1,15,20151101001,22.444444,71.7,Clear,12.8,,,,
2,2015-11-01,1,15,20151101001,22.444444,71.7,Clear,12.8,,,,
3,2015-11-01,1,15,20151101001,22.444444,71.7,Clouds,12.8,,,,
4,2015-11-01,1,15,20151101001,22.444444,71.7,Clouds,12.8,,,,


In [11]:
#Convert 'DateTime' to datetime
weather_df['DateTime'] = pd.to_datetime(weather_df['DateTime'])
event_df['DateTime'] = pd.to_datetime(event_df['DateTime'])
traffic_df['DateTime'] = pd.to_datetime(traffic_df['DateTime'])

In [16]:
unified_df.to_csv('cleaned_traffic_weather_event_data.csv', index=False)

In [17]:
scaler = MinMaxScaler()
numeric_cols = ['temp', 'humidity', 'windspeed', 'Vehicles']
unified_df[numeric_cols] = scaler.fit_transform(unified_df[numeric_cols])

In [18]:
unified_df.to_csv('cleaned_normalized_traffic_weather_event_data.csv', index=False)