In [None]:
# Package imports
import pandas as pd

In [1]:
# Read the three cleaned CSV exports, one DataFrame per source,
# in the order: dispatch, traffic, weather.
dispatch_data, traffic_data, weather_data = map(
    pd.read_csv,
    (
        "data/cleaned_dispatch_data.csv",
        "data/cleaned_traffic_data.csv",
        "data/cleaned_weather_data.csv",
    ),
)

# Print the first rows of every dataset, in load order, as a quick sanity check.
for frame in (dispatch_data, traffic_data, weather_data):
    print(frame.head())


   dispatch_id  location_id       job_type                  dispatch_time  \
0            1          139    tire_change  2025-01-10 03:11:30.488504576   
1            2          129        lockout  2025-01-11 11:34:24.907076608   
2            3          115         towing  2025-01-11 03:34:28.379484416   
3            4          143    tire_change  2025-01-11 16:24:27.170795008   
4            5          108  fuel_delivery  2025-01-10 06:42:32.526908672   

                 completion_time  delay_minutes  
0  2025-01-10 03:33:30.488504576           22.0  
1  2025-01-11 11:40:24.907076608            6.0  
2  2025-01-11 04:18:28.379484416           44.0  
3  2025-01-11 16:57:27.170795008           33.0  
4  2025-01-10 06:58:32.526908672           16.0  
   location_id        date traffic_level
0          139  2025-01-10          high
1          129  2025-01-10      moderate
2          115  2025-01-10        severe
3          143  2025-01-10           low
4          108  2025-01-10      

# Create features

In [3]:
def _standardize(values):
    """Return the z-score of a numeric Series: (values - mean) / sample std.

    When the column has no usable spread (std is 0 or NaN, e.g. a constant
    column or a single row) every non-missing value maps to 0.0 instead of
    producing NaN/inf from a division by zero.
    """
    centered = values - values.mean()
    spread = values.std()
    if not spread > 0:  # also true when spread is NaN
        return centered * 0.0
    return centered / spread


# Parse the dispatch timestamp once and reuse it for every derived column.
dispatch_timestamps = pd.to_datetime(dispatch_data['dispatch_time'])

# Calendar date of the dispatch as a 'YYYY-MM-DD' string.
# Later cells show dispatch_data carrying a 'date' column, but the loaded CSV
# preview has none; derive it here so the notebook works on a fresh kernel.
# An already-present column is left untouched.
if 'date' not in dispatch_data.columns:
    dispatch_data['date'] = dispatch_timestamps.dt.strftime('%Y-%m-%d')

# Hour-of-day and day-of-month features
dispatch_data['dispatch_hour'] = dispatch_timestamps.dt.hour
dispatch_data['dispatch_day'] = dispatch_timestamps.dt.day

# Category encoding
# job_type: integer codes assigned by pandas' categorical dtype (-1 for missing values).
dispatch_data['job_type_encoded'] = dispatch_data['job_type'].astype('category').cat.codes
# traffic_level: ordinal scale; any level not listed below becomes NaN.
traffic_level_order = {'low': 1, 'moderate': 2, 'high': 3, 'severe': 4}
traffic_data['traffic_level_encoded'] = traffic_data['traffic_level'].map(traffic_level_order)

# Normalize/Standardize columns (z-score)
weather_data['temperature_normalized'] = _standardize(weather_data['temperature'])
weather_data['precipitation_normalized'] = _standardize(weather_data['precipitation'])


# Merge Data

In [4]:
# List the column index of each dataset before merging.
# Every heading after the first is preceded by a blank line.
frames_by_label = (
    ("dispatch_data", dispatch_data),
    ("traffic_data", traffic_data),
    ("weather_data", weather_data),
)
for position, (label, frame) in enumerate(frames_by_label):
    leading_gap = "\n" if position else ""
    print(f"{leading_gap}Columns in {label}:")
    print(frame.columns)

Columns in dispatch_data:
Index(['dispatch_id', 'location_id', 'job_type', 'dispatch_time',
       'completion_time', 'delay_minutes', 'date', 'dispatch_hour',
       'dispatch_day', 'job_type_encoded'],
      dtype='object')

Columns in traffic_data:
Index(['location_id', 'date', 'traffic_level', 'traffic_level_encoded'], dtype='object')

Columns in weather_data:
Index(['location_id', 'date', 'temperature', 'precipitation',
       'temperature_normalized', 'precipitation_normalized'],
      dtype='object')


   dispatch_id  location_id       job_type                  dispatch_time  \
0            1          139    tire_change  2025-01-10 03:11:30.488504576   
1            2          129        lockout  2025-01-11 11:34:24.907076608   
2            3          115         towing  2025-01-11 03:34:28.379484416   
3            4          143    tire_change  2025-01-11 16:24:27.170795008   
4            5          108  fuel_delivery  2025-01-10 06:42:32.526908672   

                 completion_time  delay_minutes        date  dispatch_hour  \
0  2025-01-10 03:33:30.488504576           22.0  2025-01-10              3   
1  2025-01-11 11:40:24.907076608            6.0  2025-01-11             11   
2  2025-01-11 04:18:28.379484416           44.0  2025-01-11              3   
3  2025-01-11 16:57:27.170795008           33.0  2025-01-11             16   
4  2025-01-10 06:58:32.526908672           16.0  2025-01-10              6   

   dispatch_day  job_type_encoded traffic_level  traffic_level_encod