In [59]:
import pandas as pd
from scipy.stats import norm
import pandas as pd
import numpy as np
import torch
from data_preprocessing import inverse_normalize_count, prepare_lstm_data
import pickle
from model import LSTMWithMLP

Here I wil predict the hourly traffic demand in a typical weekday to see traffic demand in each timeslots.
I will choose 2020-01-01 as an example

### 1. Preparation

Prepare data needed for LSTM

In [60]:
# Load the data needed for lstm
path = r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\2015-2019 total trips.csv"
df1 = pd.read_csv(path)
df1['date'] = pd.to_datetime(df1['date'])
df1['hour'] = df1['date'].dt.hour
data = df1[df1['date'].dt.date == pd.to_datetime('2019-12-31').date()]
# Convert the total_trip_count column to float64
data['total_trips'] = data['total_trips'].astype('float64')

# Normalize the total_trip_count column
_, scaler = prepare_lstm_data()
data.loc[:, 'total_trips'] = scaler.fit_transform(data[['total_trips']])

# Create the sequence and convert the Pytorch tensor
X_tensor = torch.tensor(data['total_trips'].values, dtype=torch.float32).unsqueeze(-1)

X_lstm = X_tensor.unsqueeze(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['total_trips'] = data['total_trips'].astype('float64')


Total windows created: 1
Total windows created: 2
Total windows created: 3
Total windows created: 4
Total windows created: 5


Prepare the data needed for MLP

In [61]:
with open('scaler1.pkl', 'rb') as file:
    loaded_scaler1 = pickle.load(file)

with open('scaler2.pkl', 'rb') as file:
    loaded_scaler2 = pickle.load(file)

path = r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\predict MLP data.csv"
df2 = pd.read_csv(path)
df2['date'] = pd.to_datetime(df2['date'])
df2['hour'] = df2['date'].dt.hour
df2['month'] = df2['date'].dt.month

numeric_features1 = ['temperature_2m', 'CRASH COUNT']
df2[numeric_features1] = loaded_scaler1.transform(df2[numeric_features1])

numeric_features2 = ['precipitation', 'rain', 'snowfall', 'snow_depth', 'wind_speed_10m']
df2[numeric_features2] = loaded_scaler2.transform(df2[numeric_features2])

df2 = df2.drop(columns='date')
mlp_tensor = torch.tensor(df2.values, dtype=torch.float32)

### 2. Predict the total traffic demand

In [79]:
# Load the model
model = LSTMWithMLP(lstm_input_size=1, output_size=1, num_gaussians=5, mlp_input_dim=11)
model.load_state_dict(torch.load('final_model_state.pth'))

# Predict the demand
model.eval()

with torch.no_grad():
    pi, sigma, mu = model(X_lstm, mlp_tensor)

mu = inverse_normalize_count(mu, scaler)
mu = mu.flatten()  # Flatten mu to 1D array
pi = pi.flatten()
sigma = sigma.flatten()  # Convert sigma tensor to numpy and flatten

# Create the DataFrame
output = pd.DataFrame({
    'demand': mu,
    'probability': pi,
    'volatility': sigma  
})
output

max_demand = output.loc[output['probability'].idxmax(), 'demand']
print(f'The predicted total traffic demand is {max_demand}')

The predicted total traffic demand is 1747.0


Sampling remains necessary

### 3. Assign the traffic demand in specific route

Firstly, given the total demand, we need calculate the probability of each route in specific route.

We  will analyze the data from the first hour of January 1st for each year from 2015 to 2019, then get the probability of each specific scenario we defined.

In [80]:
path = r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\trip_counts_with_probability.csv"
data = pd.read_csv(path)

data

Unnamed: 0,PULocationID,DOLocationID,time_period,total_trip_count,probability
0,4,13,last_20_min,4,0.000048
1,4,13,middle_20_min,2,0.000024
2,4,24,middle_20_min,1,0.000012
3,4,41,middle_20_min,1,0.000012
4,4,42,last_20_min,2,0.000024
...,...,...,...,...,...
8588,263,261,last_20_min,1,0.000012
8589,263,261,middle_20_min,4,0.000048
8590,263,262,first_20_min,33,0.000397
8591,263,262,last_20_min,53,0.000638
