In [1]:
import pandas as pd
import numpy as np
import torch
import pickle
from data_preprocessing import inverse_normalize_count, prepare_lstm_data
from model import LSTMWithMLP

# Function to prepare LSTM input data
def lstm_data(data, scaler):
    data['total_trips'] = data['total_trips'].astype('float64')
    data.loc[:, 'total_trips'] = scaler.transform(data[['total_trips']])
    X_tensor = torch.tensor(data['total_trips'].values, dtype=torch.float32).unsqueeze(-1)
    X_lstm = X_tensor.unsqueeze(0)
    return X_lstm

# Function to load and prepare MLP input data
def prepare_mlp_input(df_mlp, hour):
    with open(r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\处理数据\scaler1.pkl", 'rb') as file:
        loaded_scaler1 = pickle.load(file)

    with open(r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\处理数据\scaler2.pkl", 'rb') as file:
        loaded_scaler2 = pickle.load(file)

    df_mlp['hour'] = df_mlp['date'].dt.hour
    df_mlp['month'] = df_mlp['date'].dt.month
    
    numeric_features1 = ['temperature_2m', 'CRASH COUNT']
    numeric_features2 = ['precipitation', 'rain', 'snowfall', 'snow_depth', 'wind_speed_10m']
    
    # Remove column names before scaling
    df_mlp_numeric1 = df_mlp[numeric_features1].to_numpy()
    df_mlp_numeric2 = df_mlp[numeric_features2].to_numpy()
    
    df_mlp_numeric1 = loaded_scaler1.transform(df_mlp_numeric1)
    df_mlp_numeric2 = loaded_scaler2.transform(df_mlp_numeric2)
    
    # Add scaled values back to dataframe
    df_mlp[numeric_features1] = df_mlp_numeric1
    df_mlp[numeric_features2] = df_mlp_numeric2
    
    mlp_input = df_mlp[df_mlp['hour'] == hour].drop(columns='date')
    mlp_tensor = torch.tensor(mlp_input.values, dtype=torch.float32)
    return mlp_tensor

# Function to predict demand
def predict_demand(X_lstm, mlp_tensor, model, scaler):
    model.eval()
    with torch.no_grad():
        pi, sigma, mu = model(X_lstm, mlp_tensor)
    mu = inverse_normalize_count(mu, scaler).flatten()
    pi = pi.flatten()
    sigma = sigma.flatten()
    output = pd.DataFrame({
        'demand': mu,
        'probability': pi,
        'volatility': sigma
    })
    max_demand = output.loc[output['probability'].idxmax(), 'demand']
    return max_demand

# Load data
path = r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\处理数据\2015-2019 total trips.csv"
df1 = pd.read_csv(path)
df1['date'] = pd.to_datetime(df1['date'])
df1['hour'] = df1['date'].dt.hour

initial_data = df1[df1['date'] >= pd.to_datetime('2019-12-31 00:00:00')]

mlp_path = r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\处理数据\2020-01-01_mlp.csv"
df_mlp = pd.read_csv(mlp_path)
df_mlp['date'] = pd.to_datetime(df_mlp['date'])

# Prepare LSTM scaler
_, lstm_scaler = prepare_lstm_data()

# Load the model
model = LSTMWithMLP(lstm_input_size=1, output_size=1, num_gaussians=5, mlp_input_dim=11)
state_path = r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\处理数据\final_model_state.pth"
model.load_state_dict(torch.load(state_path))


results = []

# Loop over each hour of January 1, 2020
for hour in range(24):
    if hour == 0:
        data = initial_data.tail(24)
    else:
        new_entry = pd.DataFrame({'date': [pd.to_datetime(f'2020-01-01 {hour-1}:00:00')], 'total_trips': [max_demand]})
        data = pd.concat([data, new_entry], ignore_index=True).tail(24)
    
    X_lstm = lstm_data(data, lstm_scaler)
    mlp_tensor = prepare_mlp_input(df_mlp, hour)

    max_demand = predict_demand(X_lstm, mlp_tensor, model, lstm_scaler)
    results.append(max_demand)

results_df = pd.DataFrame({
    'hour': range(24),
    'predicted_demand': results
})
results_df['predicted_demand'] = np.ceil(results_df['predicted_demand']).astype(int)

Total windows created: 1
Total windows created: 2
Total windows created: 3
Total windows created: 4
Total windows created: 5




In [2]:
results_df

Unnamed: 0,hour,predicted_demand
0,0,11613
1,1,29147
2,2,10398
3,3,31084
4,4,351894
5,5,-131402
6,6,-1170121
7,7,-1299652
8,8,-3870458
9,9,-30568297


### 3. Assign the traffic demand in specific route

Firstly, given the total demand, we need calculate the probability of each route in specific route.

We  will analyze the data from the first hour of January 1st for each year from 2015 to 2019, then get the probability of each specific scenario we defined.

In [3]:
path = r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\trip_counts_with_probability.csv"
data = pd.read_csv(path)

data['']

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\yanzh\\Desktop\\code_and_data\\4. Deep learning part\\trip_counts_with_probability.csv'