In [6]:
import pandas as pd
import numpy as np
import torch
import pickle
from data_preprocessing import prepare_lstm_data  
from model import LSTMWithMLP

# Function to prepare LSTM input data
def lstm_data(data, scaler):
    if data.empty:
        print(f"No data available for transformation. Skipping...")
        return None  # You might want to handle this case differently based on your model's requirements

    data = data.copy()  # Make a copy to avoid SettingWithCopyWarning
    data['total_trips'] = data['total_trips'].astype('float64')
    data.loc[:, 'total_trips'] = scaler.transform(data[['total_trips']])  # Use .loc to modify in place
    X_tensor = torch.tensor(data['total_trips'].values, dtype=torch.float32).unsqueeze(-1)
    X_lstm = X_tensor.unsqueeze(0)
    return X_lstm


def inverse_normalize_count(data, scaler):
    data = data.numpy().reshape(-1, 1)  # Convert tensor to numpy array and reshape
    data = scaler.inverse_transform(data)
    data = np.round(data).astype(int)  # Convert to int to avoid dtype warning
    return data

# Function to load and prepare MLP input data
def prepare_mlp_input(df_mlp, current_date, scaler):
    # Prepare data selection
    daily_data = df_mlp[df_mlp['date'] == current_date]
    daily_data['hour'] = df_mlp['date'].dt.hour
    daily_data['month'] = df_mlp['date'].dt.month

    # Numeric features
    numeric_features = ['temperature_2m_max', 'temperature_2m_min', 'CRASH COUNT', 'precipitation_sum (mm)', 'rain_sum (mm)', 'snowfall_sum (cm)', 'wind_speed_10m_max (km/h)']
    
    df_mlp_numeric = df_mlp[numeric_features].values 
    df_mlp_numeric = scaler.transform(df_mlp_numeric)

    
    df_mlp[numeric_features] = df_mlp_numeric


    mlp_input = daily_data.drop(columns='date')
    mlp_tensor = torch.tensor(mlp_input.values, dtype=torch.float32)
    return mlp_tensor

# Function to predict demand
def predict_demand(X_lstm, mlp_tensor, model, scaler):
    model.eval()
    with torch.no_grad():
        pi, sigma, mu = model(X_lstm, mlp_tensor)
    mu = inverse_normalize_count(mu, scaler).flatten()
    pi = pi.flatten()
    sigma = sigma.flatten()
    output = pd.DataFrame({
        'demand': mu,
        'probability': pi,
        'volatility': sigma
    })
    max_demand = output.loc[output['probability'].idxmax(), 'demand']
    return max_demand


In [7]:
# Load data and preprocess
path = r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\处理数据\daily\2015-2019(daily_total_trips).csv"
df1 = pd.read_csv(path)
df1['date'] = pd.to_datetime(df1['date'])
initial_data = df1[df1['date'] >= pd.to_datetime('2019-12-25')]

mlp_path = r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\处理数据\daily\2020mlp.csv"
df_mlp = pd.read_csv(mlp_path)
df_mlp['date'] = pd.to_datetime(df_mlp['date'])

# Load scalers
with open(r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\处理数据\daily_scaler1.pkl", 'rb') as file:
    loaded_scaler = pickle.load(file)

# Prepare LSTM scaler
_, lstm_scaler = prepare_lstm_data()

# Load the LSTM+MLP model
model = LSTMWithMLP(lstm_input_size=1, output_size=1, num_gaussians=5, mlp_input_dim=11)
state_path = r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\处理数据\daily_final_model_state.pth"
model.load_state_dict(torch.load(state_path))

results = []

# Set the date range for prediction from January 1, 2020, to June 30, 2020
date_range = pd.date_range(start='2020-01-01', end='2020-06-30')


Total windows created: 1
Total windows created: 2
Total windows created: 3
Total windows created: 4
Total windows created: 5


RuntimeError: Error(s) in loading state_dict for LSTMWithMLP:
	size mismatch for lstm.lstm.weight_ih_l0: copying a param with shape torch.Size([512, 1]) from checkpoint, the shape in current model is torch.Size([200, 1]).
	size mismatch for lstm.lstm.weight_hh_l0: copying a param with shape torch.Size([512, 128]) from checkpoint, the shape in current model is torch.Size([200, 50]).
	size mismatch for lstm.lstm.bias_ih_l0: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([200]).
	size mismatch for lstm.lstm.bias_hh_l0: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([200]).
	size mismatch for lstm.lstm.weight_ih_l1: copying a param with shape torch.Size([512, 128]) from checkpoint, the shape in current model is torch.Size([200, 50]).
	size mismatch for lstm.lstm.weight_hh_l1: copying a param with shape torch.Size([512, 128]) from checkpoint, the shape in current model is torch.Size([200, 50]).
	size mismatch for lstm.lstm.bias_ih_l1: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([200]).
	size mismatch for lstm.lstm.bias_hh_l1: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([200]).
	size mismatch for mlp.fc1.weight: copying a param with shape torch.Size([8, 11]) from checkpoint, the shape in current model is torch.Size([10, 11]).
	size mismatch for mlp.fc1.bias: copying a param with shape torch.Size([8]) from checkpoint, the shape in current model is torch.Size([10]).
	size mismatch for mlp.fc2.weight: copying a param with shape torch.Size([128, 8]) from checkpoint, the shape in current model is torch.Size([50, 10]).
	size mismatch for mlp.fc2.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([50]).
	size mismatch for mdn.z_pi.weight: copying a param with shape torch.Size([5, 256]) from checkpoint, the shape in current model is torch.Size([5, 100]).
	size mismatch for mdn.z_sigma.weight: copying a param with shape torch.Size([5, 256]) from checkpoint, the shape in current model is torch.Size([5, 100]).
	size mismatch for mdn.z_mu.weight: copying a param with shape torch.Size([5, 256]) from checkpoint, the shape in current model is torch.Size([5, 100]).

In [8]:
for current_date in date_range:
    day_start = current_date - pd.Timedelta(days=7)
    data = df1[(df1['date'] >= day_start) & (df1['date'] < current_date)]

    X_lstm = lstm_data(data, lstm_scaler)
    if X_lstm is None:
        print(f"Skipping prediction for {current_date} due to no data.")
        continue  # Skip this date or handle differently

    mlp_tensor = prepare_mlp_input(df_mlp, current_date, loaded_scaler)
    daily_demand = predict_demand(X_lstm, mlp_tensor, model, lstm_scaler)
    
    results.append({
        'date': current_date,
        'predicted_demand': np.ceil(daily_demand).astype(int)
    })

results_df = pd.DataFrame(results)
results_df

No data available for transformation. Skipping...
Skipping prediction for 2020-01-08 00:00:00 due to no data.
No data available for transformation. Skipping...
Skipping prediction for 2020-01-09 00:00:00 due to no data.
No data available for transformation. Skipping...
Skipping prediction for 2020-01-10 00:00:00 due to no data.
No data available for transformation. Skipping...
Skipping prediction for 2020-01-11 00:00:00 due to no data.
No data available for transformation. Skipping...
Skipping prediction for 2020-01-12 00:00:00 due to no data.
No data available for transformation. Skipping...
Skipping prediction for 2020-01-13 00:00:00 due to no data.
No data available for transformation. Skipping...
Skipping prediction for 2020-01-14 00:00:00 due to no data.
No data available for transformation. Skipping...
Skipping prediction for 2020-01-15 00:00:00 due to no data.
No data available for transformation. Skipping...
Skipping prediction for 2020-01-16 00:00:00 due to no data.
No data av

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  daily_data['hour'] = df_mlp['date'].dt.hour
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  daily_data['month'] = df_mlp['date'].dt.month
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  daily_data['hour'] = df_mlp['date'].dt.hour
A value is trying to be set on a copy of a slice from a DataFrame.
Try 

Unnamed: 0,date,predicted_demand
0,2020-01-01,515863
1,2020-01-02,129777
2,2020-01-03,135204
3,2020-01-04,135946
4,2020-01-05,136277
5,2020-01-06,136295
6,2020-01-07,135448
7,2020-01-01,117756
8,2020-01-02,48929
9,2020-01-03,51321


### 3. Assign the traffic demand in specific route

Firstly, given the total demand, we need calculate the probability of each route in specific route.

We  will analyze the data from the first hour of January 1st for each year from 2015 to 2019, then get the probability of each specific scenario we defined.

In [None]:
path = r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\trip_counts_with_probability.csv"
data = pd.read_csv(path)

data['']