In [1]:
import pandas as pd
import numpy as np
import torch
import pickle
from data_preprocessing import prepare_lstm_data  
from model import LSTMWithMLP
from config import num_gaussians

# Function to prepare LSTM input data
def lstm_data(data, scaler):
    data = data.copy()  # Ensure a deep copy to avoid SettingWithCopyWarning
    data['total_trips'] = data['total_trips'].astype('float64')
    data[['total_trips']] = scaler.transform(data[['total_trips']])
    X_tensor = torch.tensor(data['total_trips'].values, dtype=torch.float32).unsqueeze(-1)
    X_lstm = X_tensor.unsqueeze(0)
    return X_lstm


def inverse_normalize_count(data, scaler):
    data = data.numpy().reshape(-1, 1)  # Convert tensor to numpy array and reshape
    data = scaler.inverse_transform(data)
    data = np.round(data).astype(int)  # Convert to int to avoid dtype warning
    return data

# Function to load and prepare MLP input data
def prepare_mlp_input(df_mlp, day):

    with open(r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\处理数据\daily_scaler1.pkl", 'rb') as file:
        loaded_scaler1 = pickle.load(file)

    # Filter rows where the date matches current_date
    df_mlp['hour'] = df_mlp['date'].dt.hour
    df_mlp['day'] = df_mlp['date'].dt.day

    # Numeric features
    numeric_features = ['temperature_2m_max', 'temperature_2m_min', 'CRASH COUNT', 'precipitation_sum (mm)', 
                        'rain_sum (mm)', 'snowfall_sum (cm)', 'wind_speed_10m_max (km/h)']
    
    df_mlp[numeric_features] = loaded_scaler1.transform(df_mlp[numeric_features])

    # Remove 'date' column and convert to tensor
    mlp_input = df_mlp[df_mlp['day'] == day].drop(columns='date')
    mlp_tensor = torch.tensor(mlp_input.values, dtype=torch.float32)
    
    return mlp_tensor

# Function to predict demand
def predict_demand(X_lstm, mlp_tensor, model, scaler):
    model.eval()
    with torch.no_grad():
        pi, sigma, mu = model(X_lstm, mlp_tensor)
    mu = inverse_normalize_count(mu, scaler).flatten()
    pi = pi.flatten()
    sigma = sigma.flatten()
    output = pd.DataFrame({
        'demand': mu,
        'probability': pi,
        'volatility': sigma
    })
    # max_demand = output.loc[output['probability'].idxmax(), 'demand']
    return output

In [2]:
# Load data and preprocess
path = r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\处理数据\daily\2015-2019(daily_total_trips).csv"
df1 = pd.read_csv(path, parse_dates=['date'], dayfirst=False)
df1['date'] = pd.to_datetime(df1['date'])
initial_data = df1[df1['date'] >= pd.to_datetime('2019-12-25')].copy()

mlp_path = r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\处理数据\daily\2020mlp.csv"
df_mlp = pd.read_csv(mlp_path)
df_mlp['date'] = pd.to_datetime(df_mlp['date'])

# Prepare LSTM scaler
_, lstm_scaler = prepare_lstm_data()

# Load the LSTM+MLP model
model = LSTMWithMLP(lstm_input_size=1, output_size=1, num_gaussians=num_gaussians, mlp_input_dim=11)

state_path = r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\DL_daily\final_model_state.pth"
model.load_state_dict(torch.load(state_path))

Total windows created: 1
Total windows created: 2
Total windows created: 3
Total windows created: 4
Total windows created: 5


<All keys matched successfully>

In [3]:
# Set the date range for prediction from January 1, 2020, to June 30, 2020
date_range = pd.date_range(start='2020-01-01', end='2020-06-30')
all_predictions = []

for current_date in date_range:
    day_start = current_date - pd.Timedelta(days=7)
    data = df1[(df1['date'] >= day_start) & (df1['date'] < current_date)].copy()
    
    X_lstm = lstm_data(data, lstm_scaler)
    mlp_tensor = prepare_mlp_input(df_mlp, current_date.day)

    # Ensure the batch sizes match
    if X_lstm.size(0) != mlp_tensor.size(0):
        mlp_tensor = mlp_tensor[:X_lstm.size(0), :]

    predictions = predict_demand(X_lstm, mlp_tensor, model, lstm_scaler)
    max_demand = predictions['demand'][predictions['probability'].idxmax()]
    
    all_predictions.append(predictions)
    
    new_entry = pd.DataFrame({'date': [current_date], 'total_trips': [max_demand]})
    df1 = pd.concat([df1, new_entry], ignore_index=True)

all_predictions

[   demand  probability  volatility
 0   55626     0.422682    0.084802
 1   53205     0.199335    0.114389
 2   58210     0.235327    0.088952
 3   53466     0.013419    0.738765
 4   49826     0.129237    0.157106,
    demand  probability  volatility
 0   69353     0.316056    0.046798
 1   47084     0.198921    0.073672
 2   46725     0.373199    0.019851
 3   49910     0.005394    0.447699
 4   60217     0.106431    0.086020,
    demand  probability  volatility
 0   75538     0.280775    0.022098
 1   40539     0.195286    0.036522
 2   39070     0.439423    0.005845
 3   46228     0.002066    0.313688
 4   59164     0.082450    0.037947,
    demand  probability  volatility
 0   82068     0.243586    0.010364
 1   35053     0.186918    0.017893
 2   32930     0.506371    0.001712
 3   42827     0.000771    0.215227
 4   58052     0.062354    0.016463,
    demand  probability  volatility
 0   89030     0.206911    0.004849
 1   30510     0.175196    0.008730
 2   28058     0.571430 