1. Setup + Imports

In [58]:
import os
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

2. Data Preparation
    - Load dataset
    - Preprocess data
    - Create sequences
    - Split data

In [None]:
# Load the raw train-delay records and report the frame's dimensions.
df = pd.read_csv("train delay data.csv")
print(df.shape)
# A bare `df.head()` in the middle of a cell is evaluated and thrown away
# (only a cell's last expression is rendered), so show the preview explicitly.
display(df.head())

# Ordinal columns and the ranking of their labels.
#   key   -> column name
#   value -> that column's labels, listed from smallest to largest
ordinal_vars_order = {
    'Route Congestion': ['Low', 'Medium', 'High'],
}
   
def encode(df, ordinal_vars_order=None):
    """Encode the categorical columns of ``df`` and min-max scale every column.

    Columns named in ``ordinal_vars_order`` are replaced by an integer-coded
    ``<name>_Encoded`` column that follows the supplied label order. Every
    other object-dtype column is one-hot encoded with ``pd.get_dummies``.
    The resulting frame is then passed through a default ``MinMaxScaler``.

    The input frame is not modified, so the function can be called repeatedly
    on the same data (for example when a notebook cell is re-run).

    Args:
        df: Raw input DataFrame.
        ordinal_vars_order: Optional mapping of column name -> list of that
            column's labels ordered from smallest to largest.

    Returns:
        A new DataFrame holding the scaled values, with the encoded column
        names and a fresh default index.

    Raises:
        KeyError: If a column named in ``ordinal_vars_order`` is not in ``df``.
    """
    # Work on a copy: adding and deleting columns on the caller's frame made a
    # second call fail with KeyError because the ordinal column was gone.
    df = df.copy()
    ordinal_vars_order = ordinal_vars_order or {}

    # Only object columns without an explicit ordering are one-hot encoded.
    # Filtering (instead of Index.get_loc + delete) also tolerates an ordinal
    # column whose dtype is not object.
    categorical_columns = df.select_dtypes(include=['object']).columns
    nominal_columns = [
        column for column in categorical_columns
        if column not in ordinal_vars_order
    ]

    for column, order in ordinal_vars_order.items():
        ordered_dtype = pd.CategoricalDtype(categories=order, ordered=True)
        # NOTE: labels absent from `order` (and missing values) get code -1.
        df[f'{column}_Encoded'] = df[column].astype(ordered_dtype).cat.codes
        del df[column]

    df = pd.get_dummies(df, columns=nominal_columns)
    scaler = MinMaxScaler()
    df_scaled = scaler.fit_transform(df)
    return pd.DataFrame(df_scaled, columns=df.columns)

# Encode and scale the raw frame, then render the result in the notebook.
df_scaled = encode(df, ordinal_vars_order=ordinal_vars_order)
display(df_scaled)

(2878, 7)


Unnamed: 0,Distance Between Stations (km),Historical Delay (min),Route Congestion_Encoded,Weather Conditions_Clear,Weather Conditions_Foggy,Weather Conditions_Rainy,Day of the Week_Friday,Day of the Week_Monday,Day of the Week_Saturday,Day of the Week_Sunday,Day of the Week_Thursday,Day of the Week_Tuesday,Day of the Week_Wednesday,Time of Day_Afternoon,Time of Day_Evening,Time of Day_Morning,Time of Day_Night,Train Type_Express,Train Type_Local,Train Type_Superfast
0,0.104712,0.004065,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
1,0.157068,0.008130,0.5,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
2,0.209424,0.012195,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
3,0.052356,0.001626,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
4,0.078534,0.006504,0.5,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2873,0.989529,0.983740,0.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
2874,0.968586,0.987805,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
2875,0.994764,0.991870,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
2876,0.973822,0.995935,0.5,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0


3. Model Setup
    - Define LSTM architecture
    - Initialize model, optimizer, and loss function


4. Training Loop
    - Train model over epochs
    - Track loss and accuracy
    - Save best model (optional)

5. Evaluation & Results
    - Evaluate on test set
    - Plot training/validation loss curves
    - Visualize predicted vs actual delays
