In [16]:
import argparse
import json

In [1]:
# Notebook-only setup (IPython magics; not valid in a plain .py script).
# Load the autoreload extension so edited modules are re-imported automatically.
%load_ext autoreload
# Mode 2: reload modules before each cell execution.
%autoreload 2
# Import the two project modules used by this notebook and register them with autoreload.
%aimport RNN_model, RNN_apply_ind

# Data Validation

In [None]:
'''
Run the apply step on the validation data.
Run this cell only if it has not been run before.
'''
import RNN_apply_ind, os, json, argparse

# File names start with a backslash, matching the `folder_path + file_name`
# concatenation used elsewhere in this notebook.
folder_path = r'C:\Users\qilei.zhang\OneDrive - Frontier Airlines\Documents\Data\USconti'
seats_file_name = r'\Schedule_Monthly_Summary_Report_Conti.csv'
perf_file_name = r'\Airline_Performance_Report_Conti.csv'
# Raw string: '\S' is an invalid escape sequence in a normal string literal
# (same resulting value, but no syntax/deprecation warning).
apply_file_name = r'\Schedule_Monthly_Summary_2023Q1234.csv'

# Load parameters from the JSON file.
# Fail right away with an actionable message instead of printing and then
# hitting the same FileNotFoundError inside open().
if not os.path.exists('parameters.json'):
    raise FileNotFoundError(
        "parameters.json does not exist, Find the file and put it in the same folder as this file"
    )
with open('parameters.json', 'r') as f:
    args = argparse.Namespace(**json.load(f))

RNN_apply_ind.main_apply(args, folder_path, seats_file_name, perf_file_name, apply_file_name)

In [25]:
'''
Quick look at the result: interactively plot predictions for an airline/route pair.
'''
import pandas as pd
from RNN_apply_ind import DataAna

apply_filename='./data/applying_data.csv'
ana_df_name = "./results/data_to_ana_apply.csv"
orig_df = pd.read_csv(apply_filename)

ana = DataAna(ana_df_name)
ana.merge_previous_data(orig_df)

while True:
    # Strip first so that inputs such as " c " still exit the loop.
    user_input = input("Enter airline and route, separated by comma, or 'c' to exit: ").strip()
    if user_input.lower() == 'c':
        break

    # Exactly two non-empty, comma-separated fields are required.
    parts = [part.strip() for part in user_input.split(',')]
    if len(parts) != 2 or not all(parts):
        # The message now names the real exit key ('c'); it used to say 'continue'.
        print("Invalid input, please enter the airline and route separated by a comma, or 'c' to exit.")
        continue
    airline, route = parts

    try:
        ana.plot_prediction(airline, route)
    except ValueError as err:
        # Keep the loop alive as before, but report the actual plotting error
        # instead of blaming the input format.
        print(f"Could not plot prediction for airline '{airline}' and route '{route}': {err}")

# Add new performance data and seats data to original data

In [14]:
'''
Add new performance data and seats data to original data
'''

import pandas as pd


def _require_same_schema(base_df, new_df, label):
    """Raise ValueError unless ``new_df`` has the same columns and dtypes as ``base_df``.

    Args:
        base_df: DataFrame read from the existing (original) file.
        new_df: DataFrame read from the file that is about to be appended.
        label: Short name of the data set, included in the error message.

    Raises:
        ValueError: If the column names (including their order) or the
            column dtypes differ.
    """
    # Index.equals also copes with a different number of columns; an
    # element-wise `==` raises an unrelated length-mismatch error in that case.
    if not base_df.columns.equals(new_df.columns):
        raise ValueError(f"Column names are not the same, please check the data ({label})")
    # The columns are identical here, so both dtype Series share the same labels.
    if not (base_df.dtypes == new_df.dtypes).all():
        raise ValueError(f"Column types are not the same, please check the data ({label})")


folder_path = r'C:\Users\qilei.zhang\OneDrive - Frontier Airlines\Documents\Data\USconti'
seats_file_name = r'\Schedule_Monthly_Summary_Report_Conti.csv'
perf_file_name = r'\Airline_Performance_Report_Conti.csv'

seats_file_new = r'\Schedule_Monthly_Summary_2023Q1.csv'
perf_file_new = r'\Airline_Performance_Report_USconti_2023Q1.csv'

seats_df = pd.read_csv(folder_path + seats_file_name)
perf_df = pd.read_csv(folder_path + perf_file_name)

seats_df_new = pd.read_csv(folder_path + seats_file_new)
perf_df_new = pd.read_csv(folder_path + perf_file_new)

# Check that column names and data types match; otherwise do not proceed.
# Both data sets are validated before either file is written.
_require_same_schema(seats_df, seats_df_new, "seats")
_require_same_schema(perf_df, perf_df_new, "performance")

seats_df = pd.concat([seats_df, seats_df_new])
perf_df = pd.concat([perf_df, perf_df_new])

# NOTE: the original files are overwritten in place, so running this cell again
# with the same new files appends the same rows a second time.
seats_df.to_csv(folder_path + seats_file_name, index=False)
perf_df.to_csv(folder_path + perf_file_name, index=False)


# Test if the dates are correct

In [1]:

from RNN_model import calculate_quarters

# Call calculate_quarters with one fixed configuration so the returned
# quarters can be inspected in the cell output.
quarter_settings = {
    "pred_num_quarters": 3,
    "seq_num": 10,
    "start_quarter": "Q1 2023",
    "skip_quarters": 0,
}
calculate_quarters(**quarter_settings)

Train Boundary quarter (<): Q2 2022
Test data (>): Q4 2020


('Q2 2022', 'Q1 2020', 'Q4 2020')

# Automatically tune the hyperparameters and record the results

In [12]:

'''
Build every combination of the hyperparameter values to tune.
'''

import pandas as pd
import itertools

# Hyperparameters to sweep. Each key is expected to match an entry of
# parameters.json; commented-out entries are not part of the sweep.
params_to_tune = dict(
    learning_rate=[1e-03, 1e-04],
    # momentum=[0.95, 0.98],
    # batch_size=[64, 128],
    epochs=[20, 30],
    # n_layers=[4, 5],
    drop_prob=[0.35, 0.4],
    bidirectional=[True, False],
    if_skip=[True, False],
    # if_feed_drop=[True, False],
    # if_feed_norm=[True, False],
)

# Parameter names and their candidate lists, in declaration order.
keys = tuple(params_to_tune.keys())
values = tuple(params_to_tune.values())

# Cartesian product of all candidate lists: one dict per combination.
param_combinations = []
for combo in itertools.product(*values):
    param_combinations.append(dict(zip(keys, combo)))

# One row per combination, one column per tuned parameter.
df = pd.DataFrame(param_combinations)
df

Unnamed: 0,learning_rate,epochs,drop_prob,bidirectional,if_skip
0,0.001,20,0.35,True,True
1,0.001,20,0.35,True,False
2,0.001,20,0.35,False,True
3,0.001,20,0.35,False,False
4,0.001,20,0.4,True,True
5,0.001,20,0.4,True,False
6,0.001,20,0.4,False,True
7,0.001,20,0.4,False,False
8,0.001,30,0.35,True,True
9,0.001,30,0.35,True,False


In [22]:
# Read parameters.json into an argparse.Namespace and display it, to check
# that the parameters were loaded correctly.
with open('parameters.json', 'r') as f:
    loaded_params = json.load(f)
args = argparse.Namespace(**loaded_params)
args

Namespace(resume_training=False, MSE_or_GaussianNLLLoss='GaussianNLLLoss', pred_num_quarters=3, seq_num=10, if_add_time_info=False, learning_rate=0.0001, momentum=0.95, batch_size=64, epochs=20, num_workers=1, shuffle=True, fixed_seed=True, rnn_type='LSTM', n_layers=4, drop_prob=0.35, num_heads=6, start_year=2004, checkpoint_file_name='checkpoint_20.pth', bidirectional=True, if_skip=False, if_feed_drop=True, if_feed_norm=True, start_quarter='Q1 2023', skip_quarters=2, validation_type='Val', tune=False)

In [21]:
# Imported here so this cell does not depend on another cell having been run first.
import argparse
import json

import RNN_model
import RNN_apply_ind, os

folder_path = r'C:\Users\qilei.zhang\OneDrive - Frontier Airlines\Documents\Data\USconti'
seats_file_name = r'\Schedule_Monthly_Summary_Report_Conti.csv'
perf_file_name = r'\Airline_Performance_Report_Conti.csv'
# Leading backslash added so this matches the other file names here and the
# apply_file_name used by the other cells that call main_apply.
# NOTE(review): assumes main_apply builds the path as folder_path + apply_file_name,
# like the rest of this notebook does - confirm against RNN_apply_ind.
apply_file_name = r'\Schedule_Monthly_Summary_2023Q1234.csv'

# Read the baseline parameters once; the file content does not change per iteration.
with open('parameters.json', 'r') as f:
    base_params_text = f.read()

# Parameters that are converted to plain int before they are passed on.
int_param_names = ("epochs", "batch_size", "n_layers")

# `df` is the parameter grid (one row per combination) built earlier in this notebook.
for idx, row in df.iterrows():
    print(idx)
    # Extract parameters from the row
    params = row.to_dict()

    # Start every run from a fresh copy of the baseline so overrides from one
    # combination never carry over to the next.
    args = argparse.Namespace(**json.loads(base_params_text))

    # Override the baseline with all parameters from the row.
    for key, value in params.items():
        setattr(args, key, value)
        print(key, value)

    # iterrows() does not preserve column dtypes, so integer parameters may
    # come back as floats or NumPy scalars; normalise them to int.
    for name in int_param_names:
        current = getattr(args, name)
        if not isinstance(current, int):
            setattr(args, name, int(current))

    # Run the model
    # Run Training (results for this combination go to the folder named after idx)
    RNN_model.main_program(args, folder_path, seats_file_name, perf_file_name, tune_folder=str(idx))

    # Run validation
    RNN_apply_ind.main_apply(args, folder_path, seats_file_name, perf_file_name, apply_file_name, tune_folder=str(idx))


0
learning_rate 0.001
epochs 20
drop_prob 0.35
bidirectional True
if_skip True
1
learning_rate 0.001
epochs 20
drop_prob 0.35
bidirectional True
if_skip False
2
learning_rate 0.001
epochs 20
drop_prob 0.35
bidirectional False
if_skip True
3
learning_rate 0.001
epochs 20
drop_prob 0.35
bidirectional False
if_skip False
4
learning_rate 0.001
epochs 20
drop_prob 0.4
bidirectional True
if_skip True
5
learning_rate 0.001
epochs 20
drop_prob 0.4
bidirectional True
if_skip False
6
learning_rate 0.001
epochs 20
drop_prob 0.4
bidirectional False
if_skip True
7
learning_rate 0.001
epochs 20
drop_prob 0.4
bidirectional False
if_skip False
8
learning_rate 0.001
epochs 30
drop_prob 0.35
bidirectional True
if_skip True
9
learning_rate 0.001
epochs 30
drop_prob 0.35
bidirectional True
if_skip False
10
learning_rate 0.001
epochs 30
drop_prob 0.35
bidirectional False
if_skip True
11
learning_rate 0.001
epochs 30
drop_prob 0.35
bidirectional False
if_skip False
12
learning_rate 0.001
epochs 30
drop_prob

# Run current forecast

In [None]:
# Run current forecast
# For the current forecast, validation_type must be set to "test"; otherwise use "Val".

In [None]:
import RNN_apply_ind, os, json, argparse

# File names start with a backslash, matching the `folder_path + file_name`
# concatenation used elsewhere in this notebook.
folder_path = r'C:\Users\qilei.zhang\OneDrive - Frontier Airlines\Documents\Data\USconti'
seats_file_name = r'\Schedule_Monthly_Summary_Report_Conti.csv'
perf_file_name = r'\Airline_Performance_Report_Conti.csv'
# Raw string: '\S' is an invalid escape sequence in a normal string literal
# (same resulting value, but no syntax/deprecation warning).
apply_file_name = r'\Schedule_Monthly_Summary_2023Q1234.csv'

# Load parameters from the JSON file.
# Fail right away with an actionable message instead of printing and then
# hitting the same FileNotFoundError inside open().
if not os.path.exists('parameters.json'):
    raise FileNotFoundError(
        "parameters.json does not exist, Find the file and put it in the same folder as this file"
    )
with open('parameters.json', 'r') as f:
    args = argparse.Namespace(**json.load(f))

# The current forecast runs with validation_type "test", overriding the value
# loaded from parameters.json.
args.validation_type = "test"

RNN_apply_ind.main_apply(args, folder_path, seats_file_name, perf_file_name, apply_file_name)