In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from utils.generate import Generate
from utils.train import Build
from utils.process import Format, Preprocess, Categorize, Assemble
from sklearn.model_selection import train_test_split

# Create one instance of each project helper; later cells call their methods
# (range generation, variable categorisation, result-table assembly).
generate, categorize, assemble = Generate(), Categorize(), Assemble()

  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.


In [2]:
# Load the dataset from disk.
df = pd.read_csv('./data/Clean_Dataset.csv')

# Build two composite string keys: source/destination city and
# departure/arrival time, each joined with a single space.
df = df.assign(
    path=df['source_city'] + ' ' + df['destination_city'],
    time=df['departure_time'] + ' ' + df['arrival_time'],
)

# Encode the 'Economy' / 'Business' labels as 0 / 1 wherever they occur in the frame.
df = df.replace({'Economy': 0, 'Business': 1})

# Remove 'Unnamed: 0' and 'flight', plus the four columns already folded
# into the composite 'path' and 'time' keys above.
df = df.drop(
    columns=[
        'Unnamed: 0',
        'flight',
        'source_city',
        'destination_city',
        'departure_time',
        'arrival_time',
    ]
)

# Derive a categorical version of `days_left` through the project helpers.
# NOTE(review): [2, 5, 10, 18] are presumably the bucket boundaries — confirm in utils.generate.
days_left_ranges = generate.generate_numerical_ranges(df['days_left'], [2, 5, 10, 18])
days_left_dict = categorize.categorize_numerical_variable(df['days_left'], days_left_ranges)

# Map every row's days_left value to its category and append it as a new column.
df = df.assign(days_left_category=df['days_left'].map(days_left_dict))

# Mean price per 'path' value.
# 'price' is selected *before* aggregating so that only that column is averaged:
# calling .mean() on the whole grouped frame would also try to average the string
# column 'time' (a TypeError on pandas >= 2.0) and did the full aggregation three times.
path_group = df.groupby('path')['price'].mean()

# Lowest and highest per-path mean price, handed to the project helper together
# with the literal 3.
# NOTE(review): 3 is presumably the number of ranges to generate — confirm in utils.generate.
start = path_group.min()
finish = path_group.max()
path_ranges = generate.generate_categorical_ranges(3, start, finish)

# Translate each path into its category and store it as a new column.
path_dict = categorize.categorize_categorical_variable(path_group, path_ranges)
df['path_category'] = df['path'].map(path_dict)

# Derive a categorical version of `duration` through the project helpers.
# NOTE(review): [5, 18, 28] are presumably the bucket boundaries — confirm in utils.generate.
duration_ranges = generate.generate_numerical_ranges(df['duration'], [5, 18, 28])
duration_dict = categorize.categorize_numerical_variable(df['duration'], duration_ranges)

# Map every row's duration value to its category and append it as a new column.
df = df.assign(duration_category=df['duration'].map(duration_dict))

# Mean price per 'time' value.
# 'price' is selected *before* aggregating so that only that column is averaged:
# calling .mean() on the whole grouped frame would also try to average the string
# column 'path' (a TypeError on pandas >= 2.0) and did the full aggregation three times.
time_group = df.groupby('time')['price'].mean()

# Lowest and highest per-time mean price, handed to the project helper together
# with the literal 3.
# NOTE(review): 3 is presumably the number of ranges to generate — confirm in utils.generate.
start = time_group.min()
finish = time_group.max()
time_ranges = generate.generate_categorical_ranges(3, start, finish)

# Translate each time key into its category and store it as a new column.
time_dict = categorize.categorize_categorical_variable(time_group, time_ranges)
df['time_category'] = df['time'].map(time_dict)

# The raw columns have been replaced by their *_category counterparts; remove them.
df = df.drop(columns=['days_left', 'path', 'duration', 'time'])

In [3]:
# Layer stack used by the 'ANN1' model specification: two ReLU hidden layers
# (17 and 5 units) followed by a single output unit with no activation.
#
# The first layer intentionally declares no `input_shape`. The previous
# `input_shape = (pd.)` was an incomplete expression (a SyntaxError), and the other
# layer stacks in this notebook do not declare one either. Keras creates the
# weights on the first call, so the input width is inferred from the data.
layers1 = [
    layers.Dense(17, activation="relu"),
    layers.Dense(5, activation="relu"),
    layers.Dense(1),
]

# Smaller alternative stack: ReLU hidden layers of 8 and 3 units, then a single
# output unit with no activation. Only the commented-out 'ANN2' specification
# in this notebook refers to it.
layers2 = [layers.Dense(units, activation="relu") for units in (8, 3)]
layers2.append(layers.Dense(1))

In [4]:
# Encoder half passed to the 'AE1' dimensionality-reduction entry:
# three ReLU Dense layers narrowing 8 -> 5 -> 2 units.
encoder = [layers.Dense(units, activation="relu") for units in (8, 5, 2)]

# Decoder half passed to the 'AE1' dimensionality-reduction entry:
# three ReLU Dense layers widening 5 -> 8 -> 19 units.
# NOTE(review): 19 presumably equals the number of input features being reconstructed — confirm.
decoder = [layers.Dense(units, activation="relu") for units in (5, 8, 19)]

In [4]:
# Shared results container; starts with the prepared frame registered under
# the dataset label 'A1'. Later cells read further keys from this same dict.
all_regression_test_dict = {'data': {'A1': df}}

In [6]:
# Model specifications handed to Build.build_deep_learning_models.
# Each entry is a two-element list: [network settings, preprocessing settings].
models = [
    [
        # Network: the layers1 stack, compiled with RMSprop on MSE, fitted with batch size 1200.
        {
            'model': 'ANN1',
            'layers': layers1,
            'compile_parameters': {'optimizer': RMSprop(), 'loss': 'mse'},
            'fit_parameters': {'batch_size': 1200},
        },
        # Preprocessing: 'SFM1' feature selection with no extra options, and the
        # 'AE1' reduction built from the encoder/decoder stacks (Adam on MSE).
        {
            'feature_selection': ['SFM1', {}],
            'dimensionality_reduction': [
                'AE1',
                {
                    'encoder_layers': encoder,
                    'decoder_layers': decoder,
                    'compile_parameters': {'optimizer': Adam(), 'loss': 'mse'},
                },
            ],
        },
    ],
]

In [7]:
#[{'model':'ANN2', 'layers': layers2, 'compile_parameters':{'optimizer': Adam(), 'loss':'mse'}, 'fit_parameters':{'batch_size':1200}}]

In [8]:
# Display the first model specification: the 'ANN1' network settings together
# with its feature-selection / dimensionality-reduction settings.
models[0]

[{'model': 'ANN1',
  'layers': [<keras.layers.core.dense.Dense at 0x2400ed35640>,
   <keras.layers.core.dense.Dense at 0x2400012b6d0>,
   <keras.layers.core.dense.Dense at 0x2400012bcd0>],
  'compile_parameters': {'optimizer': <keras.optimizer_v2.rmsprop.RMSprop at 0x2400f4c0a00>,
   'loss': 'mse'},
  'fit_parameters': {'batch_size': 1200}},
 {'feature_selection': ['SFM1', {}],
  'dimensionality_reduction': ['AE1',
   {'encoder_layers': [<keras.layers.core.dense.Dense at 0x2400f4be7c0>,
     <keras.layers.core.dense.Dense at 0x2400f4be880>,
     <keras.layers.core.dense.Dense at 0x2400f4be5b0>],
    'decoder_layers': [<keras.layers.core.dense.Dense at 0x240000effa0>,
     <keras.layers.core.dense.Dense at 0x2400ed352b0>,
     <keras.layers.core.dense.Dense at 0x2400f4c0040>],
    'compile_parameters': {'optimizer': <keras.optimizer_v2.adam.Adam at 0x2400f4c0d60>,
     'loss': 'mse'}}]}]

In [9]:
# Hand the shared results dict to the project trainer, then train every
# specification listed in `models`.
# NOTE(review): 'price' is presumably the target column name — confirm against utils.train.Build.
# NOTE(review): dimensionality_reduction=False is passed here although the spec in `models`
# carries a 'dimensionality_reduction' entry — confirm which of the two takes effect.
build = Build(all_regression_test_dict, dimensionality_reduction=False)
build.build_deep_learning_models(models, 'price')

Training deeplearning model ANN1 for A1
Training done!



In [60]:
# Print every weight variable of the stored 'A1ANN1AE1' model, each followed by a blank line.
# NOTE(review): the results table in this notebook lists the algorithm as 'A1ANN1';
# confirm that the 'A1ANN1AE1' key exists after a fresh Restart & Run All.
for weight in all_regression_test_dict['models']['A1ANN1AE1'].weights:
    print(weight, end="\n\n")

<tf.Variable 'dense_37/kernel:0' shape=(2, 17) dtype=float32, numpy=
array([[-0.2584811 , -0.4509383 ,  0.63770825,  0.50745505,  0.5929335 ,
        -0.522137  ,  0.24229176,  0.43238106,  0.2868892 , -0.5571894 ,
        -0.32216442, -0.29715925,  0.00515057, -0.42557704, -0.3572872 ,
        -0.04676425,  0.5927974 ],
       [ 0.0522849 ,  0.3470242 ,  0.4217326 ,  0.16884857,  0.33818895,
         0.25962448,  0.12329763,  0.24065506, -0.1385031 , -0.24856591,
         0.37057155,  0.2169671 , -0.22991472, -0.08869642,  0.03141743,
         0.48560256, -0.54153496]], dtype=float32)>

<tf.Variable 'dense_37/bias:0' shape=(17,) dtype=float32, numpy=
array([ 0.        ,  0.        ,  0.1614278 ,  0.1804619 ,  0.20421855,
        0.        ,  0.19767603,  0.07751837,  0.22634448,  0.        ,
        0.        ,  0.        , -0.05157971,  0.        ,  0.        ,
        0.        ,  0.14836928], dtype=float32)>

<tf.Variable 'dense_38/kernel:0' shape=(17, 5) dtype=float32, numpy=
arra

In [61]:
# Display the layer objects of the stored 'A1ANN1AE1' model.
all_regression_test_dict['models']['A1ANN1AE1'].layers

[<keras.layers.core.dense.Dense at 0x18019678490>,
 <keras.layers.core.dense.Dense at 0x180196787f0>,
 <keras.layers.core.dense.Dense at 0x180196782e0>]

In [10]:
# Build the per-model test tables from the shared results dict, store them back
# under 'test_tables', then display the error-value summary as the cell output.
all_test_tables_dict = assemble.assemble_test_tables(all_regression_test_dict)
all_regression_test_dict['test_tables'] = all_test_tables_dict
assemble.assemble_error_values(all_regression_test_dict)

Unnamed: 0_level_0,MEPE,MPE,MEAE,MAE,MSE,RMSE,NRMSE,STD
ALGORITHM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A1ANN1,99.888,99.881,7402.455,20709.064,940131300.0,30661.561,0.269908,22612.54


In [11]:
# NO INPUT
# NOTE(review): the meaning of "NO INPUT" is not evident from this notebook — presumably
# a run of ANN1 without an explicit input_shape on its first layer; confirm.
# The statements below repeat the preceding cell verbatim: rebuild the test tables,
# store them under 'test_tables', and display the error-value summary.
all_test_tables_dict = assemble.assemble_test_tables(all_regression_test_dict)
all_regression_test_dict['test_tables'] = all_test_tables_dict
assemble.assemble_error_values(all_regression_test_dict)

Unnamed: 0_level_0,MEPE,MPE,MEAE,MAE,MSE,RMSE,NRMSE,STD
ALGORITHM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A1ANN1,99.888,99.881,7402.455,20709.064,940131300.0,30661.561,0.269908,22612.54


In [63]:
# Display the shape of the test feature matrix stored for dataset 'A1'.
all_regression_test_dict['X_test']['A1'].shape

(60031, 2)