In [None]:
%pip install pandas
%pip install numpy
%pip install matplotlib
%pip install scikit-learn
%pip install openpyxl
%pip install xgboost
%pip install sklearn.preprocessing
#pip install catboost

In [18]:
# Import the required packages
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

#Google Sheet Link: https://docs.google.com/spreadsheets/d/1RiU9c3YihCsAqh18C25WMfTxkOiAf-nXhxAWtEaKLA0/edit#gid=0
pd.set_option('display.max_rows', 200, 'display.min_rows', 30) #Displays a max of 200 rows for testing

sheet_id = '1RiU9c3YihCsAqh18C25WMfTxkOiAf-nXhxAWtEaKLA0'
xls = pd.ExcelFile(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=xlsx")

arrivals_pure = pd.read_excel(xls, 'Arrivals', header = 0) #Reads in the Google Sheets Data
departures_pure = pd.read_excel(xls, 'Departures', header = 0)

In [19]:
arrivals = arrivals_pure.drop(['blank', 'Communication Type/Reason for Speaking.1'], axis=1) #Drops unnecessary Columns/Headers
departures = departures_pure.drop(['Airport', 'Communication Type/Reason for Speaking.1'], axis=1)

arrivals = arrivals.dropna(thresh=14) #Drops all rows and columns with more than 14 N/A values
departures = departures.dropna(thresh=14)

# create a new leftmost column for the index
# iterate through the rows and index the responses of each flight based on unique flight no.
def index_data(df):
    counter = 0
    flight_no = -1
    df.insert(loc = 0, # create arrivals indexing
                column = 'index',
                value = 0)
    for row in df.iterrows():
        if flight_no != row[1]['Flight No.']:
            flight_no = row[1]['Flight No.']
            counter = 0
        df.loc[row[0], 'index'] = counter
        counter += 1
    return df

arrivals = index_data(arrivals)
departures = index_data(departures)

#Gets the specifics columns from collected data
x = arrivals[['index']]
y = arrivals[['Communication Type/Reason for Speaking']]

# Change y-axis labels to numeric values
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
y = encoder.fit_transform(y.values.ravel())

In [61]:
#Gets the specifics columns from collected data
# x = arrivals[['index']]
# x = arrivals.drop(['Communication Type/Reason for Speaking', 'Transcribed Communication'], axis=1)
x = arrivals[['Time of Communication']]
x = pd.to_datetime(x['Time of Communication'])
x = x.map(pd.Timestamp.timestamp)

y = arrivals[['Communication Type/Reason for Speaking']]
y = encoder.fit_transform(y.values.ravel())

x_train, x_test, y_train, y_test = train_test_split(x,y, test_size = .25, random_state = 42)

from keras.preprocessing.sequence import TimeseriesGenerator
look_back = 5
training_generator = TimeseriesGenerator(x_train, y_train, length=look_back, batch_size=10)
testing_generator = TimeseriesGenerator(x_test, y_test, length=look_back, batch_size=1)

print(training_generator[0])

import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM, Dense
model = Sequential()
model.add(
    LSTM(
        units=10,
        activation='relu',
        input_shape=(look_back, 1)
    )
)
model.add(Dense(1))

model.compile(optimizer='adam', loss='mse')

model.fit(training_generator, epochs=10, verbose=1)
prediction = model.predict(testing_generator)

display(prediction)



272    1.680801e+09
229    1.680802e+09
340    1.680786e+09
18     1.680779e+09
153    1.680798e+09
26     1.680780e+09
333    1.680786e+09
358    1.680786e+09
242    1.680797e+09
260    1.680803e+09
373    1.680785e+09
215    1.680787e+09
145    1.680797e+09
280    1.680801e+09
355    1.680786e+09
           ...     
163    1.680798e+09
143    1.680797e+09
165    1.680798e+09
109    1.680802e+09
97     1.680801e+09
357    1.680786e+09
233    1.680802e+09
133    1.680797e+09
376    1.680785e+09
21     1.680779e+09
205    1.680787e+09
80     1.680801e+09
117    1.680802e+09
294    1.680787e+09
112    1.680802e+09
Name: Time of Communication, Length: 261, dtype: float64

AAAAAAAAAAAAAAAAAAAAAAAAA


array([4, 0, 0, 0, 3, 5, 3, 2, 3, 0, 4, 2, 0, 2, 3, 5, 0, 0, 0, 0, 1, 0,
       5, 4, 0, 3, 4, 3, 0, 3, 4, 3, 1, 0, 3, 5, 5, 0, 0, 3, 0, 0, 2, 5,
       3, 0, 3, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0, 2, 3, 0, 3, 0, 3, 5, 0, 5,
       4, 0, 5, 3, 0, 5, 0, 3, 4, 6, 0, 0, 0, 2, 4, 4, 4, 0, 0, 0, 5, 3,
       0, 1, 5, 0, 0, 0, 0, 2, 3, 0, 0, 3, 3, 5, 5, 0, 0, 3, 0, 5, 3, 3,
       5, 0, 0, 4, 4, 0, 3, 0, 5, 4, 2, 5, 1, 5, 0, 3, 3, 3, 5, 0, 1, 5,
       2, 1, 3, 3, 5, 5, 1, 0, 4, 0, 2, 2, 3, 5, 0, 4, 3, 0, 0, 0, 4, 5,
       0, 0, 2, 0, 3, 2, 2, 6, 2, 5, 3, 0, 3, 5, 2, 3, 3, 5, 3, 0, 4, 3,
       5, 0, 3, 1, 1, 0, 0, 4, 1, 3, 0, 4, 5, 5, 6, 5, 3, 3, 0, 0, 0, 0,
       5, 5, 3, 4, 4, 5, 3, 0, 3, 3, 5, 2, 3, 3, 2, 0, 3, 1, 3, 0, 5, 0,
       3, 4, 3, 4, 0, 3, 0, 2, 5, 1, 0, 2, 0, 0, 4, 4, 5, 3, 0, 3, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 4, 1, 0, 0, 4, 0, 3, 3, 0, 0, 5, 0])

(array([[1.68080064e+09, 1.68080196e+09, 1.68078570e+09, 1.68077916e+09,
        1.68079770e+09],
       [1.68080196e+09, 1.68078570e+09, 1.68077916e+09, 1.68079770e+09,
        1.68077982e+09],
       [1.68078570e+09, 1.68077916e+09, 1.68079770e+09, 1.68077982e+09,
        1.68078558e+09],
       [1.68077916e+09, 1.68079770e+09, 1.68077982e+09, 1.68078558e+09,
        1.68078648e+09],
       [1.68079770e+09, 1.68077982e+09, 1.68078558e+09, 1.68078648e+09,
        1.68079746e+09],
       [1.68077982e+09, 1.68078558e+09, 1.68078648e+09, 1.68079746e+09,
        1.68080298e+09],
       [1.68078558e+09, 1.68078648e+09, 1.68079746e+09, 1.68080298e+09,
        1.68078480e+09],
       [1.68078648e+09, 1.68079746e+09, 1.68080298e+09, 1.68078480e+09,
        1.68078732e+09],
       [1.68079746e+09, 1.68080298e+09, 1.68078480e+09, 1.68078732e+09,
        1.68079746e+09],
       [1.68080298e+09, 1.68078480e+09, 1.68078732e+09, 1.68079746e+09,
        1.68080076e+09]]), array([5, 3, 2, 3, 0, 4, 2,

array([[1.8135610e+08],
       [1.8135582e+08],
       [1.8135574e+08],
       [1.8135598e+08],
       [1.8135570e+08],
       [1.8135550e+08],
       [1.8135587e+08],
       [1.8135610e+08],
       [1.8135616e+08],
       [1.8135632e+08],
       [1.8135616e+08],
       [1.8135562e+08],
       [1.8135539e+08],
       [1.8135565e+08],
       [1.8135526e+08],
       [1.8135555e+08],
       [1.8135594e+08],
       [1.8135565e+08],
       [1.8135542e+08],
       [1.8135549e+08],
       [1.8135576e+08],
       [1.8135534e+08],
       [1.8135589e+08],
       [1.8135555e+08],
       [1.8135594e+08],
       [1.8135566e+08],
       [1.8135565e+08],
       [1.8135539e+08],
       [1.8135525e+08],
       [1.8135536e+08],
       [1.8135536e+08],
       [1.8135517e+08],
       [1.8135496e+08],
       [1.8135490e+08],
       [1.8135526e+08],
       [1.8135523e+08],
       [1.8135563e+08],
       [1.8135610e+08],
       [1.8135600e+08],
       [1.8135544e+08],
       [1.8135582e+08],
       [1.813559