In [1]:
import os
import numpy as np  
import pandas as pd  
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from tqdm.auto import tqdm
tqdm.pandas()

import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.layers import Dense, Input, Flatten, BatchNormalization, Dropout, Concatenate
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import ModelCheckpoint

In [2]:
train = pd.read_csv('https://raw.githubusercontent.com/benvictoria17/DataAnalytics/master/dataset/ATIS%20Airline%20Travel%20Information%20System/atis_intents.csv')
train.columns = ['intent', 'snippet']

print(train.shape)
train.head()

(4977, 2)


Unnamed: 0,intent,snippet
0,atis_flight,what flights are available from pittsburgh to...
1,atis_flight_time,what is the arrival time in san francisco for...
2,atis_airfare,cheapest airfare from tacoma to orlando
3,atis_airfare,round trip fares from pittsburgh to philadelp...
4,atis_flight,i need a flight tomorrow from columbus to min...


In [3]:
train.intent.value_counts(), train.intent.value_counts(normalize=True)

(atis_flight                                 3665
 atis_airfare                                 423
 atis_ground_service                          255
 atis_airline                                 157
 atis_abbreviation                            147
 atis_aircraft                                 81
 atis_flight_time                              54
 atis_quantity                                 51
 atis_flight#atis_airfare                      21
 atis_airport                                  20
 atis_distance                                 20
 atis_city                                     19
 atis_ground_fare                              18
 atis_capacity                                 16
 atis_flight_no                                12
 atis_meal                                      6
 atis_restriction                               6
 atis_airline#atis_flight_no                    2
 atis_ground_service#atis_ground_fare           1
 atis_airfare#atis_flight_time                  1


In [4]:
test = pd.read_csv('https://raw.githubusercontent.com/benvictoria17/DataAnalytics/master/dataset/ATIS%20Airline%20Travel%20Information%20System/atis_intents_test.csv')
test.columns = ['intent', 'snippet']

print(test.shape)
test.head()

(799, 2)


Unnamed: 0,intent,snippet
0,atis_airfare,on april first i need a ticket from tacoma to...
1,atis_flight,on april first i need a flight going from pho...
2,atis_flight,i would like a flight traveling one way from ...
3,atis_flight,i would like a flight from orlando to salt la...
4,atis_flight,i need a flight from toronto to newark one wa...


In [5]:
test.intent.value_counts(), test.intent.value_counts(normalize=True)

(atis_flight            631
 atis_airfare            48
 atis_airline            38
 atis_ground_service     36
 atis_abbreviation       33
 atis_aircraft            9
 atis_quantity            3
 atis_flight_time         1
 Name: intent, dtype: int64,
 atis_flight            0.789737
 atis_airfare           0.060075
 atis_airline           0.047559
 atis_ground_service    0.045056
 atis_abbreviation      0.041302
 atis_aircraft          0.011264
 atis_quantity          0.003755
 atis_flight_time       0.001252
 Name: intent, dtype: float64)

In [6]:
train_data = train.snippet.values
train_labels = train.intent.values
test_data = test.snippet.values
test_labels = test.intent.values

len(train_data), len(train_labels), len(test_data), len(test_labels)

(4977, 4977, 799, 799)

In [7]:
print(train_data[123])
print(train_labels[123])

 what are the classes of service for american airlines
atis_flight


In [8]:
y_train = pd.get_dummies(train_labels)
print(y_train.shape)
y_train.head()

(4977, 22)


Unnamed: 0,atis_abbreviation,atis_aircraft,atis_aircraft#atis_flight#atis_flight_no,atis_airfare,atis_airfare#atis_flight_time,atis_airline,atis_airline#atis_flight_no,atis_airport,atis_capacity,atis_cheapest,...,atis_flight,atis_flight#atis_airfare,atis_flight_no,atis_flight_time,atis_ground_fare,atis_ground_service,atis_ground_service#atis_ground_fare,atis_meal,atis_quantity,atis_restriction
0,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


In [9]:
y_train = pd.get_dummies(train_labels)
print(y_train.shape)
y_train.head()

(4977, 22)


Unnamed: 0,atis_abbreviation,atis_aircraft,atis_aircraft#atis_flight#atis_flight_no,atis_airfare,atis_airfare#atis_flight_time,atis_airline,atis_airline#atis_flight_no,atis_airport,atis_capacity,atis_cheapest,...,atis_flight,atis_flight#atis_airfare,atis_flight_no,atis_flight_time,atis_ground_fare,atis_ground_service,atis_ground_service#atis_ground_fare,atis_meal,atis_quantity,atis_restriction
0,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


In [10]:
y_test = pd.get_dummies(test_labels)
print(y_test.shape)
y_test.head()

(799, 8)


Unnamed: 0,atis_abbreviation,atis_aircraft,atis_airfare,atis_airline,atis_flight,atis_flight_time,atis_ground_service,atis_quantity
0,0,0,1,0,0,0,0,0
1,0,0,0,0,1,0,0,0
2,0,0,0,0,1,0,0,0
3,0,0,0,0,1,0,0,0
4,0,0,0,0,1,0,0,0


In [11]:
module_url = 'https://tfhub.dev/google/universal-sentence-encoder-large/4'
embed = hub.KerasLayer(module_url, trainable=True, name='USE_embedding')

2022-05-23 00:12:56.620979: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [13]:
def build_model(embed):
    
    model = Sequential([
        Input(shape=[], dtype=tf.string),
        embed,
        Dense(8, activation='softmax')
    ])
    model.compile(Adam(1e-5), loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model

model = build_model(embed)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
USE_embedding (KerasLayer)   {'outputs': (None, 512)}  147354880 
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 4104      
Total params: 147,358,984
Trainable params: 147,358,984
Non-trainable params: 0
_________________________________________________________________
