In [1]:
import numpy as np
import pandas as pd

import tensorflow as tf

from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

  import cryptography.exceptions


In [3]:
data = pd.read_csv(r'../data/train.csv')

# Pre-processing Data

In [4]:
#Check for Blank Values
data.isna().sum()

Unnamed: 0                             0
id                                     0
Gender                                 0
Customer Type                          0
Age                                    0
Type of Travel                         0
Class                                  0
Flight Distance                        0
Inflight wifi service                  0
Departure/Arrival time convenient      0
Ease of Online booking                 0
Gate location                          0
Food and drink                         0
Online boarding                        0
Seat comfort                           0
Inflight entertainment                 0
On-board service                       0
Leg room service                       0
Baggage handling                       0
Checkin service                        0
Inflight service                       0
Cleanliness                            0
Departure Delay in Minutes             0
Arrival Delay in Minutes             310
satisfaction    

In [5]:
# Drop Blank Values
data.dropna(inplace=True)

In [6]:
# Drop columns which just show the serial number
data.drop(columns=['Unnamed: 0','id'], inplace=True)

In [7]:
data['satisfaction_labels'] = data['satisfaction'].apply(lambda x : 1 if x=='satisfied' else 0)

In [8]:
data.columns = data.columns.str.replace(' ', '_')

In [9]:
data.drop(columns=['satisfaction'], inplace=True)

# Data Splitting

In [10]:
train, val = train_test_split(data, test_size=0.2)
print(len(train), 'train examples')
print(len(val), 'validation examples')

82875 train examples
20719 validation examples


# Data Conversion to TF Dataset Format

In [11]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  dataframe = dataframe.copy()
  labels = dataframe.pop('satisfaction_labels')
  ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  return ds

In [12]:
batch_size = 32
train_ds = df_to_dataset(train, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)

In [13]:
for feature_batch, label_batch in train_ds.take(1):
  print('Every feature:', list(feature_batch.keys()))
  print('A batch of ages:', feature_batch['Age'])
  print('A batch of targets:', label_batch )

Every feature: ['Gender', 'Customer_Type', 'Age', 'Type_of_Travel', 'Class', 'Flight_Distance', 'Inflight_wifi_service', 'Departure/Arrival_time_convenient', 'Ease_of_Online_booking', 'Gate_location', 'Food_and_drink', 'Online_boarding', 'Seat_comfort', 'Inflight_entertainment', 'On-board_service', 'Leg_room_service', 'Baggage_handling', 'Checkin_service', 'Inflight_service', 'Cleanliness', 'Departure_Delay_in_Minutes', 'Arrival_Delay_in_Minutes']
A batch of ages: tf.Tensor(
[35 49 41 49  7 25 68 17 43 37 40 20 39 33 58 36 34 38 29 58 21 33 36 52
 23 19 15 31 19 54 48 33], shape=(32,), dtype=int32)
A batch of targets: tf.Tensor([0 0 1 0 0 1 1 0 0 0 1 0 1 0 1 1 0 1 1 0 0 0 0 0 1 0 0 1 0 1 1 1], shape=(32,), dtype=int32)


# Feature Transformation

In [14]:
element_spec = train_ds.element_spec

# Identify numerical and categorical columns
numerical_cols = []
categorical_cols = []
for key, value in element_spec[0].items():
    if value.dtype in (tf.float32, tf.float64, tf.int32, tf.int64):
        numerical_cols.append(key)
    else:
        categorical_cols.append(key)

print("Numerical columns:", numerical_cols)
print("Categorical columns:", categorical_cols)

Numerical columns: ['Age', 'Flight_Distance', 'Inflight_wifi_service', 'Departure/Arrival_time_convenient', 'Ease_of_Online_booking', 'Gate_location', 'Food_and_drink', 'Online_boarding', 'Seat_comfort', 'Inflight_entertainment', 'On-board_service', 'Leg_room_service', 'Baggage_handling', 'Checkin_service', 'Inflight_service', 'Cleanliness', 'Departure_Delay_in_Minutes', 'Arrival_Delay_in_Minutes']
Categorical columns: ['Gender', 'Customer_Type', 'Type_of_Travel', 'Class']


In [15]:
feature_columns = []

# numeric cols
for num_cols in numerical_cols:
  feature_columns.append(feature_column.numeric_column(num_cols))

In [18]:
for col_name in categorical_cols:
  categorical_column = feature_column.categorical_column_with_vocabulary_list(
      col_name, data[col_name].unique())
  indicator_column = feature_column.indicator_column(categorical_column)
  feature_columns.append(indicator_column)

# Model Building

In [19]:
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

In [20]:
model = tf.keras.Sequential([
  feature_layer,
  layers.Dense(128, activation='relu'),
  layers.Dense(256, activation='relu'),
  layers.Dropout(.1),
  layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(train_ds,
          validation_data=val_ds,
          epochs=10)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
Instructions for updating:
The old _FeatureColumn APIs are being deprecated. Please use the new FeatureColumn APIs instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x20dcd5a7f60>

# Inference

In [78]:
# Reading Test Data
test_data = pd.read_csv(r'Dataset/test.csv')

test_data.drop(columns=['Unnamed: 0','id'], inplace=True)
test_data.columns = test_data.columns.str.replace(' ', '_')

test_data['satisfaction_bin_labels'] = test_data['satisfaction'].apply(lambda x : 1 if x=='satisfied' else 0)

test_features = {col: test_data[col].values for col in test_data.columns if col != 'satisfaction'}

# Make predictions on the test dataset
predictions = model.predict(test_features)

# Assuming predictions are probabilities, convert them to binary labels (0 or 1)
binary_predictions = np.round(predictions).astype(int)

# Print or use binary_predictions as needed
print("Binary Predictions:")
print(binary_predictions)

Binary Predictions:
[[1]
 [1]
 [0]
 ...
 [0]
 [1]
 [0]]


In [79]:
# Evaluating Model on Test Data
from sklearn.metrics import classification_report

print(classification_report(test_data['satisfaction_bin_labels'].values, binary_predictions))

              precision    recall  f1-score   support

           0       0.92      0.94      0.93     14573
           1       0.93      0.90      0.91     11403

    accuracy                           0.92     25976
   macro avg       0.92      0.92      0.92     25976
weighted avg       0.92      0.92      0.92     25976



# Save Model

In [None]:
model_filename = "dl_model_v1"

# Save the entire model (including architecture, optimizer, and learned weights)
model.save(f"models/{model_filename}")

# Model Tuning

In [None]:
# Below is a sample code for finetuning the model , you can customise it according to your implementation.
# We have provided some default parameters & layers which you can customize.

# def build_model(n_hidden=1, n_neurons=30, learning_rate=3e-3, input_shape=[8]):
    
#     model = tf.keras.models.Sequential()
#     model.add(tf.keras.kayers.InputLayer(input_shape=input_shape))
#     for layer in range(n_hidden):
#         model.add(tf.keras.layers.Dense(n_neurons, activation="relu"))
#     model.add(tf.keras.layers.Dense(1))
#     optimizer = tf.keras.optimizers.SGD(lr=learning_rate)
#     model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer)
    
#     return model

# tune_model_obj = tf.keras.wrappers.scikit_learn.KerasClassifier(build_model)

# tune_model_obj.fit(X_train, y_train, epochs=10,
#                     validation_data=(X_valid, y_valid),
#                     callbacks=tf.keras.callbacks.EarlyStopping(patience=10))

# param_space = {...}

# rndm_search = RandomizedSearchCV(tune_model_obj, param_space, n_iter=10, cv=3)

# rndm_search.fit(X_train, y_train, epochs=10,
#                 validation_data=(X_valid, y_valid),
#                 callbacks=tf.keras.callbacks.EarlyStopping(patience=10))