In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 

In [2]:
df = pd.read_csv("cust_satisfaction.csv")
df.head()

Unnamed: 0,Gender,Customer Type,Type of Travel,Class,satisfaction,Age,Flight Distance,Inflight entertainment,Baggage handling,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes
0,Male,Loyal Customer,Personal Travel,Eco Plus,neutral or dissatisfied,13,460,5,4,5,25,18.0
1,Male,disloyal Customer,Business travel,Business,neutral or dissatisfied,25,235,1,3,1,1,6.0
2,Female,Loyal Customer,Business travel,Business,satisfied,26,1142,5,4,5,0,0.0
3,Female,Loyal Customer,Business travel,Business,neutral or dissatisfied,25,562,2,3,2,11,9.0
4,Male,Loyal Customer,Business travel,Business,satisfied,61,214,3,4,3,0,0.0


In [3]:
df.isnull().sum()

Gender                          0
Customer Type                   0
Type of Travel                  0
Class                           0
satisfaction                    0
Age                             0
Flight Distance                 0
Inflight entertainment          0
Baggage handling                0
Cleanliness                     0
Departure Delay in Minutes      0
Arrival Delay in Minutes      310
dtype: int64

In [4]:
df.dropna(inplace=True)

In [5]:
df.duplicated().sum()

np.int64(172)

In [None]:

df.drop_duplicates(inplace=True)

In [8]:
df.head()

Unnamed: 0,Gender,Customer Type,Type of Travel,Class,satisfaction,Age,Flight Distance,Inflight entertainment,Baggage handling,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes
0,Male,Loyal Customer,Personal Travel,Eco Plus,neutral or dissatisfied,13,460,5,4,5,25,18.0
1,Male,disloyal Customer,Business travel,Business,neutral or dissatisfied,25,235,1,3,1,1,6.0
2,Female,Loyal Customer,Business travel,Business,satisfied,26,1142,5,4,5,0,0.0
3,Female,Loyal Customer,Business travel,Business,neutral or dissatisfied,25,562,2,3,2,11,9.0
4,Male,Loyal Customer,Business travel,Business,satisfied,61,214,3,4,3,0,0.0


In [14]:
df["Customer Type"].value_counts()

Customer Type
1    84517
0    18905
Name: count, dtype: int64

In [12]:
df["Gender"] = df["Gender"].map({"Male":1,"Female":0})
df["Customer Type"] = df["Customer Type"].map({"Loyal Customer":1,"disloyal Customer":0})
df["Type of Travel"] = df["Type of Travel"].map({"Personal Travel":0,"Business travel" :1})
df["Class"] = df["Class"].map({"Business":1,"Eco" : 2,"Eco Plus" : 3})
df["satisfaction"] = df["satisfaction"].map({"neutral or dissatisfied":0,"satisfied":1})

In [13]:
df.head()

Unnamed: 0,Gender,Customer Type,Type of Travel,Class,satisfaction,Age,Flight Distance,Inflight entertainment,Baggage handling,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes
0,1,1,0,3,0,13,460,5,4,5,25,18.0
1,1,0,1,1,0,25,235,1,3,1,1,6.0
2,0,1,1,1,1,26,1142,5,4,5,0,0.0
3,0,1,1,1,0,25,562,2,3,2,11,9.0
4,1,1,1,1,1,61,214,3,4,3,0,0.0


In [15]:
loyal_customer = df[df["Customer Type"] ==1].loc[0:20000]
disloyal_customer = df[df["Customer Type"]==0]

In [16]:
balance_df = pd.concat([loyal_customer,disloyal_customer],axis=0)
balance_df.head()

Unnamed: 0,Gender,Customer Type,Type of Travel,Class,satisfaction,Age,Flight Distance,Inflight entertainment,Baggage handling,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes
0,1,1,0,3,0,13,460,5,4,5,25,18.0
2,0,1,1,1,1,26,1142,5,4,5,0,0.0
3,0,1,1,1,0,25,562,2,3,2,11,9.0
4,1,1,1,1,1,61,214,3,4,3,0,0.0
5,0,1,0,2,0,26,1180,1,4,1,0,0.0


In [17]:
balance_df.isnull().sum()

Gender                        0
Customer Type                 0
Type of Travel                0
Class                         0
satisfaction                  0
Age                           0
Flight Distance               0
Inflight entertainment        0
Baggage handling              0
Cleanliness                   0
Departure Delay in Minutes    0
Arrival Delay in Minutes      0
dtype: int64

In [18]:
x = balance_df.drop("Customer Type",axis=1)
y = balance_df[["Customer Type"]]

In [19]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

In [20]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

x_train_scaled = sc.fit_transform(x_train)
x_test_scaled= sc.transform(x_test)

In [21]:
x_train_scaled.shape

(28191, 11)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout

##defining the ann model 
model = Sequential()

##input layer 
model.add(Dense(68,activation = "relu",input_dim = x_train_scaled.shape[1]))
model.add(Dropout(0.3))

##hidden layer 

model.add(Dense(32,activation = "relu"))
model.add(Dropout(0.3))
model.add(Dense(24,activation = "relu"))
model.add(Dropout(0.3))
model.add(Dense(12,activation = "relu"))


##output layer 

model.add(Dense(1,activation = "sigmoid"))

##compile our  model 

model.compile(loss = "binary_crossentropy",optimizer = "adam",metrics = ["accuracy"])
model.summary()

In [25]:
from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(
    monitor = "val_loss",
    patience = 3 ,
    restore_best_weights = True
)


history = model.fit(
    x_train_scaled,y_train,
    epochs = 50 ,
    batch_size = 32,
    validation_data = (x_test_scaled,y_test),
    callbacks = [early_stopping]
)

Epoch 1/50
[1m  1/881[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m17s[0m 20ms/step - accuracy: 0.9062 - loss: 0.1790

[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9434 - loss: 0.1485 - val_accuracy: 0.9278 - val_loss: 0.2004
Epoch 2/50
[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9422 - loss: 0.1517 - val_accuracy: 0.9288 - val_loss: 0.1980
Epoch 3/50
[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9411 - loss: 0.1521 - val_accuracy: 0.9278 - val_loss: 0.2095
Epoch 4/50
[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9447 - loss: 0.1455 - val_accuracy: 0.9271 - val_loss: 0.1992
Epoch 5/50
[1m881/881[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9439 - loss: 0.1487 - val_accuracy: 0.9283 - val_loss: 0.2012
