# 0.0 Import Libraries

In [7]:
import pandas as pd
import numpy as np

from sklearn import metrics as mt
from sklearn.linear_model import LogisticRegression

# 0.1 Loading Datasets

In [2]:
# Read each split as a (features, target) pair of CSV files from the working directory.
X_train, y_train = pd.read_csv("X_training.csv"), pd.read_csv("y_training.csv")
X_val, y_val = pd.read_csv("X_validation.csv"), pd.read_csv("y_validation.csv")
X_test, y_test = pd.read_csv("X_test.csv"), pd.read_csv("y_test.csv")

In [3]:
# Preview the first three rows of the training features as loaded.
X_train.head(n=3)

Unnamed: 0,id,customer_type,age,class,flight_distance,inflight_wifi_service,departure_arrival_time_convenient,ease_of_online_booking,gate_location,food_and_drink,...,baggage_handling,checkin_service,inflight_service,cleanliness,departure_delay_in_minutes,arrival_delay_in_minutes,gender_Female,gender_Male,type_of_travel_business_travel,type_of_travel_personal_travel
0,13508,1,0.5,0.0,0.03958,0.6,0.6,0.6,0.6,1.0,...,0.5,1.0,0.6,0.4,0.0,0.013848,1.0,0.0,1.0,0.0
1,28874,1,0.24359,0.0,0.205775,0.6,0.4,0.4,0.4,0.6,...,0.5,0.5,0.2,0.6,0.0,0.0,0.0,1.0,1.0,0.0
2,21484,0,0.435897,1.0,0.026858,0.6,0.6,0.6,0.2,1.0,...,0.0,1.0,0.6,1.0,0.0,0.0,1.0,0.0,1.0,0.0


In [15]:
# Display the (rows, columns) dimensions of the training feature table.
X_train.shape

(72515, 24)

In [4]:
# data preparation
# Flatten the training and validation targets (loaded with read_csv) into
# 1-D arrays before they are passed to the estimator and the metric functions.
# np.ravel accepts a DataFrame as well as an already-flattened ndarray, so
# re-running this cell is a no-op instead of failing with AttributeError on
# `.values` (which an ndarray does not have).
y_train = np.ravel(y_train)
y_val = np.ravel(y_val)

In [5]:
# removing column "id"
# The same drop is applied to every split so the feature sets stay aligned.
# errors="ignore" keeps the cell re-runnable: once "id" has been removed, a
# second execution leaves the frames unchanged instead of raising KeyError.
X_train = X_train.drop(columns="id", errors="ignore")
X_val = X_val.drop(columns="id", errors="ignore")
X_test = X_test.drop(columns="id", errors="ignore")

In [6]:
# Preview the first three rows again, after the "id" column was dropped above.
X_train.head(n=3)

Unnamed: 0,customer_type,age,class,flight_distance,inflight_wifi_service,departure_arrival_time_convenient,ease_of_online_booking,gate_location,food_and_drink,online_boarding,...,baggage_handling,checkin_service,inflight_service,cleanliness,departure_delay_in_minutes,arrival_delay_in_minutes,gender_Female,gender_Male,type_of_travel_business_travel,type_of_travel_personal_travel
0,1,0.5,0.0,0.03958,0.6,0.6,0.6,0.6,1.0,1.0,...,0.5,1.0,0.6,0.4,0.0,0.013848,1.0,0.0,1.0,0.0
1,1,0.24359,0.0,0.205775,0.6,0.4,0.4,0.4,0.6,0.8,...,0.5,0.5,0.2,0.6,0.0,0.0,0.0,1.0,1.0,0.0
2,0,0.435897,1.0,0.026858,0.6,0.6,0.6,0.2,1.0,0.6,...,0.0,1.0,0.6,1.0,0.0,0.0,1.0,0.0,1.0,0.0


# 1.0 Logistic Regression Classifier

## 1.1 Training

In [20]:
# Build the classifier and fit it on the training split (fit returns the estimator itself).
model = LogisticRegression(C=1.0, solver='newton-cholesky', max_iter=100).fit(X_train, y_train)

# Predict on the same data the model was fitted on, to measure in-sample performance.
yhat_train = model.predict(X_train)

# Compute every score first (rounded to 3 decimals), then report them.
acc = np.round(mt.accuracy_score(y_train, yhat_train), decimals=3)
precision = np.round(mt.precision_score(y_train, yhat_train), decimals=3)
recall = np.round(mt.recall_score(y_train, yhat_train), decimals=3)
f1_score = np.round(mt.f1_score(y_train, yhat_train), decimals=3)

print(f"Accuracy: {acc}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1_score}")

Accuracy: 0.875
Precision: 0.871
Recall: 0.836
F1-Score: 0.853


## 1.2 Validation

In [21]:
# Fresh estimator with the same hyperparameters as the training section,
# so this cell does not rely on a model object left over from an earlier cell.
model = LogisticRegression(
    C=1.0,
    solver='newton-cholesky',
    max_iter=100,
)

# Fit on the training split only; the validation split stays unseen.
model.fit(X=X_train, y=y_train)

# Score the held-out validation rows.
yhat_val = model.predict(X_val)

# Each metric compares validation labels with validation predictions,
# rounded to 3 decimals before printing.
acc = np.round(mt.accuracy_score(y_true=y_val, y_pred=yhat_val), 3)
print(f"Accuracy: {acc}")

precision = np.round(mt.precision_score(y_true=y_val, y_pred=yhat_val), 3)
print(f"Precision: {precision}")

recall = np.round(mt.recall_score(y_true=y_val, y_pred=yhat_val), 3)
print(f"Recall: {recall}")

f1_score = np.round(mt.f1_score(y_true=y_val, y_pred=yhat_val), 3)
print(f"F1-Score: {f1_score}")

Accuracy: 0.874
Precision: 0.869
Recall: 0.835
F1-Score: 0.852


## 1.3 Test

In [22]:
# define
model = LogisticRegression(C=1.0, solver='newton-cholesky', max_iter=100)

# fit
# The final model is trained on train + validation together.
# pd.concat (rather than np.concatenate) keeps the result a DataFrame, so the
# estimator is fitted WITH feature names. Predicting on the X_test DataFrame
# then no longer triggers sklearn's "X has feature names, but
# LogisticRegression was fitted without feature names" warning, and the two
# frames are stacked by column name instead of by position.
X_train_val = pd.concat([X_train, X_val], ignore_index=True)
# np.ravel is a no-op on targets that are already 1-D and flattens them if
# they are still single-column frames.
y_train_val = np.concatenate((np.ravel(y_train), np.ravel(y_val)))
model.fit(X_train_val, y_train_val)

# predict
yhat_test = model.predict(X_test)

# metrics
# y_test is not flattened by the data-preparation cell, so flatten it here
# to compare like with like (1-D labels vs 1-D predictions).
y_test_true = np.ravel(y_test)

acc = np.round(mt.accuracy_score(y_test_true, yhat_test), 3)
print(f"Accuracy: {acc}")

precision = np.round(mt.precision_score(y_test_true, yhat_test), 3)
print(f"Precision: {precision}")

recall = np.round(mt.recall_score(y_test_true, yhat_test), 3)
print(f"Recall: {recall}")

f1_score = np.round(mt.f1_score(y_test_true, yhat_test), 3)
print(f"F1-Score: {f1_score}")

Accuracy: 0.871
Precision: 0.869
Recall: 0.833
F1-Score: 0.85


