In [None]:
# Importing required packages
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics

# from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, ExtraTreesClassifier
# from xgboost import XGBClassifier
# from sklearn.neighbors import KNeighborsClassifier

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

%matplotlib inline

In [None]:
# Location of the training CSV (raw file hosted on GitHub).
URL = 'https://raw.githubusercontent.com/amirbek-akramov/SatisfactionOfAirlinePassengers/main/train_dataset.csv'

# Download the CSV and parse it into a pandas DataFrame.
train_df = pd.read_csv(filepath_or_buffer=URL)

# Preview the first five rows as a quick sanity check of the load.
train_df.head(5)

In [None]:
# Dimensions of train_df as a (number of rows, number of columns) tuple.
train_df.shape

In [None]:
# Remove the 'id' column from the DataFrame.
# The result is rebound instead of using inplace=True, and errors='ignore'
# makes the cell idempotent: re-running it after 'id' is already gone no
# longer raises a KeyError.
train_df = train_df.drop(columns='id', errors='ignore')

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of train_df's numeric columns.
train_df.describe()

In [None]:
# Column names, non-null counts, dtypes and memory usage of train_df.
train_df.info()

In [None]:
# Count the missing (NaN) values in each column of train_df before imputation.
train_df.isnull().sum(axis=0)

In [None]:
# Impute missing values with the backfill method (each NaN takes the next
# valid value below it in the same column).
# Backfill alone cannot fill a NaN that sits at the very end of a column,
# because there is no later value to copy from, so a forward-fill pass is
# chained to cover any such trailing gaps.
# The result is rebound (no inplace=True) so the cell is safe to re-run.
train_df = train_df.bfill().ffill()

In [None]:
# Re-count the missing values per column to verify the imputation step.
train_df.isnull().sum(axis=0)

In [None]:
# Categorical columns that must be converted to integer codes.
encode_cols = ['Gender', 'Customer Type', 'Type of Travel', 'Class']

# One fitted encoder per column, keyed by column name.
# Reusing a single LabelEncoder would overwrite its learned classes on every
# fit, leaving only the last column's mapping available; keeping them all
# allows inverse_transform and encoding new data consistently later.
encoders = {}

# Encode every column with its own LabelEncoder.
for column in encode_cols:
    # `label_encoder` ends the loop bound to the last column's encoder, which
    # matches what the name held when a single shared instance was used.
    label_encoder = LabelEncoder()
    train_df[column] = label_encoder.fit_transform(train_df[column])
    encoders[column] = label_encoder

# Preview only the first rows instead of rendering the whole DataFrame.
train_df.head()

In [None]:
# Separate the target column from the feature columns for model training.
y = train_df['satisfaction']
X = train_df.drop('satisfaction', axis=1)

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# RandomForest is used here; per the author it gave the best accuracy among
# the classifiers tried (that comparison is not shown in this notebook).
# random_state is fixed (same seed as the train/test split) so that the
# bootstrap sampling and feature selection -- and therefore the reported
# metrics -- are reproducible across runs.
RF = RandomForestClassifier(random_state=0)

In [None]:
# Fit the random forest on the training split.
RF.fit(X=X_train, y=y_train)

In [None]:
# Predict the target for the held-out test features.
y_pred = RF.predict(X=X_test)

In [None]:
# Evaluate the RandomForest classifier on the held-out test split.
# NOTE(review): RMSE and MAE are regression metrics and need numeric labels;
# this assumes 'satisfaction' is already numeric (e.g. 0/1) -- confirm.
accuracy = metrics.accuracy_score(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)

print("\nRandomForest classifier Evaluation:")
print("Accuracy Score:", round(accuracy*100), "%")
# The `squared=False` argument of mean_squared_error is deprecated and is
# rejected by recent scikit-learn releases, so the root is taken explicitly;
# this gives the same RMSE on every scikit-learn version.
print("RMSE:", np.sqrt(mse))
print("MAE:", metrics.mean_absolute_error(y_test, y_pred))