In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from tabulate import tabulate

In [3]:
df=pd.read_csv('/Users/hondamir/Desktop/Artificial Intelligence/Airplane Services.csv')

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 24 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   id                                 50000 non-null  int64  
 1   Gender                             50000 non-null  object 
 2   Customer Type                      50000 non-null  object 
 3   Age                                50000 non-null  int64  
 4   Type of Travel                     50000 non-null  object 
 5   Class                              50000 non-null  object 
 6   Flight Distance                    50000 non-null  int64  
 7   Inflight wifi service              50000 non-null  int64  
 8   Departure/Arrival time convenient  50000 non-null  int64  
 9   Ease of Online booking             50000 non-null  int64  
 10  Gate location                      50000 non-null  int64  
 11  Food and drink                     50000 non-null  int

In [5]:
# Klibsiz ma'lumotlarni qayta ishlash

df_filled = df.copy()
df_filled["Arrival Delay in Minutes"].fillna(df_filled["Arrival Delay in Minutes"].median(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_filled["Arrival Delay in Minutes"].fillna(df_filled["Arrival Delay in Minutes"].median(), inplace=True)


In [6]:
# Kategorik ma'lumotlarni kodlash
df_encoded = pd.get_dummies(df_filled, drop_first=True)

In [8]:
df_encoded.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 25 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   id                                 50000 non-null  int64  
 1   Age                                50000 non-null  int64  
 2   Flight Distance                    50000 non-null  int64  
 3   Inflight wifi service              50000 non-null  int64  
 4   Departure/Arrival time convenient  50000 non-null  int64  
 5   Ease of Online booking             50000 non-null  int64  
 6   Gate location                      50000 non-null  int64  
 7   Food and drink                     50000 non-null  int64  
 8   Online boarding                    50000 non-null  int64  
 9   Seat comfort                       50000 non-null  int64  
 10  Inflight entertainment             50000 non-null  int64  
 11  On-board service                   50000 non-null  int

In [10]:
# Ma'lumotlarni ajratib chiqish
x = df_encoded.drop(columns=["id", "satisfaction_satisfied"])
y = df_encoded["satisfaction_satisfied"]

In [12]:
# Ma'lumotlarni train va test qismiga ajratib chiqish
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [14]:
# Klibsiz modelni o'qitish
model_no_klib = RandomForestClassifier(n_estimators=100, random_state=42)
model_no_klib.fit(x_train, y_train)
y_pred_no_klib = model_no_klib.predict(x_test)
accuracy_no_klib = accuracy_score(y_test, y_pred_no_klib)

In [15]:
# Klibli modelni o'qitish
df_klib = df_encoded.copy()
scaler = StandardScaler()
numerical_cols = ["Age", "Flight Distance", "Departure Delay in Minutes", "Arrival Delay in Minutes"]
df_klib[numerical_cols] = scaler.fit_transform(df_klib[numerical_cols])

In [17]:
# Train va test qismiga ajratib chiqish
x_klib = df_klib.drop(columns=["id", "satisfaction_satisfied"])
y_klib = df_klib["satisfaction_satisfied"]
x_train_klib, x_test_klib, y_train_klib, y_test_klib = train_test_split(x_klib, y_klib, test_size=0.2, random_state=42)

In [18]:
# Klibli modelni o'qitish
model_klib = RandomForestClassifier(n_estimators=100, random_state=42)
model_klib.fit(x_train_klib, y_train_klib)
y_pred_klib = model_klib.predict(x_test_klib)
accuracy_klib = accuracy_score(y_test_klib, y_pred_klib)

In [25]:
# Natijalarni chiqarish
result = pd.DataFrame({
    "Model": ["Klibsiz", "Klib bilan"],
    "Accuracy": [accuracy_no_klib, accuracy_klib]
})

In [26]:
# Natijalarni tabulate yordamida chiqarish
print(tabulate(result, headers='keys', tablefmt='pretty'))

+---+------------+----------+
|   |   Model    | Accuracy |
+---+------------+----------+
| 0 |  Klibsiz   |  0.9623  |
| 1 | Klib bilan |  0.9621  |
+---+------------+----------+
