In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pandas.api.types import is_numeric_dtype, is_object_dtype

In [2]:
# Load the dataset from train.csv (path is relative to the working directory).
df = pd.read_csv("train.csv")

In [3]:
# First 5 rows
df.head()

Unnamed: 0.1,Unnamed: 0,id,Gender,Customer Type,Age,Type of Travel,Class,Flight Distance,Inflight wifi service,Departure/Arrival time convenient,...,Inflight entertainment,On-board service,Leg room service,Baggage handling,Checkin service,Inflight service,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes,satisfaction
0,0,70172,Male,Loyal Customer,13,Personal Travel,Eco Plus,460,3,4,...,5,4,3,4,4,5,5,25,18.0,neutral or dissatisfied
1,1,5047,Male,disloyal Customer,25,Business travel,Business,235,3,2,...,1,1,5,3,1,4,1,1,6.0,neutral or dissatisfied
2,2,110028,Female,Loyal Customer,26,Business travel,Business,1142,2,2,...,5,4,3,4,4,4,5,0,0.0,satisfied
3,3,24026,Female,Loyal Customer,25,Business travel,Business,562,2,5,...,2,2,5,3,1,4,2,11,9.0,neutral or dissatisfied
4,4,119299,Male,Loyal Customer,61,Business travel,Business,214,3,3,...,3,3,4,4,3,3,3,0,0.0,satisfied


In [4]:
df.shape  # number of rows and columns

(103904, 25)

In [5]:
# List the column labels of df.
print(df.columns)

Index(['Unnamed: 0', 'id', 'Gender', 'Customer Type', 'Age', 'Type of Travel',
       'Class', 'Flight Distance', 'Inflight wifi service',
       'Departure/Arrival time convenient', 'Ease of Online booking',
       'Gate location', 'Food and drink', 'Online boarding', 'Seat comfort',
       'Inflight entertainment', 'On-board service', 'Leg room service',
       'Baggage handling', 'Checkin service', 'Inflight service',
       'Cleanliness', 'Departure Delay in Minutes', 'Arrival Delay in Minutes',
       'satisfaction'],
      dtype='object')


In [None]:
# Print a concise summary of df: column dtypes and non-null counts.
df.info()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, min/max) of df's columns.
df.describe()

In [None]:
# Split the column labels of df by dtype: numeric columns vs. object-dtype columns.
# is_numeric_dtype / is_object_dtype are imported with the other dependencies in the
# notebook's top import cell, so all dependencies are declared in one place.
numeric_columns = [col for col in df.columns if is_numeric_dtype(df[col])]
categorical_columns = [col for col in df.columns if is_object_dtype(df[col])]

In [None]:
# Count missing values per column.
df.isnull().sum()

In [None]:
def outlier_summary(df, numeric_columns):
    """Print how many IQR-rule outliers each listed column contains.

    A value is counted as an outlier when it lies strictly below
    ``Q1 - 1.5 * IQR`` or strictly above ``Q3 + 1.5 * IQR``, where Q1 and Q3
    are the 25th and 75th percentiles of that column.

    Args:
        df: DataFrame holding the columns to inspect.
        numeric_columns: Iterable of column labels present in ``df``.

    Returns:
        None. One line per column is written to stdout.
    """
    for name in numeric_columns:
        values = df[name]
        q1, q3 = values.quantile(0.25), values.quantile(0.75)
        spread = q3 - q1
        low_fence = q1 - 1.5 * spread
        high_fence = q3 + 1.5 * spread
        # Missing values compare False on both sides, so they are never counted.
        flagged = values.lt(low_fence) | values.gt(high_fence)
        print(f"{name}: {int(flagged.sum())} aykırı değer")

# Rebind numeric_columns to the labels of all np.number-typed columns,
# then print the outlier count for each of them.
numeric_columns = df.select_dtypes(include=[np.number]).columns
outlier_summary(df, numeric_columns)

In [None]:
# One horizontal box plot per selected column, each on its own 10x4 figure.
for col in ["Flight Distance", "Checkin service", "Departure Delay in Minutes", "Arrival Delay in Minutes"]:
    fig, ax = plt.subplots(figsize=(10, 4))
    sns.boxplot(x=df[col], ax=ax)
    ax.set_title(f"{col} - Box Plot")
    plt.show()

In [None]:
def replace_outliers_with_mean(df, column):
    """Overwrite IQR-rule outliers of ``df[column]`` with the column mean, in place.

    Values strictly below ``Q1 - 1.5 * IQR`` or strictly above
    ``Q3 + 1.5 * IQR`` are replaced. The replacement is the mean of the whole
    column (outliers included, missing values skipped), truncated toward zero
    with ``int()`` so that integer columns keep an integer-valued fill.

    The fences and the mean are computed from the data as it is at call time,
    so calling this again on an already-processed column can replace further
    values.

    Args:
        df: DataFrame to modify. It is mutated; nothing is copied.
        column: Label of the numeric column to process.

    Returns:
        None.
    """
    series = df[column]
    mean_value = series.mean()
    # An empty or all-missing column has a NaN mean: int(NaN) would raise
    # ValueError, and there is nothing to replace anyway.
    if pd.isna(mean_value):
        return

    Q1 = series.quantile(0.25)
    Q3 = series.quantile(0.75)
    IQR = Q3 - Q1
    lower = Q1 - 1.5 * IQR
    upper = Q3 + 1.5 * IQR

    # NaN compares False on both sides, so missing values are left untouched.
    is_outlier = (series < lower) | (series > upper)
    df.loc[is_outlier, column] = int(mean_value)

# Replace IQR outliers in both delay columns with the (truncated) column mean.
# Each call mutates df in place and recomputes its thresholds from the current
# data, so re-running this cell may replace additional values.
replace_outliers_with_mean(df, "Departure Delay in Minutes")
replace_outliers_with_mean(df, "Arrival Delay in Minutes")

In [None]:
# Box plots of the two delay columns, side by side on one 12x6 figure.
fig, axes = plt.subplots(1, 2, figsize=(12, 6))

# Left panel: departure delay; right panel: arrival delay.
# Each panel is titled with the name of the column it shows.
for ax, delay_col in zip(axes, ["Departure Delay in Minutes", "Arrival Delay in Minutes"]):
    sns.boxplot(data=df, y=delay_col, ax=ax)
    ax.set_title(delay_col)

plt.show()

In [None]:
# Frequency of each distinct "Checkin service" value.
print(df["Checkin service"].value_counts())

In [None]:
# Bar chart of how often each "Checkin service" value occurs.
ax = sns.countplot(data=df, x="Checkin service")
ax.set_title("Checkin Service Dağılımı")
plt.show()

In [None]:
# Distinct values present in the "Age" column.
df["Age"].unique()

In [None]:
# Distinct values present in the "Customer Type" column.
df["Customer Type"].unique()

In [None]:
# Distinct values present in the "satisfaction" column.
df["satisfaction"].unique()

In [None]:
# Distinct values present in the "Type of Travel" column.
df["Type of Travel"].unique()

In [None]:
# Share of each satisfaction label, expressed as a percentage of all rows.
satisfaction_counts = df["satisfaction"].value_counts(normalize=True).mul(100)
print("Memnuniyet yüzdeleri:\n", satisfaction_counts)

In [None]:
# Pie chart of the satisfaction labels; satisfaction_counts now holds absolute counts.
satisfaction_counts = df["satisfaction"].value_counts()
fig, ax = plt.subplots(figsize=(8, 6))
ax.pie(satisfaction_counts, labels=satisfaction_counts.index, autopct='%1.1f%%', startangle=140)
ax.set_title("Yolcu Memnuniyet Yüzdeleri")
plt.show()

In [None]:
# Mean age of the passengers whose satisfaction label is "satisfied".
is_satisfied = df["satisfaction"] == "satisfied"
satisfied_age_mean = df.loc[is_satisfied, "Age"].mean()
print("Memnun yolcuların yaş ortalaması:", round(satisfied_age_mean, 2))

In [None]:
# Percentage split of "Customer Type" among satisfied passengers.
satisfied_loyal = (
    df.loc[df["satisfaction"] == "satisfied", "Customer Type"]
    .value_counts(normalize=True)
    .mul(100)
)
print("Memnun yolcularda loyal/disloyal oranı:\n", satisfied_loyal)

In [None]:
# Percentage split of "Type of Travel" among disloyal customers.
disloyal_travel_type = (
    df.loc[df["Customer Type"] == "disloyal Customer", "Type of Travel"]
    .value_counts(normalize=True)
    .mul(100)
)
print("Disloyal yolcuların seyahat amacı:\n", disloyal_travel_type)

In [None]:
# Percentage split of "Type of Travel" among loyal customers.
Loyal_travel_type = (
    df.loc[df["Customer Type"] == "Loyal Customer", "Type of Travel"]
    .value_counts(normalize=True)
    .mul(100)
)
print("Loyal yolcuların seyahat amacı:\n", Loyal_travel_type)