# Authors: Barış Bali (20190301510) Computer Engineering (4+1)
#          Mustafa Dindar (20180301010) Computer Engineering (4+1)

# Titanic Dataset Classification Model
1) Selected Dataset in csv format is: Titanic Dataset from Kagle.
2) Attributes in detail: 

PassengerId: A unique identifier for each passenger.  

Survived: Indicates if the passenger survived (1) or did not survive (0).  

Pclass (Ticket Class): Represents the ticket class (1st, 2nd, or 3rd class).  

Name: Name of the passenger.  

Sex: Gender of the passenger (male or female).  

Age: Age of the passenger (some entries might be missing).  

SibSp: Number of siblings/spouses aboard the Titanic.  

Parch: Number of parents/children aboard the Titanic.  

Ticket: Ticket number.  

Fare: Passenger fare.  

Cabin: Cabin number (some entries might be missing).  

Embarked: Port of embarkation (C = Cherbourg, Q = Queenstown, S = Southampton).  

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.impute import SimpleImputer

#3) Convertion the "Sex" and "Embarked" attribute to numerical values using label encoder.

#Reading the data with pandas
titanic_df = pd.read_csv('Titanic-Dataset.csv')
#displaying dataset read by pandas
print(titanic_df.head())

label_encoder=LabelEncoder()

#Using label encoder to transform this categorical values to numerical values
titanic_df['Sex'] = label_encoder.fit_transform(titanic_df['Sex'])
titanic_df['Embarked'] = titanic_df['Embarked'].fillna('Unknown') #for filling missing values
titanic_df['Embarked'] = label_encoder.fit_transform(titanic_df['Embarked'])

#to display with numerical values
print(titanic_df.head())

   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450   8.0500   NaN        S  
  

In [2]:
# 4) Normalization using Standart Scaler
#normalization with standart scaler module (all numeric values)
numeric_columns = ['Age', 'Fare', 'Sex', 'Embarked']
scaler =StandardScaler()
titanic_df[numeric_columns] = scaler.fit_transform(titanic_df[numeric_columns])
#Normalization df
print(titanic_df.head())

   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name       Sex       Age  \
0                            Braund, Mr. Owen Harris  0.737695 -0.530377   
1  Cumings, Mrs. John Bradley (Florence Briggs Th... -1.355574  0.571831   
2                             Heikkinen, Miss. Laina -1.355574 -0.254825   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel) -1.355574  0.365167   
4                           Allen, Mr. William Henry  0.737695  0.365167   

   SibSp  Parch            Ticket      Fare Cabin  Embarked  
0      1      0         A/5 21171 -0.502445   NaN  0.581114  
1      1      0          PC 17599  0.786845   C85 -1.938460  
2      0      0  STON/O2. 3101282 -0.488854   NaN  0.581114  
3      1      0            113803  0.420730  C123  0.581114  
4      0      0 

In [3]:
# 5) Comparing the performance values.

#KNN three time (3,7,10)
X = titanic_df[['Age', 'Fare', 'Sex', 'Embarked']] 
y = titanic_df['Survived'] 

# Adding a simple imputer because of NaN values in the dataset
imputer = SimpleImputer(strategy='mean') 
X_imputed = imputer.fit_transform(X)

#train test splits for data
X_train, X_test, y_train, y_test = train_test_split(X_imputed, y, test_size=0.2, random_state=42)
for k in [3, 7, 11]:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    
    print(f"KNN (K={k})")
    print("Accuracy:", accuracy)
    print("Classification Report:\n", report)


KNN (K=3)
Accuracy: 0.7430167597765364
Classification Report:
               precision    recall  f1-score   support

           0       0.76      0.82      0.79       105
           1       0.71      0.64      0.67        74

    accuracy                           0.74       179
   macro avg       0.74      0.73      0.73       179
weighted avg       0.74      0.74      0.74       179

KNN (K=7)
Accuracy: 0.7541899441340782
Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.80      0.79       105
           1       0.71      0.69      0.70        74

    accuracy                           0.75       179
   macro avg       0.75      0.74      0.75       179
weighted avg       0.75      0.75      0.75       179

KNN (K=11)
Accuracy: 0.7597765363128491
Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.83      0.80       105
           1       0.73      0.66      0.70    

In [7]:
#MLP
configurations = [
    (32,),  # Single hidden layer with 32 neurons
    (32, 32),  # Two hidden layers with 32 neurons each
    (32, 32, 32),  # Three hidden layers with 32 neurons each
]

for config in configurations:
    mlp = MLPClassifier(hidden_layer_sizes=config)
    mlp.fit(X_train, y_train)
    y_pred = mlp.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    
    print(f"MLP (Config={config})")
    print("Accuracy:", accuracy)
    print("Classification Report:\n", report)




MLP (Config=(32,))
Accuracy: 0.7821229050279329
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.84      0.82       105
           1       0.75      0.70      0.73        74

    accuracy                           0.78       179
   macro avg       0.78      0.77      0.77       179
weighted avg       0.78      0.78      0.78       179





MLP (Config=(32, 32))
Accuracy: 0.7486033519553073
Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.81      0.79       105
           1       0.71      0.66      0.69        74

    accuracy                           0.75       179
   macro avg       0.74      0.74      0.74       179
weighted avg       0.75      0.75      0.75       179

MLP (Config=(32, 32, 32))
Accuracy: 0.7653631284916201
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.80      0.80       105
           1       0.72      0.72      0.72        74

    accuracy                           0.77       179
   macro avg       0.76      0.76      0.76       179
weighted avg       0.77      0.77      0.77       179





In [8]:
#NB
nb = GaussianNB()
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Naïve Bayes")
print("Accuracy:", accuracy)
print("Classification Report:\n", report)


Naïve Bayes
Accuracy: 0.7597765363128491
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.79      0.79       105
           1       0.71      0.72      0.71        74

    accuracy                           0.76       179
   macro avg       0.75      0.75      0.75       179
weighted avg       0.76      0.76      0.76       179



# Analysis results using accuracy P, R adn F values.
#  K-Nearest Neighbors (KNN)
K=3
Accuracy: 0.743
Precision: 0.74
Recall: 0.73
F1-score: 
0.74
K=7
Accuracy: 0.754
Precision: 0.75
Recall: 0.74
F1-sc
ore: 0.75
K=11
Accuracy: 0.760
Precision: 0.75
Recall: 0.75

#  F1-score: 0.75
Multi-Layer Perceptron (MLP)
Config=(32,)
Accuracy: 0.777
Precision: 0.77
Recall: 
0.76
F1-score: 0.77
Config=(32, 32)
Accuracy: 0.765
Precision: 0.76
Rec
all: 0.76
F1-score: 0.76
Config=(32, 32, 32)
Accuracy: 0.754
Precision: 0.75
#  
Recall: 0.75
F1-score: 0.75
Naïve Bayes (NB)
Accuracy: 0.760
Precision: 0.75
Recall: 0.75
F1-score: 0.75