In [23]:
import numpy as np 
import pandas as pd 

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for file_name in files:
        print(os.path.join(folder, file_name))

In [24]:
# Read the training CSV into `df` and preview its first rows.
train_csv_path = "/kaggle/input/titanic-compitition-datasets/train.csv"
df = pd.read_csv(train_csv_path)
df.head()

In [25]:
# Number of non-null values in each column of the training frame.
df.count()

# Data Cleaning and Preprocessing on "train.csv"

In [26]:
# Remove, in place, the columns that are not kept for modelling.
unused_columns = ['PassengerId', 'Cabin', 'Ticket', 'Fare', 'Name']
df.drop(columns=unused_columns, inplace=True)

In [27]:
# Print summary statistics, then the number of missing values per column.
summary_stats = df.describe()
missing_counts = df.isna().sum()
print(summary_stats)
print(missing_counts)

In [28]:
# Impute missing ages with the mean age.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on `df['Age']`: the in-place form acts on an
# intermediate object, triggers a pandas chained-assignment warning, and
# does not update `df` at all when Copy-on-Write is enabled.
df['Age'] = df['Age'].fillna(df['Age'].mean())

In [29]:
# Count passengers for every (Sex, Survived) combination.
sex_and_outcome = [df['Sex'], df['Survived']]
df['Survived'].groupby(sex_and_outcome).count()

In [30]:
from sklearn.preprocessing import LabelEncoder

In [31]:
# Show the distinct embarkation codes, then replace missing ones with 'Q'.
print(df['Embarked'].unique())
# Assign the filled column back rather than using fillna(inplace=True) on the
# selected column; the chained in-place call is not guaranteed to modify `df`
# (it is a no-op under pandas Copy-on-Write).
df['Embarked'] = df['Embarked'].fillna('Q')

In [32]:
# Replace the text categories in `Sex` and `Embarked` with integer codes.
# The same encoder object is re-fitted for each column in turn.
le = LabelEncoder()
for column_name in ('Sex', 'Embarked'):
    df[column_name] = le.fit_transform(df[column_name])
df.head()

In [33]:
# Re-check the number of missing values per column after the fills above.
df.isna().sum()

In [34]:
# Print the column dtypes and non-null counts of the prepared training frame.
df.info()

In [35]:
# Pie chart of the row counts per `Survived` value (counts read from the `Sex` column).
survived_counts = df.groupby([df.Survived]).count()
survived_counts.plot.pie(y='Sex', figsize=(5, 7), autopct='%1.1f%%')

In [36]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, mean_absolute_error, confusion_matrix
from sklearn import metrics
from sklearn import svm

In [37]:
# Separate the target from the features, then hold out 33% of the rows
# for evaluation (fixed seed so the split is repeatable).
y = df['Survived']
X = df.drop(columns=['Survived'])
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=10
)

# Classification using SVM

In [38]:
# Fit a linear-kernel support vector classifier on the training split.
# `fit` returns the estimator itself, so `my_model_1` is the fitted `SVM_model`.
SVM_model = svm.SVC(kernel='linear')
SVM_model.fit(X_train, y_train)
my_model_1 = SVM_model

In [39]:
# Predict on the held-out split; the bare name on the last line displays the result.
predictions_1= my_model_1.predict(X_test)
predictions_1

In [40]:
# Report accuracy, per-class metrics and the confusion matrix on the held-out split.
# The "\n" belongs to print(), not accuracy_score(): passing it as a third
# positional argument put it in the `normalize` slot, which current
# scikit-learn rejects (the parameter is keyword-only).
print("Accuracy of SVM model is: ", accuracy_score(y_test, predictions_1), "\n")
print("SVM Classification report: ","\n", metrics.classification_report(y_test,predictions_1))
print("Confusion matrix for SVM:","\n",confusion_matrix(y_test,predictions_1))

# Classification using BaggingClassifier

In [41]:
# Bagging ensemble of 500 decision trees, each trained on a bootstrap sample
# of 100 rows, using all available cores.
# `random_state` is set on the ensemble as well: the bootstrap resampling is
# driven by BaggingClassifier's own seed, so seeding only the base tree left
# the fitted model (and its scores) different on every run.
Bagging_Decision_tree_model = BaggingClassifier(
    DecisionTreeClassifier(random_state=20),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    n_jobs=-1,
    random_state=20,
)
my_model_2 = Bagging_Decision_tree_model.fit(X_train, y_train)

In [42]:
# Predict on the held-out split with the bagging model and display the result.
predictions_2= my_model_2.predict(X_test)
predictions_2

In [43]:
# Report accuracy, per-class metrics and the confusion matrix on the held-out split.
# The "\n" is an argument of print(); it was previously passed positionally
# to accuracy_score(), where it landed in the keyword-only `normalize` slot.
print("Accuracy of BaggingClassifier model is: ", accuracy_score(y_test, predictions_2), "\n")
print("BaggingClassifier Classification report: ","\n", metrics.classification_report(y_test,predictions_2))
print("Confusion matrix for BaggingClassifier:","\n",confusion_matrix(y_test,predictions_2))

# Classification using AdaBoost

In [44]:
# Fit an AdaBoost ensemble (50 estimators, learning rate 1, fixed seed).
# `fit` returns the estimator itself, so `my_model_3` is the fitted `AdaBoost_model`.
AdaBoost_model = AdaBoostClassifier(
    n_estimators=50,
    learning_rate=1,
    random_state=100,
)
AdaBoost_model.fit(X_train, y_train)
my_model_3 = AdaBoost_model

In [45]:
# Predict on the held-out split with the AdaBoost model and display the result.
predictions_3= my_model_3.predict(X_test)
predictions_3

In [46]:
# Report accuracy, per-class metrics and the confusion matrix on the held-out split.
# The "\n" is an argument of print(); it was previously passed positionally
# to accuracy_score(), where it landed in the keyword-only `normalize` slot.
print("Accuracy of AdaBoost model is: ", accuracy_score(y_test, predictions_3), "\n")
print("AdaBoost Classification report: ","\n", metrics.classification_report(y_test,predictions_3))
print("Confusion matrix for AdaBoost:","\n",confusion_matrix(y_test,predictions_3))

# Classification using Logistic Regression

In [47]:
# Fit a logistic regression classifier using the liblinear solver.
# `fit` returns the estimator itself, so `my_model_4` is the fitted `LogisticR_model`.
LogisticR_model = LogisticRegression(solver='liblinear')
LogisticR_model.fit(X_train, y_train)
my_model_4 = LogisticR_model

In [48]:
# Predict on the held-out split with the logistic regression model and display the result.
predictions_4= my_model_4.predict(X_test)
predictions_4

In [49]:
# Report accuracy, per-class metrics and the confusion matrix on the held-out split.
# The "\n" is an argument of print(); it was previously passed positionally
# to accuracy_score(), where it landed in the keyword-only `normalize` slot.
print("Accuracy of Logistic Regression model is: ", accuracy_score(y_test, predictions_4), "\n")
print("Logistic Regression Classification report: ","\n", metrics.classification_report(y_test,predictions_4))
print("Confusion matrix for Logistic Regression:","\n",confusion_matrix(y_test,predictions_4))

# Data Cleaning and Preprocessing on "test.csv"

In [50]:
# Read the competition test CSV into `df2` and preview its first rows.
test_csv_path = "/kaggle/input/titanic-compitition-datasets/test.csv"
df2 = pd.read_csv(test_csv_path)
df2.head()

In [51]:
# Number of non-null values in each column of the test frame.
df2.count()

In [52]:
# Remove, in place, the same columns that were dropped from the training frame.
unused_test_columns = ['PassengerId', 'Cabin', 'Ticket', 'Fare', 'Name']
df2.drop(columns=unused_test_columns, inplace=True)

In [53]:
# Print summary statistics, then the number of missing values per column.
test_summary_stats = df2.describe()
test_missing_counts = df2.isna().sum()
print(test_summary_stats)
print(test_missing_counts)

In [54]:
# Impute missing test ages with the mean age of the TRAINING frame (`df`), so
# the test rows are filled with the same value the models were trained with
# rather than a statistic computed from the test data. (Mean-filling the
# training column does not change its mean, so this is the original value.)
# The result is assigned back instead of using fillna(inplace=True) on the
# selected column, which is not guaranteed to modify `df2` and is a no-op
# under pandas Copy-on-Write.
df2['Age'] = df2['Age'].fillna(df['Age'].mean())

In [55]:
# Encode `Sex` and `Embarked` with the integer codes learned from the TRAINING
# data. Re-fitting the encoders on the test frame only yields matching codes
# when both files happen to contain exactly the same categories.
# The raw training file is re-read because `df` already holds encoded values;
# its `Embarked` gaps are filled with 'Q' exactly as in the training
# preprocessing, and the same fill is applied to the test column.
# `transform` raises ValueError if the test data contains a category that was
# never seen in training (this also happens if the cell is re-run on
# already-encoded data).
train_raw = pd.read_csv("/kaggle/input/titanic-compitition-datasets/train.csv")
sex_encoder = LabelEncoder().fit(train_raw['Sex'])
embarked_encoder = LabelEncoder().fit(train_raw['Embarked'].fillna('Q'))
df2['Sex'] = sex_encoder.transform(df2['Sex'])
df2['Embarked'] = embarked_encoder.transform(df2['Embarked'].fillna('Q'))
df2.head()

In [56]:
# Print the missing-value count per column, then the frame summary.
print(df2.isna().sum())
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() only added a stray "None" line to the output.
df2.info()

In [57]:
# Read gender_submission.csv into `submission` and preview its first rows.
gender_submission_path = "/kaggle/input/titanic-compitition-datasets/gender_submission.csv"
submission = pd.read_csv(gender_submission_path)
submission.head()

In [58]:
# Drop the identifier column from `submission` in place and preview what remains.
submission.drop(columns=['PassengerId'], inplace=True)
submission.head()

# Predictions on "test.csv" using:
1. SVM
2. BaggingClassifier
3. AdaBoost
4. Logistic Regression

In [59]:
# Run each fitted model on the prepared test frame, in model order 1..4.
test_predictions_1, test_predictions_2, test_predictions_3, test_predictions_4 = (
    fitted_model.predict(df2)
    for fitted_model in (my_model_1, my_model_2, my_model_3, my_model_4)
)

In [60]:
# Score each model's test predictions against `submission`, i.e. the labels
# loaded from gender_submission.csv.
# The "\n" is an argument of print(); it was previously passed positionally
# to accuracy_score(), where it landed in the keyword-only `normalize` slot
# and makes current scikit-learn raise a TypeError.
print("Accuracy of SVM model on test.csv is: ", accuracy_score(submission, test_predictions_1), "\n")
print("Accuracy of BaggingClassifier model on test.csv is: ", accuracy_score(submission, test_predictions_2), "\n")
print("Accuracy of AdaBoost model on on test.csv is: ", accuracy_score(submission, test_predictions_3), "\n")
print("Accuracy of Logistic Regression model on on test.csv is: ", accuracy_score(submission, test_predictions_4), "\n")

# **SVM** model has the highest accuracy of 1.0 (scored against the labels in `gender_submission.csv`)

In [62]:
# Summarise the SVM model: accuracy on the held-out split of train.csv, then
# accuracy, per-class metrics and confusion matrix against `submission`
# (the labels loaded from gender_submission.csv).
# The "\n" is an argument of print(); it was previously passed positionally
# to accuracy_score(), where it landed in the keyword-only `normalize` slot.
print("Accuracy of SVM model on train.csv is: ", accuracy_score(y_test, predictions_1), "\n")
print("Accuracy of SVM model on test.csv is: ", accuracy_score(submission, test_predictions_1), "\n")
print("SVM Classification report: ","\n", metrics.classification_report(submission,test_predictions_1))
print("Confusion matrix for SVM model","\n",confusion_matrix(submission,test_predictions_1))
# Display the SVM predictions for the test set as the cell output.
test_predictions_1

In [73]:
# Wrap the SVM test predictions in a one-column frame named 'Survived'.
temp = pd.DataFrame({'Survived': test_predictions_1})

In [79]:
# Re-read test.csv to recover PassengerId (it was dropped from `df2`), then
# pair each id with the prediction held in `temp`.
test_for_sub = pd.read_csv("/kaggle/input/titanic-compitition-datasets/test.csv")
final_submission = test_for_sub[['PassengerId']].copy()
final_submission['Survived'] = temp
final_submission.head()

In [81]:
# Non-null count per column; both columns should report the same number of rows.
final_submission.count()

In [88]:
# Write the submission file to the notebook's working directory.
# The previous target was a Windows path inside a normal (non-raw) string:
# "\f" in "\final.csv" is a form-feed escape that corrupted the file name, the
# other backslashes were invalid escape sequences, and the "./B:\..." location
# does not exist in the Kaggle environment the inputs are read from.
final_submission.to_csv("final.csv", index=False)