In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Read the training and test CSVs from the competition input directory.
train, test = map(
    pd.read_csv,
    ('../input/titanic/train.csv', '../input/titanic/test.csv'),
)

In [None]:
# (rows, columns) of each frame, displayed as the cell output.
tuple(frame.shape for frame in (train, test))

In [None]:
import missingno as msno

# missingno bar chart of per-column completeness for the training frame.
msno.bar(
    train,
    color='orange',
    fontsize=12,
    figsize=(10, 5),
)

In [None]:
# Count of rows per `Survived` value, with each bar annotated by its count.
sns.set_style('whitegrid')
fig, ax = plt.subplots(figsize=(9, 6))
# Draw on the axes created above explicitly instead of relying on pyplot's
# implicit "current axes".
sns.countplot(x='Survived', data=train, palette='RdBu_r', ax=ax)
for p in ax.patches:
    height = p.get_height()
    # get_height() is a float; format without decimals so the label reads
    # "N" rather than "N.0".
    ax.text(p.get_x() + p.get_width() / 2., height + 1, f'{height:.0f}', ha="center")

In [None]:
# Survival counts, with bars split by `Sex`.
sns.set_style('whitegrid')
plt.figure(figsize=(8, 5))
sns.countplot(
    data=train,
    x='Survived',
    hue='Sex',
    palette='RdBu_r',
)

In [None]:
# Survival counts, with bars split by `Pclass`.
sns.set_style('whitegrid')
plt.figure(figsize=(8, 5))
sns.countplot(
    data=train,
    x='Survived',
    hue='Pclass',
    palette='viridis',
)

In [None]:
# Pie chart whose slices are the `Sex` values, sized by the `Survived` column.
fig = px.pie(
    data_frame=train,
    names='Sex',
    values='Survived',
    template='seaborn',
)
fig.update_traces(textinfo="percent+label", pull=0.05, rotation=90)
# Title the figure and hide the legend (labels are already drawn on the slices).
fig.update(
    layout_showlegend=False,
    layout_title_text='Sex composition of Survive Passengers',
)

In [None]:
# Distribution of Age within each passenger class.
sns.set_style('whitegrid')
plt.figure(figsize=(8, 5))
sns.boxplot(data=train, x='Pclass', y='Age')
plt.ylim(bottom=0, top=90)

In [None]:
def impute_age(cols):
    """Return a passenger's age, filling a missing value from their class.

    Parameters
    ----------
    cols : pandas.Series or sequence
        Two values in positional order: ``(Age, Pclass)``. In this notebook
        it is one row of ``train[['Age', 'Pclass']]`` supplied by
        ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    The age unchanged when it is not null. When it is null, the fixed
    per-class fill value (37, 29 or 24 for classes 1, 2 and 3). When the age
    is null and the class is none of those three, the null age is returned
    as-is, so the result stays "missing" instead of becoming an implicit
    ``None``.
    """
    # Read the two fields by position. On a Series whose index labels are
    # strings, integer keys such as `cols[0]` are deprecated in pandas, so use
    # `.iloc` when it is available and plain indexing for lists/tuples/arrays.
    values = cols.iloc if hasattr(cols, 'iloc') else cols
    age, pclass = values[0], values[1]

    if not pd.isnull(age):
        return age

    fill_by_class = {1: 37, 2: 29, 3: 24}
    return fill_by_class.get(pclass, age)

In [None]:
# Overwrite Age with impute_age applied row by row to the (Age, Pclass) pair.
train['Age'] = train[['Age', 'Pclass']].apply(impute_age, axis='columns')

In [None]:
# Same completeness chart as before, re-drawn after Age was overwritten.
msno.bar(
    train,
    color='orange',
    fontsize=12,
    figsize=(10, 5),
)

In [None]:
# Histogram of Age in 25 bins, with fixed axis limits.
plt.figure(figsize=(12, 5))
sns.histplot(train['Age'], bins=25)
plt.xlim(left=0, right=100)
plt.ylim(bottom=0, top=225)

In [None]:
# Row counts per SibSp value.
plt.figure(figsize=(8, 5))
sns.countplot(data=train, x='SibSp')
plt.ylim(bottom=0, top=700)

In [None]:
# Histogram of Fare in 30 bins; only the upper axis limits are pinned.
plt.figure(figsize=(12, 5))
sns.histplot(data=train, x='Fare', bins=30)
plt.xlim(None, 600)
plt.ylim(None, 500)

In [None]:
# Indicator columns for Sex, with the first level dropped.
sex = pd.get_dummies(data=train['Sex'], drop_first=True)

In [None]:
# Indicator columns for Embarked, with the first level dropped.
embark = pd.get_dummies(data=train['Embarked'], drop_first=True)

In [None]:
# Remove the non-numeric columns in place; Sex and Embarked were encoded as
# indicator frames (`sex`, `embark`) beforehand.
train.drop(
    columns=['Sex', 'Name', 'Ticket', 'Embarked'],
    inplace=True,
)

In [None]:
# Append the indicator columns side by side with the remaining features.
train = pd.concat(objs=[train, sex, embark], axis='columns')

In [None]:
# Preview the first five rows of the transformed frame.
train.head(n=5)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Target is the Survived column; features are everything else except Cabin.
y = train['Survived']
X = train.drop(columns=['Survived', 'Cabin'])

In [None]:
# Seeded 50/50 split of the labelled data into fitting and hold-out halves.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.50,
    random_state=50,
)

In [None]:
# Shapes of the four split parts, displayed as the cell output.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.linear_model import Lasso,Ridge
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsRegressor,KNeighborsClassifier
from sklearn import metrics
from sklearn.metrics import classification_report,confusion_matrix,mean_absolute_error,mean_squared_error,r2_score

In [None]:
# Ordinary least squares on the fitting half; score the hold-out half.
# `fit` returns the estimator itself, so construction and fitting can be chained.
lm = LinearRegression().fit(X_train, y_train)
predictions = lm.predict(X_test)

In [None]:
# Fitted parameters of `lm` followed by hold-out error metrics.
# Entries are separated (not preceded) by a blank-line spacer.
mse = metrics.mean_squared_error(y_test, predictions)
report = (
    ('Coefficients:\n', lm.coef_),
    ('Intercept:', lm.intercept_),
    ('Mean Absolute Error:', metrics.mean_absolute_error(y_test, predictions)),
    ('Mean Squared Error:', mse),
    ('Root Mean Squared Error:', np.sqrt(mse)),
    ('R-Squared Score:', (r2_score(y_test, predictions))),
)
for position, (label, value) in enumerate(report):
    if position:
        print('\n')
    print(label, value)

In [None]:
# Lasso with default settings; `predictions` is overwritten with its
# hold-out output.
lasso = Lasso().fit(X_train, y_train)
predictions = lasso.predict(X_test)

In [None]:
# Hold-out error metrics for the current `predictions`; every line is
# preceded by a blank-line spacer.
mse = metrics.mean_squared_error(y_test, predictions)
scores = (
    ('Mean Absolute Error:', metrics.mean_absolute_error(y_test, predictions)),
    ('Mean Squared Error:', mse),
    ('Root Mean Squared Error:', np.sqrt(mse)),
    ('R-Squared Score:', round(r2_score(y_test, predictions), 3)),
)
for label, value in scores:
    print('\n')
    print(label, value)

In [None]:
# Ridge with default settings; `predictions` is overwritten with its
# hold-out output.
ridge = Ridge().fit(X_train, y_train)
predictions = ridge.predict(X_test)

In [None]:
# Hold-out error metrics for the current `predictions`; every line is
# preceded by a blank-line spacer.
mse = metrics.mean_squared_error(y_test, predictions)
scores = (
    ('Mean Absolute Error:', metrics.mean_absolute_error(y_test, predictions)),
    ('Mean Squared Error:', mse),
    ('Root Mean Squared Error:', np.sqrt(mse)),
    ('R-Squared Score:', round(r2_score(y_test, predictions), 3)),
)
for label, value in scores:
    print('\n')
    print(label, value)

In [None]:
# Logistic regression with default settings; `predictions` now holds the
# predicted class labels for the hold-out half.
logmodel = LogisticRegression().fit(X_train, y_train)
predictions = logmodel.predict(X_test)

In [None]:
# Hold-out evaluation of the logistic regression model (`logmodel`).
print('Confusion Matrix:')
print(confusion_matrix(y_test,predictions))
print('\n')
print('Classification Report:')
print(classification_report(y_test,predictions))
print('\n')
# Report the parameters of the model being evaluated. This cell previously
# printed `lm.coef_` / `lm.intercept_`, which belong to the LinearRegression
# model fitted earlier, not to `logmodel`.
print('Coefficients:\n',logmodel.coef_)
print('\n')
print('Intercept:',logmodel.intercept_)
print('\n')
print('Mean Absolute Error:',metrics.mean_absolute_error(y_test,predictions))
print('\n')
print('Mean Squared Error:',metrics.mean_squared_error(y_test,predictions))
print('\n')
print('Root Mean Squared Error:',np.sqrt(metrics.mean_squared_error(y_test,predictions)))
print('\n')
print('R-Squared Score:',round(r2_score(y_test,predictions),3))

In [None]:
# Decision-tree regressor fitted on the 0/1 target.
# Seed the estimator so a re-run reproduces the same tree; 50 matches the
# seed already used for the train/hold-out split.
dt = DecisionTreeRegressor(random_state=50)
dt.fit(X_train,y_train)
predictions = dt.predict(X_test)

In [None]:
# Hold-out evaluation of the decision tree (`dt`).
# `dt` is a regressor, so `predictions` are floats. Threshold at 0.5 to get
# class labels for the classification metrics; this changes nothing when every
# prediction is already exactly 0.0 or 1.0.
predicted_labels = (predictions >= 0.5).astype(int)
print('Confusion Matrix:')
print(confusion_matrix(y_test,predicted_labels))
print('\n')
print('Classification Report:')
print(classification_report(y_test,predicted_labels))
print('\n')
# A tree has no coefficients or intercept. This cell previously printed those
# of `lm` (the LinearRegression model), which says nothing about `dt`; show
# the tree's own feature importances instead.
print('Feature Importances:\n',dt.feature_importances_)
print('\n')
print('Mean Absolute Error:',metrics.mean_absolute_error(y_test,predictions))
print('\n')
print('Mean Squared Error:',metrics.mean_squared_error(y_test,predictions))
print('\n')
print('Root Mean Squared Error:',np.sqrt(metrics.mean_squared_error(y_test,predictions)))
print('\n')
print('R-Squared Score:',round(r2_score(y_test,predictions),3))

In [None]:
# Random-forest regressor fitted on the 0/1 target.
# The forest is randomized; seed it so the metrics below are reproducible on
# a fresh run. 50 matches the seed already used for the split.
rfr = RandomForestRegressor(random_state=50)
rfr.fit(X_train,y_train)
predictions = rfr.predict(X_test)

In [None]:
# Hold-out error metrics for the current `predictions`; every line is
# preceded by a blank-line spacer.
mse = metrics.mean_squared_error(y_test, predictions)
scores = (
    ('Mean Absolute Error:', metrics.mean_absolute_error(y_test, predictions)),
    ('Mean Squared Error:', mse),
    ('Root Mean Squared Error:', np.sqrt(mse)),
    ('R-Squared Score:', round(r2_score(y_test, predictions), 3)),
)
for label, value in scores:
    print('\n')
    print(label, value)

In [None]:
# Show the type of the object returned by the most recent `predict` call.
type(predictions)

In [None]:
# Extra-trees regressor fitted on the 0/1 target.
# The ensemble is randomized; seed it so the metrics below are reproducible
# on a fresh run. 50 matches the seed already used for the split.
etr = ExtraTreesRegressor(random_state=50)
etr.fit(X_train,y_train)
predictions = etr.predict(X_test)

In [None]:
# Hold-out error metrics for the current `predictions`; every line is
# preceded by a blank-line spacer.
mse = metrics.mean_squared_error(y_test, predictions)
scores = (
    ('Mean Absolute Error:', metrics.mean_absolute_error(y_test, predictions)),
    ('Mean Squared Error:', mse),
    ('Root Mean Squared Error:', np.sqrt(mse)),
    ('R-Squared Score:', round(r2_score(y_test, predictions), 3)),
)
for label, value in scores:
    print('\n')
    print(label, value)

In [None]:
# Support-vector classifier with default settings; `predictions` now holds
# predicted class labels for the hold-out half.
svm = SVC().fit(X_train, y_train)
predictions = svm.predict(X_test)

In [None]:
# Hold-out error metrics for the current `predictions`; every line is
# preceded by a blank-line spacer.
mse = metrics.mean_squared_error(y_test, predictions)
scores = (
    ('Mean Absolute Error:', metrics.mean_absolute_error(y_test, predictions)),
    ('Mean Squared Error:', mse),
    ('Root Mean Squared Error:', np.sqrt(mse)),
    ('R-Squared Score:', round(r2_score(y_test, predictions), 3)),
)
for label, value in scores:
    print('\n')
    print(label, value)

In [None]:
# k-nearest-neighbours regressor with default settings, fitted on the 0/1
# target; `predictions` is overwritten with its hold-out output.
knr = KNeighborsRegressor().fit(X_train, y_train)
predictions = knr.predict(X_test)

In [None]:
# Hold-out error metrics for the current `predictions`; every line is
# preceded by a blank-line spacer.
mse = metrics.mean_squared_error(y_test, predictions)
scores = (
    ('Mean Absolute Error:', metrics.mean_absolute_error(y_test, predictions)),
    ('Mean Squared Error:', mse),
    ('Root Mean Squared Error:', np.sqrt(mse)),
    ('R-Squared Score:', round(r2_score(y_test, predictions), 3)),
)
for label, value in scores:
    print('\n')
    print(label, value)

In [None]:
# k-nearest-neighbours classifier with default settings; `predictions` now
# holds predicted class labels for the hold-out half.
knn = KNeighborsClassifier().fit(X_train, y_train)
predictions = knn.predict(X_test)

In [None]:
# Hold-out evaluation of the k-nearest-neighbours classifier (`knn`).
# The `lm.coef_` / `lm.intercept_` lines that used to sit in this report were
# removed: they belong to the LinearRegression model fitted earlier and a
# nearest-neighbour model has no coefficients of its own to show.
print('Confusion Matrix:')
print(confusion_matrix(y_test,predictions))
print('\n')
print('Classification Report:')
print(classification_report(y_test,predictions))
print('\n')
print('Mean Absolute Error:',metrics.mean_absolute_error(y_test,predictions))
print('\n')
print('Mean Squared Error:',metrics.mean_squared_error(y_test,predictions))
print('\n')
print('Root Mean Squared Error:',np.sqrt(metrics.mean_squared_error(y_test,predictions)))
print('\n')
print('R-Squared Score:',(r2_score(y_test,predictions)))

In [None]:
# Gaussian naive Bayes with default settings; `predictions` now holds
# predicted class labels for the hold-out half.
gnb = GaussianNB().fit(X_train, y_train)
predictions = gnb.predict(X_test)

In [None]:
# Hold-out evaluation of the Gaussian naive Bayes classifier (`gnb`).
# The `lm.coef_` / `lm.intercept_` lines that used to sit in this report were
# removed: they belong to the LinearRegression model fitted earlier and say
# nothing about `gnb`.
print('Confusion Matrix:')
print(confusion_matrix(y_test,predictions))
print('\n')
print('Classification Report:')
print(classification_report(y_test,predictions))
print('\n')
print('Mean Absolute Error:',metrics.mean_absolute_error(y_test,predictions))
print('\n')
print('Mean Squared Error:',metrics.mean_squared_error(y_test,predictions))
print('\n')
print('Root Mean Squared Error:',np.sqrt(metrics.mean_squared_error(y_test,predictions)))
print('\n')
print('R-Squared Score:',round(r2_score(y_test,predictions),3))

In [None]:
# Build the submission predictions from the competition test frame (`test`).
# `X_test` is the hold-out half of the *training* data, so predicting on it
# yields rows for different passengers than the submission file expects.
# NOTE(review): assumes `test` has the same columns as the raw training frame
# minus `Survived` - confirm against test.csv.
test_features = test.copy()
test_features['Age'] = test_features[['Age','Pclass']].apply(impute_age,axis=1)
test_features = pd.concat(
    [
        test_features.drop(
            columns=['Sex','Name','Ticket','Embarked','Cabin'],
            errors='ignore',
        ),
        pd.get_dummies(test_features['Sex'],drop_first=True),
        pd.get_dummies(test_features['Embarked'],drop_first=True),
    ],
    axis=1,
)
# Use exactly the columns, in the same order, that the models were fitted on;
# an indicator column absent from the test data is added as all zeros.
test_features = test_features.reindex(columns=X.columns,fill_value=0)
# Ridge cannot score NaN inputs; fill any missing Fare with the training median.
test_features['Fare'] = test_features['Fare'].fillna(X['Fare'].median())

# Ridge returns a continuous score. A bare `astype(int)` truncates toward zero,
# turning every score below 1.0 into 0; threshold at 0.5 to get 0/1 labels.
predictions = (ridge.predict(test_features) >= 0.5).astype(int)

In [None]:
# Start from the sample submission file, discard its Survived column, and
# attach the current `predictions` in its place (aligned on the row index).
final_submission = (
    pd.read_csv('../input/titanic/gender_submission.csv')
    .drop(columns='Survived')
)
final_submission['Survived'] = pd.DataFrame(data=predictions)

In [None]:
# Write the submission without the index column.
final_submission.to_csv(
    path_or_buf='final_gender_submit.csv',
    index=False,
)