In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the two Titanic CSV files: `dataset` is the training split used for all
# exploration and model fitting below; `test_data` is the separate test split.
dataset = pd.read_csv('../input/titanic/train.csv')
test_data = pd.read_csv('../input/titanic/test.csv')

In [None]:
# Preview the first rows of the test split.
test_data.head()

In [None]:
# Preview the first rows of the training split.
dataset.head()

In [None]:
# (rows, columns) of the training frame.
dataset.shape

In [None]:
# Column dtypes and non-null counts -- a first look at where values are missing.
dataset.info()

In [None]:
# (rows, columns) of the training frame, shown again before the missing-value plots.
dataset.shape

In [None]:
# Visual map of missing values: one row per record, one column per feature;
# cells where the value is missing get the contrasting colour.
sns.heatmap(
    dataset.isna(),
    cmap='viridis',
    cbar=False,
    yticklabels=False,
)

In [None]:
# Count of records per 'Survived' value, with each bar's height written above it.
sns.set_style('whitegrid')
fig, ax = plt.subplots(figsize=(9, 6))
sns.countplot(data=dataset, x='Survived', palette='RdBu_r', ax=ax)
for bar in ax.patches:
    bar_height = bar.get_height()
    # Centre the label horizontally over the bar, one unit above its top.
    label_x = bar.get_x() + bar.get_width() / 2.
    ax.text(label_x, bar_height + 1, bar_height, ha="center")

In [None]:
# Number of records per value of 'Sex'. The method must be called: without the
# parentheses the cell only displays the bound-method object, not the counts.
dataset['Sex'].value_counts()

In [None]:
# Counts per 'Survived' value, split into one bar per 'Sex'.
sns.set_style('whitegrid')
plt.figure(figsize=(8, 5))
sns.countplot(
    data=dataset,
    x='Survived',
    hue='Sex',
    palette='RdBu_r',
)

In [None]:
# Counts per 'Survived' value, split into one bar per 'Pclass'.
sns.set_style('whitegrid')
plt.figure(figsize=(8, 5))
sns.countplot(
    data=dataset,
    x='Survived',
    hue='Pclass',
    palette='viridis',
)

In [None]:
# Pie chart whose sectors are keyed by 'Sex' and sized by the 'Survived' column
# (presumably a 0/1 flag, so each sector is that sex's share of survivors -- confirm).
fig = px.pie(
    dataset,
    names='Sex',
    values='Survived',
    template='seaborn',
)
# Label each sector with its name and percentage, and pull sectors slightly apart.
fig.update_traces(textinfo="percent+label", pull=0.05, rotation=90)
fig.update(
    layout_showlegend=False,
    layout_title_text='Sex composition of Survive Passengers',
)

In [None]:
# Missing-value count per column, before any imputation.
dataset.isna().sum()

In [None]:
# Age distribution within each 'Pclass'; used to choose per-class fill values
# for missing ages.
sns.set_style('whitegrid')
plt.figure(figsize=(8, 5))
sns.boxplot(data=dataset, x='Pclass', y='Age')
plt.ylim(bottom=0, top=90)

In [None]:
def impute_age(cols):
    """Return a passenger's age, substituting a per-class default when missing.

    Parameters
    ----------
    cols : sequence of two values, ``(Age, Pclass)`` in that order.
        This is what ``df[['Age', 'Pclass']].apply(impute_age, axis=1)``
        passes for each row; a plain list or tuple works as well.

    Returns
    -------
    The original age when it is present. When it is missing: 37, 29 or 24 for
    Pclass 1, 2 or 3 respectively. If the class is none of those, the age is
    returned unchanged (still missing).
    """
    # Unpack by position. Integer keys on a label-indexed Series (``cols[0]``)
    # rely on a deprecated positional fallback in pandas, so avoid them.
    Age, Pclass = list(cols)[:2]

    if pd.isnull(Age):
        # Fixed per-class fill values; an unknown class leaves the gap explicit
        # instead of falling off the end of the function.
        return {1: 37, 2: 29, 3: 24}.get(Pclass, Age)
    return Age

In [None]:
# Row-wise imputation: every (Age, Pclass) pair goes through impute_age and the
# result replaces the 'Age' column.
dataset['Age'] = dataset.loc[:, ['Age', 'Pclass']].apply(impute_age, axis='columns')

In [None]:
# Missing-value count per column again, to confirm the effect of the age imputation.
dataset.isna().sum()

In [None]:
# Missing-value map with the 'Cabin' column left out of the picture.
sns.heatmap(
    dataset.drop(columns='Cabin').isna(),
    cmap='viridis',
    cbar=False,
    yticklabels=False,
)

In [None]:
# Histogram of 'Age' (after imputation) with fixed axis limits.
plt.figure(figsize=(12, 5))
sns.histplot(dataset['Age'], bins=25)
plt.xlim(left=0, right=100)
plt.ylim(bottom=0, top=225)

In [None]:
# Number of records per 'SibSp' value.
plt.figure(figsize=(8, 5))
sns.countplot(data=dataset, x='SibSp')
plt.ylim(bottom=0, top=700)

In [None]:
# Histogram of 'Fare'; only the upper axis limits are pinned.
plt.figure(figsize=(12, 5))
sns.histplot(data=dataset, x='Fare', bins=30)
plt.xlim(None, 600)
plt.ylim(None, 500)

In [None]:
# Indicator (dummy) columns for 'Sex'. drop_first=True omits the first category
# level so the remaining columns are not perfectly collinear.
sex = pd.get_dummies(dataset['Sex'],drop_first=True)

In [None]:
# Indicator (dummy) columns for 'Embarked', again omitting the first category level.
embark = pd.get_dummies(dataset['Embarked'],drop_first=True)

In [None]:
# Remove the raw text columns: 'Sex' and 'Embarked' are replaced by their
# indicator columns, 'Name' and 'Ticket' are not used as features.
dataset = dataset.drop(columns=['Sex', 'Name', 'Ticket', 'Embarked'])

In [None]:
# Append the indicator columns side by side with the remaining features.
dataset = pd.concat((dataset, sex, embark), axis='columns')

In [None]:
# 'Cabin' is not used as a feature; remove it.
dataset = dataset.drop(columns=['Cabin'])

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Target vector is the 'Survived' column; the feature matrix is every other column.
y = dataset['Survived']
X = dataset.drop(columns=['Survived'])

In [None]:
# Hold out half of the labelled rows for evaluation; the fixed seed keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=50,
    test_size=0.50,
)

In [None]:
# Shapes of the four split pieces, in the order X_train, X_test, y_train, y_test.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Fit a logistic-regression classifier on the training half.
# The features are unscaled (e.g. PassengerId and Fare span hundreds of units),
# which can exhaust the solver's default 100 iterations before it converges;
# give it more room so the fitted coefficients are the converged ones.
logmodel = LogisticRegression(max_iter=1000)
logmodel.fit(X_train,y_train)

In [None]:
# Predicted labels for the held-out half of the training data (X_test), used
# for the evaluation metrics below.
predictions = logmodel.predict(X_test)

In [None]:
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

In [None]:
# Report hold-out performance: overall accuracy, then the confusion matrix and
# the per-class classification report, each preceded by blank lines.
print('The accuracy score of the predictions made is:', accuracy_score(y_test, predictions))
print('\n', 'Confusion Matrix:', confusion_matrix(y_test, predictions), sep='\n')
print('\n', 'Classification Report:', classification_report(y_test, predictions), sep='\n')

In [None]:
# Build the submission from predictions on the real test split.
# `predictions` (computed for the metrics) belongs to X_test, a hold-out slice of
# train.csv; writing it here would attach outcomes of unrelated training
# passengers to the test-set PassengerIds. Score `test_data` instead.

# Apply the same preprocessing the training frame received.
test_features = test_data.copy()
test_features['Age'] = test_features[['Age','Pclass']].apply(impute_age,axis=1)
# The model cannot take NaN; use the training median for any missing fare.
test_features['Fare'] = test_features['Fare'].fillna(X_train['Fare'].median())
test_sex = pd.get_dummies(test_features['Sex'],drop_first=True)
test_embark = pd.get_dummies(test_features['Embarked'],drop_first=True)
test_features = pd.concat([test_features,test_sex,test_embark],axis=1)
# Keep exactly the columns the model was fitted on, in the same order. This also
# discards the raw text columns, and any indicator column absent from the test
# split is created as all zeros.
test_features = test_features.reindex(columns=X_train.columns,fill_value=0)

# PassengerId comes from the same frame that was scored, so ids and predictions
# are guaranteed to line up row by row.
final_submission = pd.DataFrame({
    'PassengerId': test_data['PassengerId'],
    'Survived': logmodel.predict(test_features),
})

In [None]:
# Write the submission frame to CSV without the index column.
final_submission.to_csv('final_gender_submission.csv',index=False)