### Importing dependencies

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


### Reading the dataset and renaming a few columns

In [None]:
# Load the passenger table and give the abbreviated columns readable names.
# All three renames are applied in a single call instead of three in-place ones.
readable_names = {
    'Pclass': 'Passenger_class',
    'SibSp': 'Sibling_spouse',
    'Parch': 'Parent_children',
}
df = pd.read_csv('titanic.csv').rename(columns=readable_names)
df.head(10)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the columns
# that describe() includes by default.
df.describe()

In [None]:
# Per-column dtype and non-null count; a quick way to spot columns with missing data.
df.info()

In [None]:
# Data type of every column (object columns need encoding before modelling).
df.dtypes

#### Checking the relationship between Passenger class and Survival

In [None]:
# Passenger counts per survival outcome, split by passenger class.
sns.countplot(data=df, x='Survived', hue='Passenger_class')

In [None]:
# Count the missing (NaN) values in each column.
df.isna().sum()

#### Relationship between Sex and Survival

In [None]:
# Passenger counts per sex, split by survival outcome.
sns.countplot(data=df, x='Sex', hue='Survived')

In [None]:
# One 'Fare' value is missing; replace it with the mean of the remaining fares.
#
# The result is assigned back to the column on purpose. Calling
# fillna(..., inplace=True) on the df['Fare'] selection is a chained in-place
# update: pandas 2.x emits a FutureWarning for it, and with Copy-on-Write
# enabled it modifies a temporary object and leaves df itself untouched.
df['Fare'] = df['Fare'].fillna(df['Fare'].mean())

# Show a preview rather than dumping the entire frame.
df.head(10)

In [None]:
# Survival counts for the passengers selected by a fare cutoff.
# Change FARE_CUTOFF (or the comparison) to explore how survival varies with fare.
FARE_CUTOFF = 30.0

# NOTE(review): despite its name, high_fare_df holds the passengers whose fare
# is BELOW the cutoff; switch the comparison to >= to inspect the high-fare group.
high_fare_df = df[df['Fare'] < FARE_CUTOFF]

# x must be a column NAME so that seaborn looks it up in `data`. Passing the
# Series df['Survived'] made seaborn plot the full, unfiltered column and
# silently ignore the fare filter.
sns.countplot(x='Survived', data=high_fare_df)
# The original note here concluded that a higher fare increases the survival
# rate; that reading came from the unfiltered plot, so re-check it against
# this corrected one.

In [None]:
# Same sex-vs-survival count plot as earlier in the notebook.
sns.countplot(data=df, x='Sex', hue='Survived')
# Original author's reading of this plot: every female passenger survived,
# whereas no male passenger did.

In [None]:
# List the column labels (after the renames applied when loading the data).
df.columns

#### Encoding the Sex values ('Male', 'Female') as integers

In [None]:
# Replace the string labels in 'Sex' with integer codes.
# Resulting mapping, as noted by the original author: female -> 0, male -> 1.
le = LabelEncoder()
encoded_sex = le.fit_transform(df['Sex'])
df['Sex'] = encoded_sex
df['Sex']

In [None]:
# Remove the 'Age' column from the working frame.
# errors='ignore' keeps this cell re-runnable: without it, executing the cell
# a second time raises KeyError because 'Age' has already been dropped.
df = df.drop(columns=['Age'], errors='ignore')

# Show a preview rather than dumping the entire frame.
df.head(10)

### Loading the independent and dependent variables

In [208]:
# Feature matrix (independent variables) and target vector (dependent variable)
# as plain NumPy arrays for scikit-learn.
feature_columns = ['Sex', 'Fare', 'Passenger_class']
y = df['Survived'].values
X = df[feature_columns].values

----

##### Checking the relationship between the following columns:
* Sex
* Fare
* Passenger Class
* Survived

In [None]:
# Pairwise scatter/histogram grid over the model features and the target.
pairplot_columns = ['Sex', 'Fare', 'Passenger_class', 'Survived']
sns.pairplot(df[pairplot_columns])

#### Splitting the data into train and test sets

In [209]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Fitting the Logistic Regression model

In [210]:
# Fit a logistic-regression classifier with default settings on the training
# split, then predict labels for both splits so their accuracies can be compared.
log_reg = LogisticRegression().fit(X_train, y_train)
y_pred_test = log_reg.predict(X_test)
y_pred_train = log_reg.predict(X_train)

#### Checking the accuracy

In [None]:
# Accuracy on the data the model was fitted on vs. the held-out data.
train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)
print("Train accuracy score: ", train_accuracy)
print("Test accuracy score: ", test_accuracy)