## Importing Training Data for Model

In [1]:
#Importing libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
import pandas as pd


#Loading Training Data
df=pd.read_csv("Train.csv")
df.head()


Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
0,3.0,0.0,"O'Donoghue, Ms. Bridget",female,,0.0,0.0,364856,7.75,,Q,,,
1,2.0,0.0,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")",male,39.0,0.0,0.0,250655,26.0,,S,,,
2,2.0,1.0,"Smith, Miss. Marion Elsie",female,40.0,0.0,0.0,31418,13.0,,S,9,,
3,3.0,1.0,"Goldsmith, Mrs. Frank John (Emily Alice Brown)",female,31.0,1.0,1.0,363291,20.525,,S,C D,,"Strood, Kent, England Detroit, MI"
4,3.0,1.0,"McCoy, Miss. Agnes",female,,2.0,0.0,367226,23.25,,Q,16,,


## Replacing NA values with Mean in Age Column

In [2]:
#Replacing NA with Mean
df.Age.fillna(value=df.Age.mean(),inplace=True)
df.head()


AttributeError: 'DataFrame' object has no attribute 'Age'

## Dropping Irrelevant cols for Plane Crash prediction

In [3]:
#Dropping irrelevant cols
del df['Name']
del df['Ticket']
del df['Cabin']
del df['Fare']
df.head()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Embarked,Survived
0,2,female,29.0,1,0,S,1
1,3,male,29.70056,0,0,S,0
2,2,male,39.0,0,0,S,0
3,3,female,29.0,0,4,S,0
4,3,male,25.0,0,0,S,0


## Fixing Categorical columns via functions f1 and f2

In [4]:
#Fixing Sex and Embarked cols
def f1(s):
    if s=='male':
        return 0
    else:
        return 1
def f2(s):
    if s=='S':
        return 2
    elif s=='C':
        return 3
    else:
        return 4

df['sex']=df.Sex.apply(f1)
df['Emb']=df.Embarked.apply(f2)
del df['Sex']
del df['Embarked']
df.head()


Unnamed: 0,Pclass,Age,SibSp,Parch,Survived,sex,Emb
0,2,29.0,1,0,1,1,2
1,3,29.70056,0,0,0,0,2
2,2,39.0,0,0,0,0,2
3,3,29.0,0,4,0,1,2
4,3,25.0,0,0,0,0,2


## Making Training Data for model fitting

In [5]:
#Getting X and Y for training
X_train=df[['Pclass','Age','SibSp','Parch','sex','Emb']].values
y_train=df['Survived']


## Training the model

In [6]:
# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)

#Training the Model
model=LogisticRegression()
model.fit(X_train,y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

## Calculating model score on training data itself

In [7]:
#Calculating Model score after training

model.score(X_train,y_train)

0.7994011976047904

##  Importing Testing Data

In [8]:
#Loading test data set
df1=pd.read_csv("test_titanic_x_test.csv")

## Replacing NAN values in Age Column

In [9]:
#Replacing NA with Mean
df1.Age.fillna(value=df.Age.mean(),inplace=True)
df1.head()

Unnamed: 0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,2,"Davies, Master. John Morgan Jr",male,8.0,1,1,C.A. 33112,36.75,,S
1,1,"Leader, Dr. Alice (Farnham)",female,49.0,0,0,17465,25.9292,D17,S
2,3,"Kilgannon, Mr. Thomas J",male,29.70056,0,0,36865,7.7375,,Q
3,2,"Jacobsohn, Mrs. Sidney Samuel (Amy Frances Chr...",female,24.0,2,1,243847,27.0,,S
4,1,"McGough, Mr. James Robert",male,36.0,0,0,PC 17473,26.2875,E25,S


## Dropping irrelevant cols with regards to plane crash

In [10]:
#Dropping irrelevant cols
del df1['Name']
del df1['Ticket']
del df1['Cabin']
del df1['Fare']
df1.head()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Embarked
0,2,male,8.0,1,1,S
1,1,female,49.0,0,0,S
2,3,male,29.70056,0,0,Q
3,2,female,24.0,2,1,S
4,1,male,36.0,0,0,S


## Fixing categorical values via f1 and f2 functions

In [11]:
#Fixing Sex and Embarked cols
def f1(s):
    if s=='male':
        return 0
    else:
        return 1
def f2(s):
    if s=='S':
        return 2
    elif s=='C':
        return 3
    else:
        return 4

df1['sex']=df1.Sex.apply(f1)
df1['Emb']=df1.Embarked.apply(f2)
del df1['Sex']
del df1['Embarked']
df1.head()


Unnamed: 0,Pclass,Age,SibSp,Parch,sex,Emb
0,2,8.0,1,1,0,2
1,1,49.0,0,0,1,2
2,3,29.70056,0,0,0,4
3,2,24.0,2,1,1,2
4,1,36.0,0,0,0,2


## Preparing testing data

In [12]:
#Getting X of testing
X_test=df1[['Pclass','Age','SibSp','Parch','sex','Emb']].values

## Saving the predictions in a CSV File , predictions.csv

In [13]:
pred=model.predict(X_test)
#Saving predictions
l=[]
for i in pred:
    
    l.append(int(i))
y_test_pred=np.array(l)

np.savetxt("predictions.csv",y_test_pred,delimiter=",",fmt='%d')