# Importing Packages

In [1]:
import pandas as pd
import numpy as np

## Importing Data

In [2]:
train_data=pd.read_csv("G:\\Download\\Compressed\\all\\train.csv")
test_data=pd.read_csv("G:\\Download\\Compressed\\all\\test.csv")
train_data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
train_data_shape=train_data.shape
test_data_shape=test_data.shape

In [4]:
print("Training Dataset Has Dimension: " + str(train_data.shape) + "\nTest Dataset Has Dimension: " + str(test_data_shape))

Training Dataset Has Dimension: (891, 12)
Test Dataset Has Dimension: (418, 11)


In [5]:
train_data_count=train_data.count()
test_data_count=test_data.count()

## Training Dataset Count

In [6]:
print(train_data_count) 

PassengerId    891
Survived       891
Pclass         891
Name           891
Sex            891
Age            714
SibSp          891
Parch          891
Ticket         891
Fare           891
Cabin          204
Embarked       889
dtype: int64


## Test Dataset Count

In [7]:
print(test_data_count) 

PassengerId    418
Pclass         418
Name           418
Sex            418
Age            332
SibSp          418
Parch          418
Ticket         418
Fare           417
Cabin           91
Embarked       418
dtype: int64


In [8]:
print(train_data_count.min())
print(test_data_count.min())

204
91


## Checking NAN values

In [9]:
if(train_data_shape[0]==train_data_count.min()):
    print("We have complete Training Data")
else:
    print("We have NAN values in our training data")

if(test_data_shape[0]==test_data_count.min()):
    print("We have complete test Data")
else:
    print("We have NAN values in our test data")

We have NAN values in our training data
We have NAN values in our test data


## Checking dtypes 

In [10]:
train_data.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

In [11]:
train_data.dtypes.reset_index()

Unnamed: 0,index,0
0,PassengerId,int64
1,Survived,int64
2,Pclass,int64
3,Name,object
4,Sex,object
5,Age,float64
6,SibSp,int64
7,Parch,int64
8,Ticket,object
9,Fare,float64


In [12]:
dtypes=train_data.dtypes.reset_index()
dtypes.columns=['Column Name','Column Type']
dtypes

Unnamed: 0,Column Name,Column Type
0,PassengerId,int64
1,Survived,int64
2,Pclass,int64
3,Name,object
4,Sex,object
5,Age,float64
6,SibSp,int64
7,Parch,int64
8,Ticket,object
9,Fare,float64


In [13]:
dtypes.groupby('Column Type').aggregate('count').reset_index()

Unnamed: 0,Column Type,Column Name
0,int64,5
1,float64,2
2,object,5


## NAN Values

In [14]:
train_data.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [15]:
test_data.isnull().sum()

PassengerId      0
Pclass           0
Name             0
Sex              0
Age             86
SibSp            0
Parch            0
Ticket           0
Fare             1
Cabin          327
Embarked         0
dtype: int64

In [16]:
null=pd.concat([train_data.isnull().sum(),test_data.isnull().sum()],axis=1,sort=False,keys=['Train Dataset', 'Test Dataset'])
null[null.sum(axis=1)>1]

Unnamed: 0,Train Dataset,Test Dataset
Age,177,86.0
Cabin,687,327.0
Embarked,2,0.0


# Analyzing Data

### 1. Class vs Survived

In [17]:
train_data[['Pclass','Survived']].groupby(['Pclass'], as_index=False).mean()

Unnamed: 0,Pclass,Survived
0,1,0.62963
1,2,0.472826
2,3,0.242363


### 2. Gender vs Survived

In [18]:
train_data[['Sex','Survived']].groupby(['Sex'], as_index=False).mean()

Unnamed: 0,Sex,Survived
0,female,0.742038
1,male,0.188908


### 3. Siblings-Spouse vs Survived

In [19]:
train_data[['SibSp','Survived']].groupby(['SibSp'], as_index=False).mean()

Unnamed: 0,SibSp,Survived
0,0,0.345395
1,1,0.535885
2,2,0.464286
3,3,0.25
4,4,0.166667
5,5,0.0
6,8,0.0


### 4. Parents-Children vs Survived

In [20]:
train_data[['Parch','Survived']].groupby(['Parch'], as_index=False).mean()

Unnamed: 0,Parch,Survived
0,0,0.343658
1,1,0.550847
2,2,0.5
3,3,0.6
4,4,0.0
5,5,0.2
6,6,0.0


# Data Cleaning

### Adding Minor Column

In [21]:
train_data['isMinor']=0
train_data.loc[(train_data['Age']<14) & ((train_data['Pclass']==1) | (train_data['Pclass']==2)) , 'isMinor']=1

test_data['isMinor']=0
test_data.loc[(test_data['Age']<14) & ((test_data['Pclass']==1) | (test_data['Pclass']==2)) , 'isMinor']=1

train_data.head()



Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,isMinor
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,0
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,0
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,0
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,0
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,0


In [22]:
train_data.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
isMinor          0
dtype: int64

### Replacing Missing Age values with Random values (between mean-std and mean+std)

In [23]:
train_data.loc[np.isnan(train_data['Age']), 'Age']=np.random.randint(train_data['Age'].mean()-train_data['Age'].std(), train_data['Age'].mean()+train_data['Age'].std())
test_data.loc[np.isnan(test_data['Age']), 'Age']=np.random.randint(test_data['Age'].mean()-test_data['Age'].std(), test_data['Age'].mean()+test_data['Age'].std())
train_data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,isMinor
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,0
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,0
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,0
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,0
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,0


### Adding Numerical equivalent of Embarked values

In [24]:
## Replacing NAN with S
train_data["Embarked"].fillna('S', inplace=True)
test_data["Embarked"].fillna('S', inplace=True)

In [25]:
### Converting

In [26]:
train_data['Port']=0
train_data['Port']=train_data['Embarked'].map( {'S':0, 'C':1, 'Q':2} ).astype(int)

test_data['Port']=0
test_data['Port']=test_data['Embarked'].map( {'S':0, 'C':1, 'Q':2} ).astype(int)

train_data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,isMinor,Port
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,0,0
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,0,1
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,0,0
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,0,0
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,0,0


### Filling Fare NAN values with median

In [27]:
train_data['Fare'].fillna(train_data['Fare'].median(),inplace=True)

In [28]:
test_data['Fare'].fillna(train_data['Fare'].median(),inplace=True)
test_data.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,isMinor,Port
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q,0,2
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S,0,0
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q,0,2
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S,0,0
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S,0,0


### Adding Has_Cabin Column

In [29]:
train_data['Has_Cabin'] = train_data["Cabin"].apply(lambda x: 0 if type(x) == float else 1)
test_data['Has_Cabin'] = test_data["Cabin"].apply(lambda x: 0 if type(x) == float else 1)

test_data.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,isMinor,Port,Has_Cabin
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q,0,2,0
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S,0,0,0
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q,0,2,0
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S,0,0,0
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S,0,0,0


### Adding Family Size

In [30]:
full_data=[train_data,test_data]
for data in full_data:
    data['FamilySize']=data['Parch']+data['SibSp']+1

### Adding IsAlone Column

In [31]:
for data in full_data:
    data['IsAlone']=0
    data.loc[data['FamilySize'] == 1, 'IsAlone'] = 1

### Adding Family Size Group Column

In [32]:
for data in full_data:
    data['FamilySizeGroup']='small'
    data.loc[data['FamilySize'] == 1, 'FamilySizeGroup'] = 'Alone'
    data.loc[data['FamilySize'] >= 5, 'FamilySizeGroup'] = 'Big'        

In [33]:
test_data.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,isMinor,Port,Has_Cabin,FamilySize,IsAlone,FamilySizeGroup
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q,0,2,0,1,1,Alone
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S,0,0,0,2,0,small
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q,0,2,0,1,1,Alone
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S,0,0,0,1,1,Alone
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S,0,0,0,3,0,small


### Extracting and Adding Titles 

In [34]:
train_data['Title'] = train_data.Name.str.extract(' ([A-Za-z]+)\.', expand=False)
test_data['Title'] = test_data.Name.str.extract(' ([A-Za-z]+)\.', expand=False)

In [35]:
for data in full_data:
    data['Title']=data['Title'].replace(['Lady', 'Countess','Capt', 'Col','Don', 'Dr', 'Major', 'Rev', 'Sir',
                                         'Jonkheer', 'Dona'], 'Rare')
    data['Title']=data['Title'].replace(['Mlle', 'Ms','Mme'], 'Miss')

In [36]:
train_data['Title']

0          Mr
1         Mrs
2        Miss
3         Mrs
4          Mr
5          Mr
6          Mr
7      Master
8         Mrs
9         Mrs
10       Miss
11       Miss
12         Mr
13         Mr
14       Miss
15        Mrs
16     Master
17         Mr
18        Mrs
19        Mrs
20         Mr
21         Mr
22       Miss
23         Mr
24       Miss
25        Mrs
26         Mr
27         Mr
28       Miss
29         Mr
        ...  
861        Mr
862       Mrs
863      Miss
864        Mr
865       Mrs
866      Miss
867        Mr
868        Mr
869    Master
870        Mr
871       Mrs
872        Mr
873        Mr
874       Mrs
875      Miss
876        Mr
877        Mr
878        Mr
879       Mrs
880       Mrs
881        Mr
882      Miss
883        Mr
884        Mr
885       Mrs
886      Rare
887      Miss
888      Miss
889        Mr
890        Mr
Name: Title, Length: 891, dtype: object

#### Checking Survived Rate for Family Size

In [37]:
train_data[['FamilySize','Survived']].groupby(['FamilySize'], as_index=False).mean()

Unnamed: 0,FamilySize,Survived
0,1,0.303538
1,2,0.552795
2,3,0.578431
3,4,0.724138
4,5,0.2
5,6,0.136364
6,7,0.333333
7,8,0.0
8,11,0.0


### Converting Gender to Numerical Values

In [38]:
for data in full_data:
    data['Sex']=data['Sex'].map({'male':0,'female':1}).astype(int)

In [39]:
test_data.loc[test_data['Age'] <= 14]

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,isMinor,Port,Has_Cabin,FamilySize,IsAlone,FamilySizeGroup,Title
5,897,3,"Svensson, Mr. Johan Cervin",0,14.0,0,0,7538,9.225,,S,0,0,0,1,1,Alone,Mr
21,913,3,"Olsen, Master. Artur Karl",0,9.0,0,1,C 17368,3.1708,,S,0,0,0,2,0,small,Master
55,947,3,"Rice, Master. Albert",0,10.0,4,1,382652,29.125,,Q,0,2,0,6,0,Big,Master
64,956,1,"Ryerson, Master. John Borie",0,13.0,2,2,PC 17608,262.375,B57 B59 B63 B66,C,1,1,1,5,0,Big,Master
80,972,3,"Boulos, Master. Akar",0,6.0,1,1,2678,15.2458,,C,0,1,0,3,0,small,Master
89,981,2,"Wells, Master. Ralph Lester",0,2.0,1,1,29103,23.0,,S,1,0,0,3,0,small,Master
117,1009,3,"Sandstrom, Miss. Beatrice Irene",1,1.0,1,1,PP 9549,16.7,G6,S,0,0,1,3,0,small,Miss
120,1012,2,"Watt, Miss. Bertha J",1,12.0,0,0,C.A. 33595,15.75,,S,1,0,0,1,1,Alone,Miss
140,1032,3,"Goodwin, Miss. Jessie Allis",1,10.0,5,2,CA 2144,46.9,,S,0,0,0,8,0,Big,Miss
154,1046,3,"Asplund, Master. Filip Oscar",0,13.0,4,2,347077,31.3875,,S,0,0,0,7,0,Big,Master


In [40]:
test_data.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,isMinor,Port,Has_Cabin,FamilySize,IsAlone,FamilySizeGroup,Title
0,892,3,"Kelly, Mr. James",0,34.5,0,0,330911,7.8292,,Q,0,2,0,1,1,Alone,Mr
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",1,47.0,1,0,363272,7.0,,S,0,0,0,2,0,small,Mrs
2,894,2,"Myles, Mr. Thomas Francis",0,62.0,0,0,240276,9.6875,,Q,0,2,0,1,1,Alone,Mr
3,895,3,"Wirz, Mr. Albert",0,27.0,0,0,315154,8.6625,,S,0,0,0,1,1,Alone,Mr
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",1,22.0,1,1,3101298,12.2875,,S,0,0,0,3,0,small,Mrs


### Converting Age to Age groups

In [41]:
for data in full_data:    
    data.loc[ data['Age'] <= 14, 'Age'] = 0
    data.loc[(data['Age'] > 14) & (data['Age'] <= 32), 'Age'] = 1
    data.loc[(data['Age'] > 32) & (data['Age'] <= 48), 'Age'] = 2
    data.loc[(data['Age'] > 48) & (data['Age'] <= 64), 'Age'] = 3
    data.loc[ data['Age'] > 64, 'Age'] = 4

In [42]:
test_data.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,isMinor,Port,Has_Cabin,FamilySize,IsAlone,FamilySizeGroup,Title
0,892,3,"Kelly, Mr. James",0,2.0,0,0,330911,7.8292,,Q,0,2,0,1,1,Alone,Mr
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",1,2.0,1,0,363272,7.0,,S,0,0,0,2,0,small,Mrs
2,894,2,"Myles, Mr. Thomas Francis",0,3.0,0,0,240276,9.6875,,Q,0,2,0,1,1,Alone,Mr
3,895,3,"Wirz, Mr. Albert",0,1.0,0,0,315154,8.6625,,S,0,0,0,1,1,Alone,Mr
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",1,1.0,1,1,3101298,12.2875,,S,0,0,0,3,0,small,Mrs


### Converting Fare to Fare Groups

In [43]:
for data in full_data:
    data.loc[ data['Fare'] <= 7.91, 'Fare'] = 0
    data.loc[(data['Fare'] > 7.91) & (data['Fare'] <= 14.454), 'Fare'] = 1
    data.loc[(data['Fare'] > 14.454) & (data['Fare'] <= 31), 'Fare']   = 2
    data.loc[ data['Fare'] > 31, 'Fare'] = 3
    data['Fare'] = data['Fare'].astype(int)

### Mapping Titles and Family Size Group to Numerical Values

In [44]:
title_mapping = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Rare": 5}
family_mapping = {"small": 0, "Alone": 1, "Big": 2}
for data in full_data:
    data['Title'] = data['Title'].map(title_mapping)
    data['FamilySizeGroup'] = data['FamilySizeGroup'].map(family_mapping)

In [45]:
train_data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,isMinor,Port,Has_Cabin,FamilySize,IsAlone,FamilySizeGroup,Title
0,1,0,3,"Braund, Mr. Owen Harris",0,1.0,1,0,A/5 21171,0,,S,0,0,0,2,0,0,1
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,2.0,1,0,PC 17599,3,C85,C,0,1,1,2,0,0,3
2,3,1,3,"Heikkinen, Miss. Laina",1,1.0,0,0,STON/O2. 3101282,1,,S,0,0,0,1,1,1,2
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,2.0,1,0,113803,3,C123,S,0,0,1,2,0,0,3
4,5,0,3,"Allen, Mr. William Henry",0,2.0,0,0,373450,1,,S,0,0,0,1,1,1,1


In [46]:
train_data['isMinor'].groupby(train_data['isMinor']).count()

isMinor
0    869
1     22
Name: isMinor, dtype: int64

### Deleting Column that we do not need

In [47]:
del train_data['Name']
del test_data['Name']

Passenger=test_data['PassengerId']

del train_data['PassengerId']
del test_data['PassengerId']

del train_data['SibSp']
del test_data['SibSp']

del train_data['Parch']
del test_data['Parch']

del train_data['FamilySize']
del test_data['FamilySize']

del train_data['Cabin']
del test_data['Cabin']

del train_data['Ticket']
del test_data['Ticket']

del train_data['Embarked']
del test_data['Embarked']


In [48]:
test_data.head()

Unnamed: 0,Pclass,Sex,Age,Fare,isMinor,Port,Has_Cabin,IsAlone,FamilySizeGroup,Title
0,3,0,2.0,0,0,2,0,1,1,1
1,3,1,2.0,0,0,0,0,0,0,3
2,2,0,3.0,1,0,2,0,1,1,1
3,3,0,1.0,1,0,0,0,1,1,1
4,3,1,1.0,1,0,0,0,0,0,3


# Spliting Data

In [49]:
X_train=train_data.drop('Survived',axis=1)
y_train=train_data['Survived']


In [50]:
X_test=test_data.copy()

In [51]:
test_data.head()

Unnamed: 0,Pclass,Sex,Age,Fare,isMinor,Port,Has_Cabin,IsAlone,FamilySizeGroup,Title
0,3,0,2.0,0,0,2,0,1,1,1
1,3,1,2.0,0,0,0,0,0,0,3
2,2,0,3.0,1,0,2,0,1,1,1
3,3,0,1.0,1,0,0,0,1,1,1
4,3,1,1.0,1,0,0,0,0,0,3


## Viewing Data Before Applying ML algorithms

In [52]:
print("*** X Training Values ***\n")
print(X_train.head())
print("\n\n*** y Training Values ***\n")
print(y_train.head())
print("\n\n*** X Testing Values ***\n")
print(X_test.head())

*** X Training Values ***

   Pclass  Sex  Age  Fare  isMinor  Port  Has_Cabin  IsAlone  FamilySizeGroup  \
0       3    0  1.0     0        0     0          0        0                0   
1       1    1  2.0     3        0     1          1        0                0   
2       3    1  1.0     1        0     0          0        1                1   
3       1    1  2.0     3        0     0          1        0                0   
4       3    0  2.0     1        0     0          0        1                1   

   Title  
0      1  
1      3  
2      2  
3      3  
4      1  


*** y Training Values ***

0    0
1    1
2    1
3    1
4    0
Name: Survived, dtype: int64


*** X Testing Values ***

   Pclass  Sex  Age  Fare  isMinor  Port  Has_Cabin  IsAlone  FamilySizeGroup  \
0       3    0  2.0     0        0     2          0        1                1   
1       3    1  2.0     0        0     0          0        0                0   
2       2    0  3.0     1        0     2          0     

# Applying ML Algorithms

## 1. LogisticRegression

In [53]:
from sklearn.linear_model import LogisticRegression

In [54]:
log_clf=LogisticRegression()

In [55]:
log_clf.fit(X_train,y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [56]:
y_predict=log_clf.predict(X_test)

In [57]:
Score=log_clf.score(X_train,y_train)

In [58]:
print('Accuracy Score is: %s' %Score)

Accuracy Score is: 0.8148148148148148


## 2. Support Vector Machine

In [60]:
from sklearn.svm import SVC

svc_clf=SVC()

svc_clf.fit(X_train,y_train)

y_predict=svc_clf.predict(X_test)

Score=svc_clf.score(X_train,y_train)

print('Accuracy Score is: %s' %Score)

Accuracy Score is: 0.835016835016835


In [61]:
y_predict

array([0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1,
       1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0,

In [62]:
submission=pd.DataFrame({'PassengerId':Passenger,'Survived':y_predict})

In [63]:
submission.to_csv('titanic.csv',index=False)

In [64]:
print("Exported")

Exported
