# Logistic Regression | Machine Learning | Coding Ninjas

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression

## Importing Titanic Dataset

In [2]:
titanic = pd.read_csv("training_titanic.csv")
titanic

Unnamed: 0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
0,2,"Weisz, Mrs. Leopold (Mathilde Francoise Pede)",female,29.0,1,0,228414,26.0000,,S,1
1,3,"Williams, Mr. Howard Hugh ""Harry""",male,,0,0,A/5 2466,8.0500,,S,0
2,2,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")",male,39.0,0,0,250655,26.0000,,S,0
3,3,"Palsson, Mrs. Nils (Alma Cornelia Berglund)",female,29.0,0,4,349909,21.0750,,S,0
4,3,"Sutehall, Mr. Henry Jr",male,25.0,0,0,SOTON/OQ 392076,7.0500,,S,0
...,...,...,...,...,...,...,...,...,...,...,...
663,2,"Ilett, Miss. Bertha",female,17.0,0,0,SO/C 14885,10.5000,,S,1
664,3,"Morrow, Mr. Thomas Rowan",male,,0,0,372622,7.7500,,Q,0
665,3,"Bing, Mr. Lee",male,32.0,0,0,1601,56.4958,,S,1
666,3,"Strandberg, Miss. Ida Sofia",female,22.0,0,0,7553,9.8375,,S,0


## Data Cleaning on Train Data

In [3]:
titanic = titanic.drop(['Name','Ticket','Cabin'], axis = 1)
titanic

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Survived
0,2,female,29.0,1,0,26.0000,S,1
1,3,male,,0,0,8.0500,S,0
2,2,male,39.0,0,0,26.0000,S,0
3,3,female,29.0,0,4,21.0750,S,0
4,3,male,25.0,0,0,7.0500,S,0
...,...,...,...,...,...,...,...,...
663,2,female,17.0,0,0,10.5000,S,1
664,3,male,,0,0,7.7500,Q,0
665,3,male,32.0,0,0,56.4958,S,1
666,3,female,22.0,0,0,9.8375,S,0


In [4]:
titanic.describe()

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Survived
count,668.0,536.0,668.0,668.0,668.0,668.0
mean,2.296407,29.70056,0.528443,0.407186,32.064552,0.402695
std,0.831638,14.240257,1.080327,0.854695,45.320835,0.490808
min,1.0,0.67,0.0,0.0,0.0,0.0
25%,2.0,21.0,0.0,0.0,7.925,0.0
50%,3.0,29.0,0.0,0.0,14.75,0.0
75%,3.0,38.25,1.0,0.0,31.275,1.0
max,3.0,80.0,8.0,6.0,512.3292,1.0


In [5]:
titanic.isna().sum()

Pclass        0
Sex           0
Age         132
SibSp         0
Parch         0
Fare          0
Embarked      1
Survived      0
dtype: int64

In [6]:
titanic["Embarked"].value_counts()

S    484
C    133
Q     50
Name: Embarked, dtype: int64

In [7]:
titanic["Embarked"] = titanic["Embarked"].fillna('S')
titanic

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Survived
0,2,female,29.0,1,0,26.0000,S,1
1,3,male,,0,0,8.0500,S,0
2,2,male,39.0,0,0,26.0000,S,0
3,3,female,29.0,0,4,21.0750,S,0
4,3,male,25.0,0,0,7.0500,S,0
...,...,...,...,...,...,...,...,...
663,2,female,17.0,0,0,10.5000,S,1
664,3,male,,0,0,7.7500,Q,0
665,3,male,32.0,0,0,56.4958,S,1
666,3,female,22.0,0,0,9.8375,S,0


In [8]:
titanic["Age"].fillna(titanic["Age"].mean(),inplace=True)
titanic

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Survived
0,2,female,29.00000,1,0,26.0000,S,1
1,3,male,29.70056,0,0,8.0500,S,0
2,2,male,39.00000,0,0,26.0000,S,0
3,3,female,29.00000,0,4,21.0750,S,0
4,3,male,25.00000,0,0,7.0500,S,0
...,...,...,...,...,...,...,...,...
663,2,female,17.00000,0,0,10.5000,S,1
664,3,male,29.70056,0,0,7.7500,Q,0
665,3,male,32.00000,0,0,56.4958,S,1
666,3,female,22.00000,0,0,9.8375,S,0


In [9]:
def gender(str):
    if str == "male":
        return 0
    else:
        return 1

titanic["Gender"] = titanic["Sex"].apply(gender)
titanic = titanic.drop(['Sex'], axis = 1)
titanic

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Embarked,Survived,Gender
0,2,29.00000,1,0,26.0000,S,1,1
1,3,29.70056,0,0,8.0500,S,0,0
2,2,39.00000,0,0,26.0000,S,0,0
3,3,29.00000,0,4,21.0750,S,0,1
4,3,25.00000,0,0,7.0500,S,0,0
...,...,...,...,...,...,...,...,...
663,2,17.00000,0,0,10.5000,S,1,1
664,3,29.70056,0,0,7.7500,Q,0,0
665,3,32.00000,0,0,56.4958,S,1,0
666,3,22.00000,0,0,9.8375,S,0,1


In [10]:
def embarked(str):
    if str == "S":
        return 2
    elif str == "C":
        return 1
    else:
        return 0

titanic["New_Embarked"] = titanic["Embarked"].apply(embarked)
titanic = titanic.drop(['Embarked'], axis = 1)
titanic

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Survived,Gender,New_Embarked
0,2,29.00000,1,0,26.0000,1,1,2
1,3,29.70056,0,0,8.0500,0,0,2
2,2,39.00000,0,0,26.0000,0,0,2
3,3,29.00000,0,4,21.0750,0,1,2
4,3,25.00000,0,0,7.0500,0,0,2
...,...,...,...,...,...,...,...,...
663,2,17.00000,0,0,10.5000,1,1,2
664,3,29.70056,0,0,7.7500,0,0,0
665,3,32.00000,0,0,56.4958,1,0,2
666,3,22.00000,0,0,9.8375,0,1,2


In [11]:
titanic.rename(columns = {'New_Embarked':'Embarked'}, inplace = True)
titanic

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Survived,Gender,Embarked
0,2,29.00000,1,0,26.0000,1,1,2
1,3,29.70056,0,0,8.0500,0,0,2
2,2,39.00000,0,0,26.0000,0,0,2
3,3,29.00000,0,4,21.0750,0,1,2
4,3,25.00000,0,0,7.0500,0,0,2
...,...,...,...,...,...,...,...,...
663,2,17.00000,0,0,10.5000,1,1,2
664,3,29.70056,0,0,7.7500,0,0,0
665,3,32.00000,0,0,56.4958,1,0,2
666,3,22.00000,0,0,9.8375,0,1,2


## Data Cleaning on Test Data

In [12]:
titanic_test = pd.read_csv("test_titanic.csv")
titanic_test

Unnamed: 0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,2,"Davies, Master. John Morgan Jr",male,8.0,1,1,C.A. 33112,36.7500,,S
1,1,"Leader, Dr. Alice (Farnham)",female,49.0,0,0,17465,25.9292,D17,S
2,3,"Kilgannon, Mr. Thomas J",male,,0,0,36865,7.7375,,Q
3,2,"Jacobsohn, Mrs. Sidney Samuel (Amy Frances Chr...",female,24.0,2,1,243847,27.0000,,S
4,1,"McGough, Mr. James Robert",male,36.0,0,0,PC 17473,26.2875,E25,S
...,...,...,...,...,...,...,...,...,...,...
218,3,"Lindqvist, Mr. Eino William",male,20.0,1,0,STON/O 2. 3101285,7.9250,,S
219,1,"Butt, Major. Archibald Willingham",male,45.0,0,0,113050,26.5500,B38,S
220,1,"Penasco y Castellana, Mrs. Victor de Satode (M...",female,17.0,1,0,PC 17758,108.9000,C65,C
221,3,"Holm, Mr. John Fredrik Alexander",male,43.0,0,0,C 7075,6.4500,,S


In [13]:
titanic_test = titanic_test.drop(['Name','Ticket','Cabin'], axis = 1)
titanic_test

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,2,male,8.0,1,1,36.7500,S
1,1,female,49.0,0,0,25.9292,S
2,3,male,,0,0,7.7375,Q
3,2,female,24.0,2,1,27.0000,S
4,1,male,36.0,0,0,26.2875,S
...,...,...,...,...,...,...,...
218,3,male,20.0,1,0,7.9250,S
219,1,male,45.0,0,0,26.5500,S
220,1,female,17.0,1,0,108.9000,C
221,3,male,43.0,0,0,6.4500,S


In [14]:
titanic.describe()

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Survived,Gender,Embarked
count,668.0,668.0,668.0,668.0,668.0,668.0,668.0,668.0
mean,2.296407,29.70056,0.528443,0.407186,32.064552,0.402695,0.360778,1.651198
std,0.831638,12.753571,1.080327,0.854695,45.320835,0.490808,0.480586,0.614333
min,1.0,0.67,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,23.0,0.0,0.0,7.925,0.0,0.0,1.0
50%,3.0,29.70056,0.0,0.0,14.75,0.0,0.0,2.0
75%,3.0,35.0,1.0,0.0,31.275,1.0,1.0,2.0
max,3.0,80.0,8.0,6.0,512.3292,1.0,1.0,2.0


In [15]:
titanic_test["Embarked"] = titanic_test["Embarked"].fillna('S')
titanic_test

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,2,male,8.0,1,1,36.7500,S
1,1,female,49.0,0,0,25.9292,S
2,3,male,,0,0,7.7375,Q
3,2,female,24.0,2,1,27.0000,S
4,1,male,36.0,0,0,26.2875,S
...,...,...,...,...,...,...,...
218,3,male,20.0,1,0,7.9250,S
219,1,male,45.0,0,0,26.5500,S
220,1,female,17.0,1,0,108.9000,C
221,3,male,43.0,0,0,6.4500,S


In [16]:
titanic_test["Age"].fillna(titanic_test["Age"].mean(),inplace=True)
titanic_test

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,2,male,8.000000,1,1,36.7500,S
1,1,female,49.000000,0,0,25.9292,S
2,3,male,29.694775,0,0,7.7375,Q
3,2,female,24.000000,2,1,27.0000,S
4,1,male,36.000000,0,0,26.2875,S
...,...,...,...,...,...,...,...
218,3,male,20.000000,1,0,7.9250,S
219,1,male,45.000000,0,0,26.5500,S
220,1,female,17.000000,1,0,108.9000,C
221,3,male,43.000000,0,0,6.4500,S


In [17]:
def gender(str):
    if str == "male":
        return 0
    else:
        return 1

titanic_test["Gender"] = titanic_test["Sex"].apply(gender)
titanic_test = titanic_test.drop(['Sex'], axis = 1)
titanic_test

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Embarked,Gender
0,2,8.000000,1,1,36.7500,S,0
1,1,49.000000,0,0,25.9292,S,1
2,3,29.694775,0,0,7.7375,Q,0
3,2,24.000000,2,1,27.0000,S,1
4,1,36.000000,0,0,26.2875,S,0
...,...,...,...,...,...,...,...
218,3,20.000000,1,0,7.9250,S,0
219,1,45.000000,0,0,26.5500,S,0
220,1,17.000000,1,0,108.9000,C,1
221,3,43.000000,0,0,6.4500,S,0


In [18]:
def embarked(str):
    if str == "S":
        return 2
    elif str == "C":
        return 1
    else:
        return 0

titanic_test["New_Embarked"] = titanic_test["Embarked"].apply(embarked)
titanic_test = titanic_test.drop(['Embarked'], axis = 1)
titanic_test

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Gender,New_Embarked
0,2,8.000000,1,1,36.7500,0,2
1,1,49.000000,0,0,25.9292,1,2
2,3,29.694775,0,0,7.7375,0,0
3,2,24.000000,2,1,27.0000,1,2
4,1,36.000000,0,0,26.2875,0,2
...,...,...,...,...,...,...,...
218,3,20.000000,1,0,7.9250,0,2
219,1,45.000000,0,0,26.5500,0,2
220,1,17.000000,1,0,108.9000,1,1
221,3,43.000000,0,0,6.4500,0,2


In [19]:
titanic_test.rename(columns = {'New_Embarked':'Embarked'}, inplace = True)
titanic_test

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Gender,Embarked
0,2,8.000000,1,1,36.7500,0,2
1,1,49.000000,0,0,25.9292,1,2
2,3,29.694775,0,0,7.7375,0,0
3,2,24.000000,2,1,27.0000,1,2
4,1,36.000000,0,0,26.2875,0,2
...,...,...,...,...,...,...,...
218,3,20.000000,1,0,7.9250,0,2
219,1,45.000000,0,0,26.5500,0,2
220,1,17.000000,1,0,108.9000,1,1
221,3,43.000000,0,0,6.4500,0,2


## Adding Combinations

In [20]:
for i in titanic:
    if i == "Survived":
        continue
    name = i + '_' + i
    titanic[name] = titanic[i] ** 2
titanic

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Survived,Gender,Embarked,Pclass_Pclass,Age_Age,SibSp_SibSp,Parch_Parch,Fare_Fare,Gender_Gender,Embarked_Embarked
0,2,29.00000,1,0,26.0000,1,1,2,4,841.000000,1,0,676.000000,1,4
1,3,29.70056,0,0,8.0500,0,0,2,9,882.123247,0,0,64.802500,0,4
2,2,39.00000,0,0,26.0000,0,0,2,4,1521.000000,0,0,676.000000,0,4
3,3,29.00000,0,4,21.0750,0,1,2,9,841.000000,0,16,444.155625,1,4
4,3,25.00000,0,0,7.0500,0,0,2,9,625.000000,0,0,49.702500,0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
663,2,17.00000,0,0,10.5000,1,1,2,4,289.000000,0,0,110.250000,1,4
664,3,29.70056,0,0,7.7500,0,0,0,9,882.123247,0,0,60.062500,0,0
665,3,32.00000,0,0,56.4958,1,0,2,9,1024.000000,0,0,3191.775418,0,4
666,3,22.00000,0,0,9.8375,0,1,2,9,484.000000,0,0,96.776406,1,4


In [21]:
for i in titanic_test:
    name = i + '_' + i
    titanic_test[name] = titanic_test[i] ** 2
titanic_test

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Gender,Embarked,Pclass_Pclass,Age_Age,SibSp_SibSp,Parch_Parch,Fare_Fare,Gender_Gender,Embarked_Embarked
0,2,8.000000,1,1,36.7500,0,2,4,64.000000,1,1,1350.562500,0,4
1,1,49.000000,0,0,25.9292,1,2,1,2401.000000,0,0,672.323413,1,4
2,3,29.694775,0,0,7.7375,0,0,9,881.779679,0,0,59.868906,0,0
3,2,24.000000,2,1,27.0000,1,2,4,576.000000,4,1,729.000000,1,4
4,1,36.000000,0,0,26.2875,0,2,1,1296.000000,0,0,691.032656,0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
218,3,20.000000,1,0,7.9250,0,2,9,400.000000,1,0,62.805625,0,4
219,1,45.000000,0,0,26.5500,0,2,1,2025.000000,0,0,704.902500,0,4
220,1,17.000000,1,0,108.9000,1,1,1,289.000000,1,0,11859.210000,1,1
221,3,43.000000,0,0,6.4500,0,2,9,1849.000000,0,0,41.602500,0,4


In [22]:
def survived(value):
    if value == 1:
        return 1
    else:
        return 0

titanic["New_Survived"] = titanic["Survived"].apply(survived)
titanic = titanic.drop(['Survived'], axis = 1)
titanic.rename(columns = {'New_Survived':'Survived'}, inplace = True)
titanic

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Gender,Embarked,Pclass_Pclass,Age_Age,SibSp_SibSp,Parch_Parch,Fare_Fare,Gender_Gender,Embarked_Embarked,Survived
0,2,29.00000,1,0,26.0000,1,2,4,841.000000,1,0,676.000000,1,4,1
1,3,29.70056,0,0,8.0500,0,2,9,882.123247,0,0,64.802500,0,4,0
2,2,39.00000,0,0,26.0000,0,2,4,1521.000000,0,0,676.000000,0,4,0
3,3,29.00000,0,4,21.0750,1,2,9,841.000000,0,16,444.155625,1,4,0
4,3,25.00000,0,0,7.0500,0,2,9,625.000000,0,0,49.702500,0,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
663,2,17.00000,0,0,10.5000,1,2,4,289.000000,0,0,110.250000,1,4,1
664,3,29.70056,0,0,7.7500,0,0,9,882.123247,0,0,60.062500,0,0,0
665,3,32.00000,0,0,56.4958,0,2,9,1024.000000,0,0,3191.775418,0,4,1
666,3,22.00000,0,0,9.8375,1,2,9,484.000000,0,0,96.776406,1,4,0


## Separating the Input and Output

In [23]:
titanic_input = titanic.iloc[:,0:14]
titanic_input

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Gender,Embarked,Pclass_Pclass,Age_Age,SibSp_SibSp,Parch_Parch,Fare_Fare,Gender_Gender,Embarked_Embarked
0,2,29.00000,1,0,26.0000,1,2,4,841.000000,1,0,676.000000,1,4
1,3,29.70056,0,0,8.0500,0,2,9,882.123247,0,0,64.802500,0,4
2,2,39.00000,0,0,26.0000,0,2,4,1521.000000,0,0,676.000000,0,4
3,3,29.00000,0,4,21.0750,1,2,9,841.000000,0,16,444.155625,1,4
4,3,25.00000,0,0,7.0500,0,2,9,625.000000,0,0,49.702500,0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
663,2,17.00000,0,0,10.5000,1,2,4,289.000000,0,0,110.250000,1,4
664,3,29.70056,0,0,7.7500,0,0,9,882.123247,0,0,60.062500,0,0
665,3,32.00000,0,0,56.4958,0,2,9,1024.000000,0,0,3191.775418,0,4
666,3,22.00000,0,0,9.8375,1,2,9,484.000000,0,0,96.776406,1,4


In [24]:
titanic_output = titanic.iloc[:,14]
titanic_output

0      1
1      0
2      0
3      0
4      0
      ..
663    1
664    0
665    1
666    0
667    1
Name: Survived, Length: 668, dtype: int64

## Converting Data to Numpy Array

In [25]:
X_train = titanic_input.to_numpy()
Y_train = titanic_output.to_numpy()
X_test = titanic_test.to_numpy()
X_train,Y_train,X_test

(array([[2.00000000e+00, 2.90000000e+01, 1.00000000e+00, ...,
         6.76000000e+02, 1.00000000e+00, 4.00000000e+00],
        [3.00000000e+00, 2.97005597e+01, 0.00000000e+00, ...,
         6.48025000e+01, 0.00000000e+00, 4.00000000e+00],
        [2.00000000e+00, 3.90000000e+01, 0.00000000e+00, ...,
         6.76000000e+02, 0.00000000e+00, 4.00000000e+00],
        ...,
        [3.00000000e+00, 3.20000000e+01, 0.00000000e+00, ...,
         3.19177542e+03, 0.00000000e+00, 4.00000000e+00],
        [3.00000000e+00, 2.20000000e+01, 0.00000000e+00, ...,
         9.67764063e+01, 1.00000000e+00, 4.00000000e+00],
        [3.00000000e+00, 2.97005597e+01, 1.00000000e+00, ...,
         2.40250000e+02, 1.00000000e+00, 0.00000000e+00]]),
 array([1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
        0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,

## Training the Logistic Regression

In [26]:
clf = LogisticRegression(solver='liblinear', max_iter=100)

### Fit Function

In [27]:
clf.fit(X_train,Y_train)

LogisticRegression(solver='liblinear')

### Predict Function

In [28]:
Y_pred = clf.predict(X_test)
Y_pred

array([0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0,
       1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0,
       1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
       0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0,
       1, 0, 0], dtype=int64)

In [29]:
np.savetxt('E:/MACHINE LEARNING/Milestone 2/Module 6 - Project Logistic Regression/pred2.csv', Y_pred)