## Importing the standard libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

## Loading the dataset

In [2]:
data = pd.read_csv('Data.csv')
data

Unnamed: 0,Country,Age,Salary,Purchased
0,France,44.0,72000.0,No
1,Spain,27.0,48000.0,Yes
2,Germany,30.0,54000.0,No
3,Spain,38.0,61000.0,No
4,Germany,40.0,,Yes
5,France,35.0,58000.0,Yes
6,Spain,,52000.0,No
7,France,48.0,79000.0,Yes
8,Germany,50.0,83000.0,No
9,France,37.0,67000.0,Yes


In [3]:
## Dropping the missing values 

In [4]:
data.dropna(inplace = True)
data

Unnamed: 0,Country,Age,Salary,Purchased
0,France,44.0,72000.0,No
1,Spain,27.0,48000.0,Yes
2,Germany,30.0,54000.0,No
3,Spain,38.0,61000.0,No
5,France,35.0,58000.0,Yes
7,France,48.0,79000.0,Yes
8,Germany,50.0,83000.0,No
9,France,37.0,67000.0,Yes


## Split the data into X and y

In [5]:
X = data.drop('Purchased', axis = 1)
y = data['Purchased']

## Performing train test split

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [7]:
print(X_train)

   Country   Age   Salary
1    Spain  27.0  48000.0
3    Spain  38.0  61000.0
0   France  44.0  72000.0
5   France  35.0  58000.0
8  Germany  50.0  83000.0
9   France  37.0  67000.0


In [8]:
print(X_test)

   Country   Age   Salary
7   France  48.0  79000.0
2  Germany  30.0  54000.0


In [9]:
print(y_train)

1    Yes
3     No
0     No
5    Yes
8     No
9    Yes
Name: Purchased, dtype: object


In [10]:
print(y_test)

7    Yes
2     No
Name: Purchased, dtype: object


## Feature Scaling

- Note : Applied only on Numerical Columns

In [11]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [12]:
X_train[['Age', 'Salary']] = scaler.fit_transform(X_train[['Age', 'Salary']])

In [13]:
X_train

Unnamed: 0,Country,Age,Salary
1,Spain,-1.601191,-1.525234
3,Spain,-0.069617,-0.347331
0,France,0.765787,0.649357
5,France,-0.487319,-0.619155
8,Germany,1.601191,1.646045
9,France,-0.208851,0.196317


In [14]:
X_test[['Age', 'Salary']] = scaler.fit_transform(X_test[['Age', 'Salary']])

In [16]:
X_test

Unnamed: 0,Country,Age,Salary
7,France,1.0,1.0
2,Germany,-1.0,-1.0


## Feature Scaling on titanic data

In [18]:
titanic = pd.read_csv('titanic.csv')
titanic.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


- Age, Fare are the columns on which Feature Scaling needs to applied

In [20]:
X = titanic.drop('Survived', axis = 1)
y = titanic['Survived']

In [21]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [22]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [23]:
X_train[['Age', 'Fare']] = scaler.fit_transform(X_train[['Age', 'Fare']])

In [24]:
X_train

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
317,318,2,"Moraweck, Dr. Ernest",male,1.690272,0,0,29011,-0.380947,,S
769,770,3,"Gronnestad, Mr. Daniel Danielsen",male,0.159469,0,0,8471,-0.496275,,S
626,627,2,"Kirkland, Rev. Charles Leonard",male,1.899018,0,0,219533,-0.414701,,Q
48,49,3,"Samaan, Mr. Youssef",male,,2,0,2662,-0.223850,,C
450,451,2,"West, Mr. Edwy Arthur",male,0.437797,1,2,C.A. 34651,-0.099657,,S
...,...,...,...,...,...,...,...,...,...,...,...
523,524,1,"Hippach, Mrs. Louis Albert (Ida Sophia Fischer)",female,0.994452,0,1,111361,0.518754,B18,C
135,136,2,"Richard, Mr. Emile",male,-0.466769,0,0,SC/PARIS 2133,-0.359552,,C
806,807,1,"Andrews, Mr. Thomas Jr",male,0.646543,0,0,112050,-0.667350,A36,S
877,878,3,"Petroff, Mr. Nedelio",male,-0.745097,0,0,349212,-0.505823,,S


In [25]:
X_test[['Age', 'Fare']] = scaler.fit_transform(X_test[['Age', 'Fare']])

In [26]:
X_test

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
63,64,3,"Skoog, Master. Harald",male,-1.699988,3,2,347088,-0.050244,,S
719,720,3,"Johnson, Mr. Malkolm Joackim",male,0.221132,0,0,347062,-0.432602,,S
314,315,2,"Hart, Mr. Benjamin",male,0.883587,1,1,F.C.C. 13529,-0.081593,,S
624,625,3,"Bowen, Mr. David John ""Dai""",male,-0.573814,0,0,54636,-0.274434,,S
76,77,3,"Staneff, Mr. Ivan",male,,0,0,349208,-0.430307,,S
...,...,...,...,...,...,...,...,...,...,...,...
517,518,3,"Ryan, Mr. Patrick",male,,0,0,371110,-0.121491,,Q
141,142,3,"Nysten, Miss. Anna Sofia",female,-0.507568,0,0,347081,-0.433077,,S
803,804,3,"Thomas, Master. Assad Alexander",male,-1.937147,0,1,2625,-0.418510,,C
699,700,3,"Humblen, Mr. Adolf Mathias Nicolai Olsen",male,0.817342,0,0,348121,-0.434977,F G63,S
