In [None]:
# Input dataset for this notebook: Data.csv

**Step 1: Importing the libraries**

In [66]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [67]:
# Load the raw dataset; the relative path means Data.csv must sit in the working directory.
df = pd.read_csv('Data.csv')

**Step 2: Importing dataset**

In [68]:
# Show the loaded frame to inspect its columns and spot missing entries.
df

Unnamed: 0,Country,Age,Salary,Purchased
0,France,44.0,72000.0,No
1,Spain,27.0,48000.0,Yes
2,Germany,30.0,54000.0,No
3,Spain,38.0,61000.0,No
4,Germany,40.0,,Yes
5,France,35.0,58000.0,Yes
6,Spain,,52000.0,No
7,France,48.0,79000.0,Yes
8,Germany,50.0,83000.0,No
9,France,37.0,67000.0,Yes


In [69]:
# Print each column's dtype and non-null count.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Country    10 non-null     object 
 1   Age        9 non-null      float64
 2   Salary     9 non-null      float64
 3   Purchased  10 non-null     object 
dtypes: float64(2), object(2)
memory usage: 448.0+ bytes


In [70]:
# Count the missing values in every column.
df.isna().sum()

Country      0
Age          1
Salary       1
Purchased    0
dtype: int64

In [71]:
# Median of the observed ages (missing entries are skipped); used as the fill value for Age.
a = df['Age'].median()

In [72]:
# Impute missing ages with the median stored in `a`.
# Assign the result back rather than calling fillna(inplace=True) on the
# df['Age'] selection: that chained in-place call is deprecated in pandas 2.x
# and, with Copy-on-Write enabled, leaves df unchanged.
df['Age'] = df['Age'].fillna(a)

In [73]:
# Re-display df to confirm the missing Age entry was filled.
df

Unnamed: 0,Country,Age,Salary,Purchased
0,France,44.0,72000.0,No
1,Spain,27.0,48000.0,Yes
2,Germany,30.0,54000.0,No
3,Spain,38.0,61000.0,No
4,Germany,40.0,,Yes
5,France,35.0,58000.0,Yes
6,Spain,38.0,52000.0,No
7,France,48.0,79000.0,Yes
8,Germany,50.0,83000.0,No
9,France,37.0,67000.0,Yes


**Step 3: Handling the missing data**

In [78]:
# Mean of the observed salaries (missing entries are skipped); used as the fill value for Salary.
b = df['Salary'].mean()

In [79]:
# Impute missing salaries with the mean stored in `b`.
# Assign the result back rather than calling fillna(inplace=True) on the
# df['Salary'] selection: that chained in-place call is deprecated in pandas 2.x
# and, with Copy-on-Write enabled, leaves df unchanged.
df['Salary'] = df['Salary'].fillna(b)

In [80]:
# Re-display df to confirm the missing Salary entry was filled.
df

Unnamed: 0,Country,Age,Salary,Purchased
0,France,44.0,72000.0,No
1,Spain,27.0,48000.0,Yes
2,Germany,30.0,54000.0,No
3,Spain,38.0,61000.0,No
4,Germany,40.0,63780.0,Yes
5,France,35.0,58000.0,Yes
6,Spain,38.0,52000.0,No
7,France,48.0,79000.0,Yes
8,Germany,50.0,83000.0,No
9,France,37.0,67000.0,Yes


**Step 4: Encoding categorical data**

In [81]:
# Encode the target as integers: 'Yes' -> 1, 'No' -> 0.
# Run this cell once per load: map() turns values that are not keys of the
# dict into NaN, so re-running it on the already-encoded column would blank it.
df['Purchased'] = df['Purchased'].map({'Yes': 1, 'No': 0})

In [82]:
# Re-display df; Purchased should now hold 1/0 instead of 'Yes'/'No'.
df

Unnamed: 0,Country,Age,Salary,Purchased
0,France,44.0,72000.0,0
1,Spain,27.0,48000.0,1
2,Germany,30.0,54000.0,0
3,Spain,38.0,61000.0,0
4,Germany,40.0,63780.0,1
5,France,35.0,58000.0,1
6,Spain,38.0,52000.0,0
7,France,48.0,79000.0,1
8,Germany,50.0,83000.0,0
9,France,37.0,67000.0,1


**Step 5: Creating dummy variables**

In [83]:
# One-hot encode Country into one indicator column per country.
# dtype=int pins the indicators to 0/1 integers: pandas >= 2.0 would otherwise
# return booleans, which no longer match the numeric feature columns they are
# combined with later.
df1 = pd.get_dummies(df['Country'], dtype=int)

In [84]:
# Show the indicator columns, one per Country value.
df1

Unnamed: 0,France,Germany,Spain
0,1,0,0
1,0,0,1
2,0,1,0
3,0,0,1
4,0,1,0
5,1,0,0
6,0,0,1
7,1,0,0
8,0,1,0
9,1,0,0


In [85]:
# Attach the Country indicator columns to the original frame, side by side.
merge = pd.concat([df, df1], axis=1)
merge

Unnamed: 0,Country,Age,Salary,Purchased,France,Germany,Spain
0,France,44.0,72000.0,0,1,0,0
1,Spain,27.0,48000.0,1,0,0,1
2,Germany,30.0,54000.0,0,0,1,0
3,Spain,38.0,61000.0,0,0,0,1
4,Germany,40.0,63780.0,1,0,1,0
5,France,35.0,58000.0,1,1,0,0
6,Spain,38.0,52000.0,0,0,0,1
7,France,48.0,79000.0,1,1,0,0
8,Germany,50.0,83000.0,0,0,1,0
9,France,37.0,67000.0,1,1,0,0


In [86]:
# Drop the raw Country text column and the Spain indicator; with Spain removed,
# a row with France == 0 and Germany == 0 represents Spain (the baseline level).
final_df = merge.drop(columns=['Country', 'Spain'])

In [87]:
# Show the final modelling frame: numeric features plus the Purchased target.
final_df

Unnamed: 0,Age,Salary,Purchased,France,Germany
0,44.0,72000.0,0,1,0
1,27.0,48000.0,1,0,0
2,30.0,54000.0,0,0,1
3,38.0,61000.0,0,0,0
4,40.0,63780.0,1,0,1
5,35.0,58000.0,1,1,0
6,38.0,52000.0,0,0,0
7,48.0,79000.0,1,1,0
8,50.0,83000.0,0,0,1
9,37.0,67000.0,1,1,0


**Step 6: Splitting the dataset into training and test sets**

In [88]:
# Feature matrix as a NumPy array; column order is Age, Salary, France, Germany.
x = final_df[
    ['Age', 'Salary', 'France', 'Germany']
].values

In [89]:
# Target vector (1 = purchased, 0 = not purchased).
# Read it from final_df, the same frame the features in x come from, so the
# rows of x and y cannot drift apart if one frame is filtered or reordered.
y = final_df['Purchased'].values

In [90]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1,
)

In [91]:
# Inspect the training features before scaling.
x_train

array([[3.800e+01, 5.200e+04, 0.000e+00, 0.000e+00],
       [4.000e+01, 6.378e+04, 0.000e+00, 1.000e+00],
       [4.400e+01, 7.200e+04, 1.000e+00, 0.000e+00],
       [3.800e+01, 6.100e+04, 0.000e+00, 0.000e+00],
       [2.700e+01, 4.800e+04, 0.000e+00, 0.000e+00],
       [4.800e+01, 7.900e+04, 1.000e+00, 0.000e+00],
       [5.000e+01, 8.300e+04, 0.000e+00, 1.000e+00],
       [3.500e+01, 5.800e+04, 1.000e+00, 0.000e+00]])

**Step 7: Feature Scaling**

In [92]:
# Fit the scaler on the training rows only, then apply that same scaling to
# both splits so the test rows do not influence the learned statistics.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [93]:
# Inspect the training features after standardization.
x_train

array([[-0.28942984, -1.07815151, -0.77459667, -0.57735027],
       [ 0.        , -0.06996538, -0.77459667,  1.73205081],
       [ 0.57885968,  0.63353971,  1.29099445, -0.57735027],
       [-0.28942984, -0.30789046, -0.77459667, -0.57735027],
       [-1.88129397, -1.42048975, -0.77459667, -0.57735027],
       [ 1.15771937,  1.23263164,  1.29099445, -0.57735027],
       [ 1.44714921,  1.57496989, -0.77459667,  1.73205081],
       [-0.72357461, -0.56464414,  1.29099445, -0.57735027]])

In [94]:
# Train a logistic-regression classifier on the scaled training data,
# then predict labels for the held-out test rows.
model = LogisticRegression().fit(x_train, y_train)
y_pred = model.predict(x_test)

In [95]:
# Intercept (bias) term of the fitted model; [0] pulls the single value out of the array.
model.intercept_[0]

0.006367960159276079

In [96]:
# Fitted coefficients, one per column of x (Age, Salary, France, Germany), on the standardized scale.
model.coef_[0]

array([-0.621196  , -0.14781646,  0.61407439,  0.38453526])