In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('Social_Network_Ads.csv')
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [3]:
ismale = pd.get_dummies(df['Gender'],drop_first=True)
df = pd.concat([df, ismale], axis=1)
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased,Male
0,15624510,Male,19,19000,0,1
1,15810944,Male,35,20000,0,1
2,15668575,Female,26,43000,0,0
3,15603246,Female,27,57000,0,0
4,15804002,Male,19,76000,0,1


In [4]:
df.corr()

Unnamed: 0,User ID,Age,EstimatedSalary,Purchased,Male
User ID,1.0,-0.000721,0.071097,0.00712,-0.025249
Age,-0.000721,1.0,0.155238,0.622454,-0.073741
EstimatedSalary,0.071097,0.155238,1.0,0.362083,-0.060435
Purchased,0.00712,0.622454,0.362083,1.0,-0.042469
Male,-0.025249,-0.073741,-0.060435,-0.042469,1.0


##Considering Age and Estimated Salary as features

In [5]:
x = df[['Age', 'EstimatedSalary']].values
x = (x - np.mean(x))/np.std(x)
x = np.column_stack((np.ones((x.shape[0],1)),x))
y = df['Purchased'].values

In [6]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=35)

In [7]:
def a(x, B):
  return x@B

In [8]:
def sig(a):
  return (1/(1+np.exp(-a)))

In [9]:
def loss(yp,y):
  return (-1/y.shape[0])*(y.T@np.log(yp)+(1-y).T@np.log(1-yp))


In [10]:
B = np.zeros((x_train.shape[1],1))
m = x_train.shape[0]
alpha = 3.5
L =[]
y_train=y_train.reshape(x_train.shape[0],-1)
for i in range(2000):
  k = a(x_train, B)
  z = sig(k)
  dB = (1/m)*np.dot(x_train.T,(z-y_train))
  B = B-dB*alpha
  L.append(loss(z,y_train))
print(L)

[array([[0.69314718]]), array([[0.64718085]]), array([[0.62978161]]), array([[0.62509648]]), array([[0.62200833]]), array([[0.61955987]]), array([[0.61698134]]), array([[0.61306514]]), array([[0.61037435]]), array([[0.60652349]]), array([[0.60424309]]), array([[0.60123562]]), array([[0.59950525]]), array([[0.59735966]]), array([[0.59610461]]), array([[0.5946164]]), array([[0.59371687]]), array([[0.59268471]]), array([[0.59203715]]), array([[0.59131269]]), array([[0.59084108]]), array([[0.59032429]]), array([[0.58997602]]), array([[0.58960122]]), array([[0.58934052]]), array([[0.58906454]]), array([[0.58886704]]), array([[0.58866113]]), array([[0.58851]]), array([[0.58835467]]), array([[0.58823809]]), array([[0.58811986]]), array([[0.58802935]]), array([[0.5879387]]), array([[0.58786808]]), array([[0.58779816]]), array([[0.58774286]]), array([[0.58768869]]), array([[0.58764524]]), array([[0.58760311]]), array([[0.58756892]]), array([[0.58753605]]), array([[0.58750909]]), array([[0.58748

In [11]:
print(B)

[[-0.64869122]
 [ 0.98639559]
 [ 0.97839151]]


In [12]:
k = a(x_test,B)
y_test_p = sig(k)
for i in range(y_test_p.shape[0]):
  if y_test_p[i]>=0.53:
    y_test_p[i]=1
  else :
    y_test_p[i]=0
y_test_p=y_test_p.reshape(-1)

In [13]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test,y_test_p)
print(cm)

[[50  1]
 [15 14]]


In [14]:
acc = (50 + 14)/(50+1+15+14)
print(acc)

0.8


##Considering only Age as feature

In [15]:
x = df[['Age']].values
x = (x - np.mean(x))/np.std(x)
x = np.column_stack((np.ones((x.shape[0],1)),x))
y = df['Purchased'].values

In [16]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=35)

In [17]:
B = np.zeros((x_train.shape[1],1))
m = x_train.shape[0]
alpha = 2
L =[]
y_train=y_train.reshape(x_train.shape[0],-1)
for i in range(2000):
  k = a(x_train, B)
  z = sig(k)
  dB = (1/m)*np.dot(x_train.T,(z-y_train))
  B = B-dB*alpha
  L.append(loss(z,y_train))
print(L)

[array([[0.69314718]]), array([[0.54032274]]), array([[0.49036389]]), array([[0.46809103]]), array([[0.45618877]]), array([[0.44913217]]), array([[0.44466295]]), array([[0.4417013]]), array([[0.4396733]]), array([[0.43824994]]), array([[0.43723167]]), array([[0.43649209]]), array([[0.4359483]]), array([[0.43554445]]), array([[0.43524202]]), array([[0.43501397]]), array([[0.43484099]]), array([[0.43470914]]), array([[0.4346082]]), array([[0.43453064]]), array([[0.43447087]]), array([[0.43442467]]), array([[0.43438888]]), array([[0.4343611]]), array([[0.43433949]]), array([[0.43432266]]), array([[0.43430954]]), array([[0.43429929]]), array([[0.43429128]]), array([[0.43428501]]), array([[0.4342801]]), array([[0.43427625]]), array([[0.43427323]]), array([[0.43427087]]), array([[0.43426901]]), array([[0.43426755]]), array([[0.4342664]]), array([[0.4342655]]), array([[0.43426479]]), array([[0.43426423]]), array([[0.4342638]]), array([[0.43426345]]), array([[0.43426318]]), array([[0.43426297]

In [18]:
print(B)

[[-0.9680898 ]
 [ 1.92850225]]


In [19]:
k = a(x_test,B)
y_test_p = sig(k)
for i in range(y_test_p.shape[0]):
  if y_test_p[i]>=0.50:
    y_test_p[i]=1
  else :
    y_test_p[i]=0
y_test_p=y_test_p.reshape(-1)

In [20]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test,y_test_p)
print(cm)

[[50  1]
 [ 9 20]]


In [21]:
acc = (50+20)/(50+1+9+20)
print(acc)

0.875
