# Logistic Regression

### Objective : Implement Logistic Regression Algorithm on the given dataset

In [1]:
#Importing libraries
import numpy as np
import pandas as pd
import io
import matplotlib.pyplot as plt

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from google.colab import files
data = pd.read_csv('/content/drive/MyDrive/Labs/Sem6_Labs/ML/Lab07/BuyComputer.csv')
data.drop(columns=['User ID',],axis=1,inplace=True)
data.head()

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


In [4]:
#Declare label as last column in the source file
label = data.iloc[:,-1]
print(label)

0      0
1      0
2      0
3      0
4      0
      ..
395    1
396    1
397    1
398    0
399    1
Name: Purchased, Length: 400, dtype: int64


In [5]:
#Declaring X as all columns excluding last
X = data.iloc[:,:-1]
print(X)

     Age  EstimatedSalary
0     19            19000
1     35            20000
2     26            43000
3     27            57000
4     19            76000
..   ...              ...
395   46            41000
396   51            23000
397   50            20000
398   36            33000
399   49            36000

[400 rows x 2 columns]


In [6]:
# Splitting data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,label,test_size=0.3, random_state=110)
print(X_test)

     Age  EstimatedSalary
172   26           118000
211   52           150000
288   41            79000
49    31            89000
330   38            51000
..   ...              ...
11    26            52000
169   29            47000
32    21            16000
252   48           134000
363   42            79000

[120 rows x 2 columns]


In [7]:
# Sacaling data
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [8]:
#Variabes to calculate sigmoid function
y_pred = []
len_x = len(X_train[0])
w = []
b = 0.2
print(len_x)

2


In [9]:
entries = len(X_train[:,0])
entries

280

In [10]:
for weights in range(len_x):
  w.append(0)
w

[0, 0]

In [13]:
# Sigmoid function
def sigmoid(z):
  return 1.0/(1 + np.exp(-z))

In [12]:
def gradients(X, y, y_hat):
    m = X.shape[0]
    dw = (1/m)*np.dot(X.T, (y_hat - y))
    db = (1/m)*np.sum((y_hat - y)) 
    return dw, db

In [14]:
def normalize(X):
    m, n = X.shape
    for i in range(n):
        X = (X - X.mean(axis=0))/X.std(axis=0)
    return X

In [15]:
#Loss function
def loss_func(y,a):
  J = -np.mean(y*(np.log(a)) + (1-y)*np.log(1-a))
  return J

In [16]:
def train(X, y, bs, epochs, lr):
    m, n = X.shape
    w = np.zeros((n,1))
    b = 0
    y = y.values.reshape(m,1)
    x = normalize(X)
    losses = []
    for epoch in range(epochs):
        for i in range((m-1)//bs + 1):
            start_i = i*bs
            end_i = start_i + bs
            xb = X[start_i:end_i]
            yb = y[start_i:end_i]
            y_hat = sigmoid(np.dot(xb, w) + b)
            dw, db = gradients(xb, yb, y_hat)
            w -= lr*dw
            b -= lr*db
        l = loss_func(y, sigmoid(np.dot(X, w) + b))
        losses.append(l)
    return w, b, losses

In [17]:
def predicts(inputs):
  x = normalize(inputs)
  preds = sigmoid(np.dot(inputs,w)+b)
  pred_class = []
  pred_class = [1 if i>0.5 else 0 for i in preds]
  return np.array(pred_class)

In [18]:
w, b, l = train(X_train,y_train, bs=100, epochs=3000, lr=0.01)

In [20]:
from sklearn.metrics import accuracy_score

y_predict=predicts(X_test)
print("Accuracy : ",accuracy_score(y_test,y_predict))

Accuracy :  0.85


---

# 3.1 Logistic Regression model using sklearn 

In [29]:
from sklearn.linear_model import LogisticRegression
LR = LogisticRegression(random_state = 110)


In [30]:
# Splitting data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,label,test_size=0.3, random_state=110)

In [31]:
# from sklearn.preprocessing import StandardScaler
# sc = StandardScaler()
# X_train = sc.fit_transform(X_train)
# X_test = sc.transform(X_test)

In [32]:
clf = LR.fit(X_train,y_train)

In [33]:
y_pred = clf.predict(X_test)
print(y_pred)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0]


In [34]:
from sklearn.metrics import accuracy_score

print(accuracy_score(y_test,y_pred))

0.6166666666666667


In [35]:
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score
cm7 = confusion_matrix(y_test, y_pred)
print(cm7)

[[74  0]
 [46  0]]


In [36]:
import sklearn.metrics as metrics
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.62      1.00      0.76        74
           1       0.00      0.00      0.00        46

    accuracy                           0.62       120
   macro avg       0.31      0.50      0.38       120
weighted avg       0.38      0.62      0.47       120



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### 5. Use the model to identify if a person whose age is 28 years and his/her estimated salary is 76000 will purchase a computer?

In [37]:
output = clf.predict([[28,7600]])
# X_test = sc.transform([])
print(output)
#NOT purchase

[0]


