In [19]:
import numpy as np
import pandas as pd

In [20]:
def target_func(x):
    return x*x

def grad_func(x):
    return 2*x

def update_theta(x, eta, gradient):
    return x + eta*gradient

In [21]:
# Gradient descent algorithm

def gd_toy(target_func, grad_func, theta_0, eta):
    theta = theta_0
    value = target_func(theta)
    
    while True:
        gradient = grad_func(theta)
        theta1 = update_theta(theta, -eta, gradient)
        value1 = target_func(theta1)
        
        if np.abs(value-value1) < 0.00000001:
            return theta, value
        else:
            theta = theta1
            value = value1

In [22]:
x = np.linspace(-5, 5, 100)
theta_0 = np.random.random(1)
eta = 0.0001

theta, value = gd_toy(target_func, grad_func, theta_0, eta)
print("____________________________________")
print("The minimum of theta is ", np.round(theta, 8))
print("The minimum value point is", np.round(value, 8))
print("____________________________________")


____________________________________
The minimum of theta is  [0.00499973]
The minimum value point is [2.5e-05]
____________________________________


In [23]:
data1 = pd.read_csv("wine2.csv")
data1.head()

Unnamed: 0,Label,alcohol,malic_acid,ash,alcalinity,magnesium,total_phenols,flavanoids,nonflavanoids_phenols,proanthocyanins,color,hue,od280_315,proline
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [24]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(data1[['alcohol', 'malic_acid', 'ash', 'alcalinity', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoids_phenols', 'proanthocyanins', 'color', 'hue', 'od280_315', 'proline']], data1['Label'], test_size = 0.2, shuffle = True, stratify=data1['Label'])

In [25]:
print("The dimension of the trainset is ", x_train.shape)
print("The dimension of the testset is", x_test.shape)

The dimension of the trainset is  (104, 13)
The dimension of the testset is (26, 13)


In [26]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder

scaler = StandardScaler()
xtrain_scaled = scaler.fit_transform(x_train)
xtest_scaled = scaler.fit_transform(x_test)

In [27]:
from sklearn.linear_model import LogisticRegression

model1 = LogisticRegression(tol=1e-06).fit(xtrain_scaled,y_train)

In [28]:
print("__________________________________")
print("regression coefficients are ", model1.coef_)
print("intercept is " ,model1.intercept_)
print("model score is ", model1.score(xtrain_scaled, y_train))
print("__________________________________")

__________________________________
regression coefficients are  [[-1.44923178 -0.3780916  -0.99387281  1.18868537 -0.29588477 -0.07725611
  -0.21662132  0.13012843  0.17229275 -0.61621461  0.07172492 -0.50052659
  -1.73102586]]
intercept is  [0.24824297]
model score is  1.0
__________________________________


In [29]:
model1.predict_proba(xtest_scaled)

array([[3.12946786e-03, 9.96870532e-01],
       [6.42647170e-03, 9.93573528e-01],
       [9.93075799e-01, 6.92420148e-03],
       [9.99057459e-01, 9.42541368e-04],
       [7.70303416e-01, 2.29696584e-01],
       [9.99249327e-01, 7.50672667e-04],
       [9.24226797e-01, 7.57732031e-02],
       [7.66049550e-04, 9.99233950e-01],
       [2.23795122e-03, 9.97762049e-01],
       [9.87210691e-01, 1.27893087e-02],
       [9.95540659e-01, 4.45934122e-03],
       [2.29935798e-02, 9.77006420e-01],
       [6.11696210e-02, 9.38830379e-01],
       [1.26654508e-02, 9.87334549e-01],
       [2.50573320e-01, 7.49426680e-01],
       [9.80058939e-01, 1.99410612e-02],
       [7.21618065e-03, 9.92783819e-01],
       [7.75909454e-01, 2.24090546e-01],
       [1.35960867e-02, 9.86403913e-01],
       [9.99211514e-01, 7.88485575e-04],
       [9.98899186e-01, 1.10081420e-03],
       [8.94746986e-01, 1.05253014e-01],
       [1.33278621e-01, 8.66721379e-01],
       [5.72086563e-04, 9.99427913e-01],
       [1.828493

In [30]:
y_pred = model1.predict(xtest_scaled)
print("predicted labels are ", y_pred)

predicted labels are  [2 2 1 1 1 1 1 2 2 1 1 2 2 2 2 1 2 1 2 1 1 1 2 2 2 2]


In [31]:
from sklearn import metrics

confusion_matrix = metrics.confusion_matrix(y_test, y_pred)
print(confusion_matrix)

[[12  0]
 [ 0 14]]


In [33]:
print(metrics.classification_report(y_test, y_pred, target_names = ['class 1', 'class 2']))

              precision    recall  f1-score   support

     class 1       1.00      1.00      1.00        12
     class 2       1.00      1.00      1.00        14

    accuracy                           1.00        26
   macro avg       1.00      1.00      1.00        26
weighted avg       1.00      1.00      1.00        26

