### This notebook provides notes on classifying a custom dataset with logistic regression

In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as f
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

import os

In [3]:
## config: run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

### Read and prepare the Data

In [4]:
## read the data
# Build the CSV location relative to the current working directory so the
# notebook does not depend on one machine's absolute path. Start the kernel
# from the folder that contains `data/DryBeans/train.csv`.
file_path = os.path.join(os.getcwd(), 'data', 'DryBeans', 'train.csv')

df_train = pd.read_csv(file_path)
df_train.head()

Unnamed: 0,ID,Area,Perimeter,MajorAxisLength,MinorAxisLength,AspectRation,Eccentricity,ConvexArea,EquivDiameter,Extent,Solidity,roundness,Compactness,ShapeFactor1,ShapeFactor2,ShapeFactor3,ShapeFactor4,y
0,0,58238,971.303,397.202654,186.94551,2.124697,0.882317,58977,272.306674,0.604756,0.98747,0.775723,0.685561,0.00682,0.000929,0.469994,0.998595,HOROZ
1,1,44515,757.601,265.59047,213.967453,1.241266,0.59242,44780,238.07196,0.771745,0.994082,0.97462,0.896387,0.005966,0.002376,0.80351,0.99737,SEKER
2,2,31662,653.783,239.902428,168.421505,1.424417,0.712136,32034,200.781748,0.801407,0.988387,0.930853,0.836931,0.007577,0.002293,0.700453,0.997737,DERMASON
3,3,38778,734.02,268.446281,184.061923,1.458456,0.727925,39208,222.201897,0.766137,0.989033,0.904439,0.827733,0.006923,0.002005,0.685142,0.99925,DERMASON
4,4,42530,775.403,281.212824,193.236878,1.455275,0.726511,43028,232.703412,0.729816,0.988426,0.888895,0.827499,0.006612,0.001912,0.684755,0.996507,SIRA


In [4]:
## split the data
# Target is the 'y' column; features are every column except the first and the last.
y = df_train['y']
X = df_train.iloc[:, 1:-1]

X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)

# Normalize the input features: the scaler is fitted on the training split only
# and then re-used for the validation split. Results are stored as float32 tensors.
scaler = StandardScaler()
X_train_scaled = torch.as_tensor(scaler.fit_transform(X_train), dtype=torch.float32)
X_valid_scaled = torch.as_tensor(scaler.transform(X_valid), dtype=torch.float32)

# Convert class string labels into integers (encoder fitted on the training labels only).
# The targets MUST be integer (long) tensors for the classification loss; at this
# point they are kept as column vectors of shape (N, 1).
encoder = LabelEncoder()
y_train_encoded = torch.as_tensor(encoder.fit_transform(y_train), dtype=torch.long).reshape(-1, 1)
y_valid_encoded = torch.as_tensor(encoder.transform(y_valid), dtype=torch.long).reshape(-1, 1)

In [5]:
## nn.CrossEntropyLoss expects class-index targets of shape (N,), not (N, 1);
## without this step the loss call raises an error.
## reshape(-1) always yields a 1-D tensor: unlike a bare squeeze() it keeps the
## batch dimension even when a split holds a single sample, and it is safe to re-run.
y_train_encoded = y_train_encoded.reshape(-1)
y_valid_encoded = y_valid_encoded.reshape(-1)

#### Build The Model

In [6]:
## Create a class for Logistic Regression
class LogisticRegression(nn.Module):
    """Multiclass logistic regression: one linear layer that outputs raw class logits.

    Args:
        input_features: number of input features per sample.
        num_classes: number of output classes (one logit each). Defaults to 7,
            the value that was previously hard-coded.
    """

    def __init__(self, input_features, num_classes=7):
        super().__init__()

        self.linear = nn.Linear(input_features, num_classes)  ## --> one logit per class

    def forward(self, x):
        """Return the unnormalized logits produced by the linear layer for `x`."""
        ## no Softmax for multiclass --> nn.CrossEntropyLoss handles this internally
        return self.linear(x)
    


## instance
torch.manual_seed(42)  ## fix the RNG so the weight initialisation is reproducible
log_reg = LogisticRegression(input_features=X_train_scaled.shape[1])


## Training
print('Training started __________________________________ \n')

## criteria
learning_rate = 0.01
n_epochs = 10000

loss = nn.CrossEntropyLoss()   ## takes raw logits and integer class targets
optimizer = torch.optim.SGD(log_reg.parameters(), lr=learning_rate)


## full-batch gradient descent: every epoch uses the whole training set at once
for epoch in range(n_epochs):
    ## predicting
    y_pred_train = log_reg(X_train_scaled)
    ## loss
    l = loss(y_pred_train, y_train_encoded)
    ## backpropagation
    l.backward()
    ## optimization step
    optimizer.step()
    ## empty the gradients so they do not accumulate into the next epoch
    optimizer.zero_grad()


print('Training finished ____________________________________ \n') 

## Recompute the predictions with the FINAL weights and without tracking gradients.
## The logits left over from the loop were produced before the last optimizer
## step (and would not exist at all if n_epochs were 0).
with torch.no_grad():
    y_pred_train = log_reg(X_train_scaled)
    _, y_pred_train_cls = torch.max(y_pred_train, dim=1)
    train_acc = (y_pred_train_cls == y_train_encoded).float().mean()
print(f'Training Accuracy is --> {train_acc*100:.3f} %')

Training started __________________________________ 

Training finished ____________________________________ 

Training Accuracy is --> 91.496 %


#### Evaluation

In [7]:
## evaluation only needs a forward pass, so autograd tracking is switched off
## with torch.no_grad() (no computation graph is built for these predictions)
with torch.no_grad():
    y_pred_valid = log_reg(X_valid_scaled)

    ## predicted class = index of the largest logit in each row
    y_pred_valid_cls = torch.max(y_pred_valid, dim=1).indices
    n_correct = (y_pred_valid_cls == y_valid_encoded).sum()
    valid_acc = n_correct / y_valid_encoded.shape[0]

    print(f'Validation Accuracy is --> {valid_acc*100:.3f} %')

Validation Accuracy is --> 92.432 %


### Done !