In [None]:
import numpy as np
import torch
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
df = pd.read_csv("data/kaggle/diabetes/diabetes.csv")
# kaggle dataset
# https://www.kaggle.com/uciml/pima-indians-diabetes-database

In [None]:
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [None]:
df.shape

(768, 9)

In [None]:
df.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [None]:
df["Outcome"].value_counts()

0    500
1    268
Name: Outcome, dtype: int64

In [None]:
X = df[
    [
        "Pregnancies",
        "Glucose",
        "BloodPressure",
        "SkinThickness",
        "Insulin",
        "BMI",
        "DiabetesPedigreeFunction",
        "Age",
    ]
].values
y = df["Outcome"].values
# or y = df['Outcome'].values
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)
print(type(X), "\n", type(y))
# X = torch.from_numpy(X).type(torch.FloatTensor)

<class 'torch.Tensor'> 
 <class 'torch.Tensor'>


In [None]:
X.shape

torch.Size([768, 8])

In [None]:
y.shape

torch.Size([768])

In [None]:
train_end_index = int(0.8 * len(X))
X_train, X_test = X[:train_end_index], X[train_end_index:]
y_train, y_test = y[:train_end_index], y[train_end_index:]

In [None]:
print("Shape of X_train: ", X_train.shape)
print("Shape of y_train: ", y_train.shape)
print("Shape of X_test: ", X_test.shape)
print("Shape of y_test: ", y_test.shape)

Shape of X_train:  torch.Size([614, 8])
Shape of y_train:  torch.Size([614])
Shape of X_test:  torch.Size([154, 8])
Shape of y_test:  torch.Size([154])


In [None]:
class LogisticRegression(torch.nn.Module):  # inheriting from nn.Module
    def __init__(self, input_dimensions, output_dimensions):
        # calls the init function of nn.Module
        super().__init__()  # call super constructor
        self.linear = torch.nn.Linear(input_dimensions, output_dimensions, bias=True)
        # self.linear.weight = torch.nn.Parameter(torch.Tensor(input_dimensions))
        print(self.linear.weight.shape)  # ([1, 8])
        print(self.linear.bias.shape)
        # creates a 1 layer feed forward network with in inputs and out outputs.
        # weights = (out, in); bias = (out)

    def forward(self, X):
        y_pred = torch.sigmoid(self.linear(X))
        return y_pred


# dimension of weights = output/input

In [None]:
samples, input_features = X_train.shape
output_features = 1
print(input_features, ",", output_features)
logreg = LogisticRegression(input_features, output_features)

8 , 1
torch.Size([1, 8])
torch.Size([1])


In [None]:
# loss
criterion = torch.nn.BCELoss()
# Binary Cross-Entropy (binary logarithmic loss function) - for binary classification

# optimizer - to update weights
learning_rate = 0.0001
iterations = 100000
optimizer = torch.optim.SGD(logreg.parameters(), lr=learning_rate)
# model.parameters() --> learnable parameters for the optimizer

In [None]:
for i in range(iterations):
    # logreg.train()

    # Forward propagation
    y_pred = logreg.forward(X_train)  # torch.Size([614, 1])
    # print(y_pred.shape, y_train.shape)
    # https://stackoverflow.com/questions/57798033/valueerror-target-size-torch-size16-must-be-the-same-as-input-size-torch
    y_pred = y_pred.squeeze()  # y_pred.view(y_pred.shape[0]) # try squeeze
    loss = criterion(y_pred, y_train)
    # print(y_pred.shape)
    # Backward propagation
    loss.backward()  # calculates gradients
    optimizer.step()  # Performs a single optimization step (parameter update).
    optimizer.zero_grad()  # flushes previous value of gradients

    if i % (iterations / 10) == 0:
        print("Iteration: {}, Loss: {}".format(i, loss))

Iteration: 0, Loss: 5.674142360687256
Iteration: 10000, Loss: 0.5969590544700623
Iteration: 20000, Loss: 0.595583438873291
Iteration: 30000, Loss: 0.5942579507827759
Iteration: 40000, Loss: 0.5929510593414307
Iteration: 50000, Loss: 0.5916601419448853
Iteration: 60000, Loss: 0.5903834104537964
Iteration: 70000, Loss: 0.5891261100769043
Iteration: 80000, Loss: 0.5878816843032837
Iteration: 90000, Loss: 0.5866446495056152


In [44]:
with torch.no_grad():
    y_predicted = logreg(X_test)
    y_predicted_cls = y_predicted.round()
    acc = y_predicted_cls.eq(y_test).sum() / float(y_test.shape[0])
    print(f"Accuracy score at test time: {acc.item():.4f}")

Accuracy score at test time: 92.1429


---

In [45]:
import torch.nn as nn

m = nn.Sigmoid()
loss = nn.BCELoss()
input = torch.randn(3, requires_grad=True)
target = torch.empty(3).random_(2)
output = loss(m(input), target)
output.backward()

In [32]:
input.shape

torch.Size([3])

In [33]:
x = torch.randn((2, 3))
x

tensor([[-0.3739,  0.9067,  0.1842],
        [ 0.4629, -1.9096, -1.7016]])

In [34]:
x + 2

tensor([[1.6261, 2.9067, 2.1842],
        [2.4629, 0.0904, 0.2984]])

In [35]:
torch.tensor(8)

tensor(8)