In [413]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np

In [414]:
# Path of the diabetes CSV under the Kaggle input directory.
csv_path = '/kaggle/input/diabetes-dataset-for-beginners/diabetes.csv'
df = pd.read_csv(csv_path)

In [415]:
# Display the column labels of the loaded frame.
df.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [416]:
# Count missing values per column (isna is the canonical spelling of isnull).
df.isna().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [417]:
# Hold out the tail of the file as the test set; every row before the boundary
# is used for training/validation (the validation split itself is made later
# with train_test_split).
#
# The slices previously were [:614] and [615:], which silently discarded
# row 614. Both slices now share one boundary so each row lands in exactly
# one partition.
#
# NOTE(review): the name `train` is rebound to a function further down this
# notebook, so this cell (and the one that builds X / y from `train`) cannot
# be re-run after that definition without restarting the kernel.
n_train_rows = 614
train = df.iloc[:n_train_rows]
test_df = df.iloc[n_train_rows:]

In [418]:
# Separate the feature matrix from the target column, both as NumPy arrays.
X = train.drop(columns='Outcome').to_numpy()
y = train['Outcome'].to_numpy()

In [419]:
# Carve a validation set out of the training rows (fixed seed for repeatability).
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=23
)

# Features become float32 inputs; labels become int64 class indices, the
# target dtype nn.CrossEntropyLoss expects for class-index targets.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_valid_tensor = torch.tensor(X_valid, dtype=torch.float32)
y_valid_tensor = torch.tensor(y_valid, dtype=torch.long)

# The held-out frame still contains the 'Outcome' label. Drop it so the test
# tensor has the same feature columns as X; passing test_df.values directly
# produced one extra column that the model's input layer cannot accept.
X_test_tensor = torch.tensor(
    test_df.drop('Outcome', axis=1).values, dtype=torch.float32
)

In [420]:
# Confirm the (n_samples, n_features) shape of the training tensor.
X_train_tensor.shape

torch.Size([491, 8])

In [421]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [422]:
# Mini-batch size shared by all three loaders.
batch_size = 64

# Train/validation datasets pair features with labels; the test dataset
# carries features only, so its batches are 1-tuples.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
valid_dataset = TensorDataset(X_valid_tensor, y_valid_tensor)
test_dataset = TensorDataset(X_test_tensor)

# Only the training loader reshuffles each epoch; evaluation order stays fixed.
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
valid_dataloader = DataLoader(valid_dataset, shuffle=False, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [423]:

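# Sanity check (disabled): peek at one batch to confirm tensor shapes.
# test_dataloader yields 1-tuples (features only), so unpack with `(X,)`;
# iterate train_dataloader / valid_dataloader instead to get `(X, y)` pairs.
# for (X,) in test_dataloader:
#   print(X.shape)            # (batch_size, n_features)
#   break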

In [424]:
# Define the Neural Network with Dropout
class Diabetes(nn.Module):
    """Single-hidden-layer classifier: Linear -> ReLU -> Dropout -> Linear.

    The forward pass returns raw, unnormalised class scores (logits); no
    softmax is applied inside the model.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the hidden layer. Defaults to 64.
        output_size: Number of logits produced per sample. Defaults to 2.
        dropout_rate: Drop probability passed to ``nn.Dropout``. Defaults
            to 0.05.
    """

    def __init__(self, input_size, hidden_size=64, output_size=2, dropout_rate=0.05):
        super().__init__()
        # Attribute names are kept stable so existing state_dict keys still load.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(p=dropout_rate)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return logits for ``x``, a float tensor whose last dimension is ``input_size``."""
        hidden = self.relu1(self.fc1(x))
        hidden = self.dropout1(hidden)  # only active in training mode
        return self.fc2(hidden)
# Build the network with one input unit per feature column and place it on
# the selected device.
input_size = X_train.shape[1]
model = Diabetes(input_size).to(device)

In [425]:
# Training objective and optimiser: cross-entropy over the class logits,
# minimised with Adam at a fixed learning rate.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [426]:
#steps in the GD : Batch of the input / Pass it to the model / Compute loss function / Update the weights

def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch over ``dataloader``, updating ``model`` in place.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches.
        model: ``nn.Module`` to optimise. Each batch is moved to the device of
            the model's first parameter before the forward pass, so the
            function does not depend on a notebook-level ``device`` variable.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        optimizer: Optimiser constructed over ``model``'s parameters.

    Returns:
        The mean of the per-batch losses as a float, or ``nan`` if the loader
        produced no batches.
    """
    model.train()  # training mode: dropout is active

    # Send data wherever the model lives; a parameter-free model leaves the
    # batches on their current device.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    total_loss = 0.0
    num_batches = 0
    for batch, (X, y) in enumerate(dataloader):
        if target_device is not None:
            X = X.to(target_device)
            y = y.to(target_device)

        # Forward pass and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Clear gradients BEFORE backward so anything left in .grad (for
        # example from an interrupted earlier run) cannot leak into this step.
        optimizer.zero_grad()
        loss.backward()   # computes dL/dW for every parameter
        optimizer.step()  # applies the parameter update

        batch_loss = loss.item()
        total_loss += batch_loss
        num_batches += 1

        if batch % 100 == 0:
            print(f'Loss of the Model {batch_loss}')

    return total_loss / num_batches if num_batches else float("nan")

In [427]:
def valid(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` without updating any weights.

    Prints the accuracy (as a percentage) and the mean per-batch loss. The
    printed label reads "Test Accuracy" regardless of which loader is passed.

    Args:
        dataloader: ``DataLoader`` yielding ``(X, y)`` batches; must support
            ``len()`` and expose ``.dataset``.
        model: ``nn.Module`` to evaluate. Each batch is moved to the device of
            the model's first parameter before the forward pass.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar loss tensor.

    Returns:
        ``(accuracy, avg_loss)`` where ``accuracy`` is the fraction of samples
        whose arg-max prediction equals the label and ``avg_loss`` is the mean
        of the per-batch losses. Each is ``nan`` when there is nothing to
        average over (empty loader / empty dataset).
    """
    model.eval()  # evaluation mode: dropout is disabled

    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    num_batches = len(dataloader)
    num_samples = len(dataloader.dataset)
    valid_loss, correct = 0.0, 0.0

    with torch.no_grad():  # no gradients are needed for evaluation
        for X, y in dataloader:
            if target_device is not None:
                X = X.to(target_device)
                y = y.to(target_device)

            pred = model(X)
            valid_loss += loss_fn(pred, y).item()

            # Count samples whose highest-scoring class matches the label.
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    # Guard the averages so an empty loader reports nan instead of raising.
    valid_loss = valid_loss / num_batches if num_batches else float("nan")
    correct = correct / num_samples if num_samples else float("nan")

    print(f'Test Accuracy {100*correct}, Avg_loss : {valid_loss}')
    return correct, valid_loss


In [428]:
# Number of full passes over the training data.
epochs = 6

# Iterate over `epochs` itself; the loop previously used a hard-coded
# range(6), so editing `epochs` had no effect on how long training ran.
for t in range(epochs):
  print(f'Epoch {t+1}')
  train(train_dataloader,model,loss_fn,optimizer)
  valid(valid_dataloader,model,loss_fn)

Epoch 1
Loss of the Model 5.9536356925964355
Test Accuracy 51.21951219512195, Avg_loss : 1.3808582425117493
Epoch 2
Loss of the Model 2.113328695297241
Test Accuracy 55.28455284552846, Avg_loss : 2.198044180870056
Epoch 3
Loss of the Model 2.239844560623169
Test Accuracy 56.09756097560976, Avg_loss : 1.5491724014282227
Epoch 4
Loss of the Model 1.5377240180969238
Test Accuracy 64.22764227642277, Avg_loss : 1.2190889120101929
Epoch 5
Loss of the Model 1.9956353902816772
Test Accuracy 60.97560975609756, Avg_loss : 0.8746638298034668
Epoch 6
Loss of the Model 2.225205421447754
Test Accuracy 60.97560975609756, Avg_loss : 1.0672915577888489
