In [37]:
import pandas as pd

In [38]:
# Loading data
data = pd.read_csv('Prodigy University Dataset.csv')
# Split the data into features (X) and target (y)
data.head()

Unnamed: 0,sat_sum,hs_gpa,fy_gpa
0,508,3.4,3.18
1,488,4.0,3.33
2,464,3.75,3.25
3,380,3.75,2.42
4,428,4.0,2.63


In [39]:
# Converting data to numpy
X = data[['sat_sum', 'hs_gpa']].values
# reshape the fy_gpa into a 2D array with [data_size] rows and 1 column
y = data['fy_gpa'].values.reshape(-1, 1)
print(X.shape)
print(y.shape)

(1000, 2)
(1000, 1)


In [40]:
from sklearn.model_selection import train_test_split
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [41]:
from sklearn.preprocessing import StandardScaler

# Normalize the features so that it is easier to train the data
scaler = StandardScaler()
X_train= scaler.fit_transform(X_train)
X_test= scaler.fit_transform(X_test)

In [42]:
X_train.shape

(800, 2)

In [43]:
import torch
# Convert numpy to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

In [44]:
import torch.nn as nn

In [45]:
# Building model with 2 neurons
model = nn.Sequential(
    nn.Linear(2, 2),
    nn.Sigmoid(),
    nn.Linear(2, 1)
)

In [46]:
# Forward Propagation
preds = model(X_train_tensor)

In [47]:
from torch.nn import MSELoss

In [48]:
preds[:5]

tensor([[0.5389],
        [0.5550],
        [0.6678],
        [0.5743],
        [0.6381]], grad_fn=<SliceBackward0>)

In [49]:
# Calculating Loss
criterion = MSELoss()
loss = criterion(preds, y_train_tensor)
print(loss)
# to learners: You may get different values

tensor(4.0230, grad_fn=<MseLossBackward0>)


# Comparing predictions on X_train with Target

In [50]:
preds[:5]

tensor([[0.5389],
        [0.5550],
        [0.6678],
        [0.5743],
        [0.6381]], grad_fn=<SliceBackward0>)

In [51]:
y_train_tensor[:5]

tensor([[2.0000],
        [3.1100],
        [1.6300],
        [3.0200],
        [1.5500]])

In [52]:
model[0].weight

Parameter containing:
tensor([[ 0.6477, -0.2124],
        [-0.6312, -0.6940]], requires_grad=True)

In [53]:
model[2].weight

Parameter containing:
tensor([[0.4316, 0.3408]], requires_grad=True)

---

Run all the cells above till here!

# Optimization and Backpropogation

In [54]:
import torch.optim as optim #optimizer to update the weights

optimizer = optim.SGD(model.parameters(), lr = 0.001)

In [55]:
loss.backward() #run the backpropagation

In [56]:
optimizer.step() #apply the updated weights to our model

In [57]:
model[0].weight

Parameter containing:
tensor([[ 0.6477, -0.2123],
        [-0.6311, -0.6939]], requires_grad=True)

In [58]:
model[2].weight

Parameter containing:
tensor([[0.4338, 0.3428]], requires_grad=True)

In [70]:
from torch.utils.data import TensorDataset, DataLoader

In [71]:
train_data = TensorDataset(X_train_tensor, y_train_tensor)

In [72]:
model = nn.Sequential(
    nn.Linear(2, 2),
    nn.Sigmoid(),
    nn.Linear(2, 1)
)
optimizer = optim.SGD(model.parameters(), lr = 0.001)

In [73]:
# performance on train  and test sets  before training
train_loss = criterion(model(X_train_tensor), y_train_tensor).item()
test_loss = criterion(model(X_test_tensor), y_test_tensor).item()
print(f'Without Training:\nTrain Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

Without Training:
Train Loss: 7.8984, Test Loss: 8.1901


In [74]:
# Looking at predictions
model(X_train_tensor)[:5]

tensor([[-0.2140],
        [-0.2049],
        [-0.1338],
        [-0.2462],
        [-0.1054]], grad_fn=<SliceBackward0>)

# Stochastic Gradient Descent

In [None]:
train_loader = DataLoader(train_data, batch_size=1, shuffle=True)
# Execute the training loop
for epoch in range(10):
    for X_batch, y_batch in train_loader:
        # Forward pass
        pred = model(X_batch)
        loss = criterion(pred, y_batch)

        # Backward pass and optimization
        optimizer.zero_grad() #reset the gradient done in the previous step
        loss.backward()
        optimizer.step()

    train_loss = criterion(model(X_train_tensor), y_train_tensor).item()
    # print(epoch,': ', train_loss)
    test_loss = criterion(model(X_test_tensor), y_test_tensor).item()
    print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

Epoch 1: Train Loss: 0.5396, Test Loss: 0.6067
Epoch 2: Train Loss: 0.4566, Test Loss: 0.5064
Epoch 3: Train Loss: 0.4251, Test Loss: 0.4716
Epoch 4: Train Loss: 0.4033, Test Loss: 0.4504
Epoch 5: Train Loss: 0.3882, Test Loss: 0.4333
Epoch 6: Train Loss: 0.3769, Test Loss: 0.4240
Epoch 7: Train Loss: 0.3691, Test Loss: 0.4179
Epoch 8: Train Loss: 0.3634, Test Loss: 0.4106
Epoch 9: Train Loss: 0.3591, Test Loss: 0.4082
Epoch 10: Train Loss: 0.3565, Test Loss: 0.4074


In [76]:
# Looking at predictions
model(X_train_tensor)[:5]

tensor([[2.4551],
        [2.4190],
        [2.1026],
        [2.5480],
        [2.0118]], grad_fn=<SliceBackward0>)

# Batch Gradient Descent

In [77]:
# Reinitialising model weights
model = nn.Sequential(
    nn.Linear(2, 2),
    nn.Sigmoid(),
    nn.Linear(2, 1)
)
optimizer = optim.SGD(model.parameters(), lr = 0.001)

In [78]:
train_loader = DataLoader(train_data, batch_size=800, shuffle=True) #800 is the number of samples in train set
# Execute the training loop
for epoch in range(1000): # increasing the epochs for effective training
    for X_batch, y_batch in train_loader:
        # Forward pass
        pred = model(X_batch)
        loss = criterion(pred, y_batch)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch+1) % 100 == 0: # printing after every 100 epochs
        train_loss = criterion(model(X_train_tensor), y_train_tensor).item()
        # print(epoch,': ', train_loss)
        test_loss = criterion(model(X_test_tensor), y_test_tensor).item()
        print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

Epoch 100: Train Loss: 3.7831, Test Loss: 3.9952
Epoch 200: Train Loss: 2.4660, Test Loss: 2.6429
Epoch 300: Train Loss: 1.6997, Test Loss: 1.8493
Epoch 400: Train Loss: 1.2554, Test Loss: 1.3839
Epoch 500: Train Loss: 0.9974, Test Loss: 1.1096
Epoch 600: Train Loss: 0.8461, Test Loss: 0.9459
Epoch 700: Train Loss: 0.7554, Test Loss: 0.8456
Epoch 800: Train Loss: 0.6990, Test Loss: 0.7819
Epoch 900: Train Loss: 0.6619, Test Loss: 0.7391
Epoch 1000: Train Loss: 0.6358, Test Loss: 0.7086


# Mini-Batch Gradient Descent

In [79]:
# Reinitialising model weights
model = nn.Sequential(
    nn.Linear(2, 2),
    nn.Sigmoid(),
    nn.Linear(2, 1)
)
optimizer = optim.SGD(model.parameters(), lr = 0.001)

In [80]:
train_loader = DataLoader(train_data, batch_size= 64, shuffle=True) #800 is the number of samples in train set
# Execute the training loop
for epoch in range(500): # increasing the epochs for effective training
    for X_batch, y_batch in train_loader:
        # Forward pass
        pred = model(X_batch)
        loss = criterion(pred, y_batch)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch+1) % 50 == 0: # printing after every 100 epochs
        train_loss = criterion(model(X_train_tensor), y_train_tensor).item()
        # print(epoch,': ', train_loss)
        test_loss = criterion(model(X_test_tensor), y_test_tensor).item()
        print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

Epoch 50: Train Loss: 0.7694, Test Loss: 0.8649
Epoch 100: Train Loss: 0.5150, Test Loss: 0.5748
Epoch 150: Train Loss: 0.4644, Test Loss: 0.5176
Epoch 200: Train Loss: 0.4309, Test Loss: 0.4821
Epoch 250: Train Loss: 0.4074, Test Loss: 0.4578
Epoch 300: Train Loss: 0.3910, Test Loss: 0.4411
Epoch 350: Train Loss: 0.3792, Test Loss: 0.4294
Epoch 400: Train Loss: 0.3708, Test Loss: 0.4213
Epoch 450: Train Loss: 0.3647, Test Loss: 0.4156
Epoch 500: Train Loss: 0.3604, Test Loss: 0.4114
