In [1]:
# prompt: mount google drive

from google.colab import drive
drive.mount('/content/drive', force_remount=True)


Mounted at /content/drive


In [2]:
import torch
import numpy as np
import pandas as pd
from torch import tensor


In [3]:
train_df=pd.read_csv("/content/drive/MyDrive/Dataset/adult_train.csv")
test_df=pd.read_csv("/content/drive/MyDrive/Dataset/adult_test.csv")

In [4]:
train_df.head()

Unnamed: 0,age,fnlwgt,educational-num,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income,log_capital_gain,log_capital_loss,Private_Workclass,education_label,marital_label,white_collar_label,gender_male,native_country_label,high_income
0,81,36147,15,0,1,1,10605,0,2,38,50,9.269175,0.0,1,0,1,0,1,1,0
1,46,525848,7,0,1,1,0,0,48,38,50,0.0,0.0,0,0,1,0,1,1,1
2,39,77516,13,1,1,1,2174,0,40,38,50,7.684784,0.0,0,1,0,1,1,1,1
3,39,179137,9,1,1,0,0,0,40,38,50,0.0,0.0,1,0,0,1,0,1,1
4,46,215943,9,4,1,0,0,0,40,38,50,0.0,0.0,1,0,0,1,0,1,1


In [5]:
train_df.shape

(38388, 20)

In [6]:
test_df.shape

(9597, 20)

In [7]:
label_data=tensor(train_df.high_income,dtype=torch.int)
label_data

tensor([0, 1, 1,  ..., 1, 0, 1], dtype=torch.int32)

In [8]:
feature=train_df[train_df.columns[: -1]].astype(float)
feature_data=tensor(feature.values,dtype=torch.float)
feature_data

tensor([[8.1000e+01, 3.6147e+04, 1.5000e+01,  ..., 0.0000e+00, 1.0000e+00,
         1.0000e+00],
        [4.6000e+01, 5.2585e+05, 7.0000e+00,  ..., 0.0000e+00, 1.0000e+00,
         1.0000e+00],
        [3.9000e+01, 7.7516e+04, 1.3000e+01,  ..., 1.0000e+00, 1.0000e+00,
         1.0000e+00],
        ...,
        [3.0000e+01, 2.8946e+05, 1.3000e+01,  ..., 1.0000e+00, 0.0000e+00,
         1.0000e+00],
        [3.9000e+01, 3.0163e+05, 1.0000e+01,  ..., 1.0000e+00, 0.0000e+00,
         1.0000e+00],
        [2.2000e+01, 2.4477e+05, 7.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         1.0000e+00]])

# One Step of Linear Model


```
Initialize weights randomly and normalize data
Calculate Prediction (weights * features) {  h(x)=W∗x  }
Calculate Loss
Backpropagation
Gradient Step  W=W−lr∗grad
```

In [9]:
feature_data.shape

torch.Size([38388, 19])

In [10]:
label_data.shape

torch.Size([38388])

# Initialize weights randomly

In [11]:
torch.manual_seed(42)
n_params = feature_data.shape[1]
weights = torch.rand(n_params)
weights

tensor([0.8823, 0.9150, 0.3829, 0.9593, 0.3904, 0.6009, 0.2566, 0.7936, 0.9408,
        0.1332, 0.9346, 0.5936, 0.8694, 0.5677, 0.7411, 0.4294, 0.8854, 0.5739,
        0.2666])

In [12]:
weights.shape

torch.Size([19])

# Normalization

In [13]:
max_value, _ = feature_data.max(dim=0)
feature_data /= max_value
feature_data

tensor([[0.9000, 0.0243, 0.9375,  ..., 0.0000, 1.0000, 1.0000],
        [0.5111, 0.3528, 0.4375,  ..., 0.0000, 1.0000, 1.0000],
        [0.4333, 0.0520, 0.8125,  ..., 1.0000, 1.0000, 1.0000],
        ...,
        [0.3333, 0.1942, 0.8125,  ..., 1.0000, 0.0000, 1.0000],
        [0.4333, 0.2024, 0.6250,  ..., 1.0000, 0.0000, 1.0000],
        [0.2444, 0.1642, 0.4375,  ..., 0.0000, 0.0000, 1.0000]])

# Calculate Prediction

In [14]:
def calculate_preds(weights, feature_data):
  mults = (feature_data * weights)
  preds = mults.sum(axis=1)
  return preds

In [15]:
preds = calculate_preds(weights, feature_data)
preds[:5]

tensor([5.5894, 4.7198, 6.2342, 4.4509, 5.1177])

# Calculate Loss

In [16]:
def calculate_loss(preds, label_data):
  return torch.abs(preds-label_data).mean()

In [17]:
loss = calculate_loss(preds, label_data)
loss

tensor(4.4011)

# Calculate Gradients and Backpropagation

In [18]:
weights.requires_grad_()

tensor([0.8823, 0.9150, 0.3829, 0.9593, 0.3904, 0.6009, 0.2566, 0.7936, 0.9408,
        0.1332, 0.9346, 0.5936, 0.8694, 0.5677, 0.7411, 0.4294, 0.8854, 0.5739,
        0.2666], requires_grad=True)

In [19]:
preds = calculate_preds(weights, feature_data)
loss = calculate_loss(preds, label_data)
loss

tensor(4.4011, grad_fn=<MeanBackward0>)

In [20]:
# Backpropagation
loss.backward()

In [21]:
weights.grad

tensor([0.4293, 0.1272, 0.6297, 0.2889, 0.9531, 0.6683, 0.0107, 0.0199, 0.4083,
        0.9100, 1.0000, 0.0628, 0.0417, 0.6928, 0.2294, 0.4708, 0.5089, 0.6683,
        0.9129])

# Gradient Step

In [22]:
learning_rate = 0.1

with torch.no_grad():
  weights.sub_(weights.grad * learning_rate)
  preds = calculate_preds(weights, feature_data)
  loss = calculate_loss(preds, label_data)
  print(loss)

tensor(3.7681)


# Setting Up everything together

In [23]:
train_feature_df = train_df[train_df.columns[:-1]].astype(float)
train_features = tensor(train_feature_df.values, dtype=torch.float)
train_labels = tensor(train_df.high_income, dtype=torch.int)

In [24]:
max_value, _ = train_features.max(dim=0)
train_features /= max_value

In [25]:
test_feature_df = test_df[test_df.columns[:-1]].astype(float)
test_features = tensor(test_feature_df.values, dtype=torch.float)
test_labels = tensor(test_df.high_income, dtype=torch.int)

In [26]:
max_value, _ = test_features.max(dim=0)
test_features /= max_value

In [27]:
def init_weights():
  torch.manual_seed(42)
  n_params = train_features.shape[1]
  weights = torch.rand(n_params)
  return weights.requires_grad_()

In [28]:
def calculate_preds(weights, features):
  mults = (features * weights)
  preds = mults.sum(axis=1)
  return preds

In [29]:
def calculate_loss(preds, labels):
  return torch.abs(preds-labels).mean()

In [30]:
def update_weights(weights, lr):
  weights.sub_(weights.grad * lr)
  return

In [31]:
def one_epoch(epoch_no, weights, lr):
  preds = calculate_preds(weights, feature_data)
  loss = calculate_loss(preds, train_labels)
  loss.backward()
  with torch.no_grad(): update_weights(weights, lr)
  print(f"Epoch {epoch_no+1} => Loss: {loss}")

In [32]:
def train_model(epochs, lr):
  weights = init_weights()
  for epoch_no in range(epochs):
    one_epoch(epoch_no, weights, lr)
  return weights

In [33]:
weights = train_model(epochs=15, lr=0.01)

Epoch 1 => Loss: 4.401118278503418
Epoch 2 => Loss: 4.337820053100586
Epoch 3 => Loss: 4.2112226486206055
Epoch 4 => Loss: 4.021326065063477
Epoch 5 => Loss: 3.768131971359253
Epoch 6 => Loss: 3.45163893699646
Epoch 7 => Loss: 3.0718469619750977
Epoch 8 => Loss: 2.628756284713745
Epoch 9 => Loss: 2.122382640838623
Epoch 10 => Loss: 1.5549633502960205
Epoch 11 => Loss: 0.9629470705986023
Epoch 12 => Loss: 0.6690229773521423
Epoch 13 => Loss: 0.8212868571281433
Epoch 14 => Loss: 1.225162148475647
Epoch 15 => Loss: 1.7557687759399414


In [34]:
torch.save(weights, "linear_model_weights.pt")

In [35]:
test_preds = calculate_preds(weights, test_features)
test_preds[:5]

tensor([-0.6887, -1.9811, -1.4544, -1.9354, -1.5654], grad_fn=<SliceBackward0>)

In [36]:
results = test_labels == (test_preds > 0.5).int()
results[:5]

tensor([False, False, False, False, False])

In [37]:
results.float().mean()

tensor(0.2398)

In [38]:
def accuracy(weights, features):
  preds = calculate_preds(weights, features)
  results = test_labels == (preds > 0.5).int()
  return results.float().mean()

In [39]:
trained_weights = torch.load("linear_model_weights.pt")

In [40]:
accuracy(trained_weights, test_features)

tensor(0.2398)