In [1]:
import pandas as pd
import torch
# Read the train/test splits from the working directory.
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

# Convert each frame to a float64 tensor (assumes every column is numeric).
train = torch.tensor(train_df.values, dtype=torch.float64)
test = torch.tensor(test_df.values, dtype=torch.float64)

# All columns but the last are features; the last column is split off as the
# target and reshaped to a single column, i.e. shape (n_rows, 1).
trainX, trainY = train[:, :-1], train[:, -1].reshape(-1, 1)
testX, testY = test[:, :-1], test[:, -1].reshape(-1, 1)

In [2]:
## Q1: about how many bytes does trainX consume?

In [3]:
#q1
# Total bytes = number of elements * bytes per element.
# numel() counts elements for any shape (including an empty tensor), so there
# is no need to index trainX[0] to discover the row width.
trainX.numel() * trainX.element_size()

83520

In [4]:
## Q2: what is the biggest difference we would have in any one cell if we used float16 instead of float64?

In [5]:
#q2
# Round-trip the features through float16 and back so the comparison with the
# original float64 values happens at full precision.
roundtrip = trainX.to(dtype=torch.float16).to(dtype=torch.float64)

# Largest per-cell deviation introduced by the float16 conversion.
# A result of exactly 0.0 means every cell survives the round trip unchanged.
max_absolute_difference = (trainX - roundtrip).abs().max()

max_absolute_difference.item()

0.0

In [6]:
## Q3: is a CUDA GPU available on your VM?

In [7]:
#q3
# True only when PyTorch can see a usable CUDA device.
cuda_available = torch.cuda.is_available()
cuda_available

False

In [8]:
## Part 2

In [9]:
# Hand-picked weights as a (10, 1) column vector: the first six feature
# columns (up to and including POS_50_59_CP) get the low weight, the last
# four (starting at POS_60_69_CP) get the high weight.
low_weight, high_weight = 0.0040, 0.0300
coef = torch.tensor(
    [[low_weight]] * 6 + [[high_weight]] * 4,
    dtype=trainX.dtype,
)

In [10]:
## Q4: what is the predicted number of deaths for the first census tract?

In [11]:
#q4
# Weighted sum of the first test row's features; the product holds a single
# element, which .item() unwraps to a Python float.
first_predicted = torch.matmul(testX[0], coef)
first_predicted.item()

9.844

In [12]:
## Q5: what is the average number of predicted deaths, over the whole testX dataset?

In [13]:
#q5
# One prediction per test row, then average them.
all_predicted = torch.matmul(testX, coef)
torch.mean(all_predicted).item()

12.073632183908048

In [14]:
## Part 3

In [15]:
## Q6: first, what is y when x is a tensor containing 0.0?

In [16]:
#q6
# Evaluate y = x^2 - 8x + 19 at x = 0.
x = torch.tensor(0.0)
squared_term, linear_term, constant_term = x**2, 8*x, 19
y = squared_term - linear_term + constant_term
y.item()

19.0

In [17]:
## Q7: what x value minimizes y?

In [18]:
#q7
def f(x):
    """Return the quadratic x**2 - 8*x + 19 evaluated at ``x``.

    Uses only arithmetic operators, so ``x`` may be a tensor (autograd can
    then differentiate through the result) or a plain Python number.
    """
    return x**2 - 8*x + 19

# Start the search at 0.0; requires_grad makes autograd track operations on x.
x = torch.tensor(0.0, requires_grad=True, dtype=torch.float64)
optimizer = torch.optim.SGD([x], lr=0.1)

for epoch in range(500):
    y = f(x)               # forward pass at the current x

    y.backward()           # populate x.grad with dy/dx
    optimizer.step()       # move x against the gradient, scaled by lr
    optimizer.zero_grad()  # clear x.grad so the next backward starts fresh

x.item()

3.999999999999999

In [19]:
## Part 4

In [20]:
## Q8: what is the MSE (mean-square error) when we make predictions using this vector of zero coefficients?

In [21]:
#q8
# NOTE: this rebinds `coef`, replacing the hand-picked Part 2 weights that
# Q4/Q5 read; rerun the Part 2 cell before re-evaluating those questions.
n_features = trainX.shape[1]
coef = torch.zeros((n_features, 1), dtype=trainX.dtype)

# With all-zero weights every prediction is 0, so this is the mean of trainY**2.
residuals = trainX @ coef - trainY
mse = (residuals**2).mean().item()
mse

197.8007662835249

In [22]:
# Write a training loop to improve the coefficients.
# Requirements:
#
# - use the torch.optim.SGD optimizer
# - use a learning rate of 0.000002
# - run for 500 epochs
# - use torch.nn.MSELoss for your loss function

In [23]:
## Q9: what is the MSE over the training data, using the coefficients resulting from the above training?

In [24]:
#q9
# Hyperparameters fixed by the assignment requirements.
seed_value = 544
learning_rate = 0.000002
num_epochs = 500
batch_size = 50

# Seed the global RNG before any batches are drawn so the shuffled batch
# order (and therefore the final coefficients) is repeatable.
torch.manual_seed(seed_value)
loss_fn2 = torch.nn.MSELoss()

# trainX / trainY come from the data-loading cell at the top of the notebook;
# they are not modified anywhere in between, so the CSVs are not re-read here.
ds = torch.utils.data.TensorDataset(trainX, trainY)
dl = torch.utils.data.DataLoader(ds, batch_size=batch_size, shuffle=True)

# One trainable weight per feature column, sized and typed from trainX
# instead of a hard-coded width.
coeff = torch.zeros((trainX.shape[1], 1), dtype=trainX.dtype, requires_grad=True)

optimizer = torch.optim.SGD([coeff], lr=learning_rate)

for epoch in range(num_epochs):
    for batchX, batchY in dl:
        loss = loss_fn2(batchX @ coeff, batchY)

        loss.backward()        # gradient of the batch MSE w.r.t. coeff
        optimizer.step()       # SGD update of coeff
        optimizer.zero_grad()  # reset gradients before the next batch

# Evaluation only: skip autograd bookkeeping while scoring the full training set.
with torch.no_grad():
    train_mse = loss_fn2(trainX @ coeff, trainY).item()
train_mse

26.8113940147193

In [25]:
## Q10: what is the MSE over the test data?

In [26]:
#q10
# Score the trained coefficients on the held-out test split. This is
# evaluation only, so no_grad avoids recording an autograd graph through coeff.
with torch.no_grad():
    test_predictions = testX @ coeff
    test_mse = loss_fn2(test_predictions, testY).item()
test_mse

29.05854692548551