# Part 1: Setup

In [1]:
import pandas as pd
import torch

In [2]:
# Read the train and test splits from CSV files in the working directory
# into pandas DataFrames.

train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

In [3]:
# Split each DataFrame into a feature matrix (the column range
# POS_0_9_CP..POS_90_CP) and a one-column label matrix (DTH_CUM_CP), and
# store both as float64 tensors.

feature_cols = slice("POS_0_9_CP", "POS_90_CP")  # .loc label slices include both endpoints
label_cols = ["DTH_CUM_CP"]  # list selector keeps the labels 2-D: (n_rows, 1)

trainX = torch.tensor(train_df.loc[:, feature_cols].to_numpy(), dtype=torch.float64)
trainY = torch.tensor(train_df.loc[:, label_cols].to_numpy(), dtype=torch.float64)
testX = torch.tensor(test_df.loc[:, feature_cols].to_numpy(), dtype=torch.float64)
testY = torch.tensor(test_df.loc[:, label_cols].to_numpy(), dtype=torch.float64)

In [4]:
#q1
# Bytes used by trainX's data: number of elements times bytes per element.
trainX.numel() * trainX.element_size()

83520

In [5]:
# Measure the precision lost by storing trainX as float16: cast down, cast
# back up, and compare against the original float64 values.
trainX_float16 = trainX.half()  # float64 -> float16
trainX_back_to_float64 = trainX_float16.double()  # float16 -> float64

# element-wise round-trip error
difference = trainX - trainX_back_to_float64

# largest error magnitude over all cells, as a Python float
biggest_difference = difference.abs().max().item()

In [6]:
#q2
# The largest absolute error in any single cell of trainX after a
# float64 -> float16 -> float64 round trip. A value of 0.0 means every
# entry survived the round trip exactly.
biggest_difference

0.0

In [7]:
#q3
# is a CUDA GPU available on the VM
torch.cuda.is_available()

False

# Part 2: Prediction with Hardcoded Model

In [8]:
# Hardcoded model: a (10, 1) column of per-feature weights in the same dtype
# as trainX. The first six rows (through POS_50_59_CP) are 0.004 and the last
# four (from POS_60_69_CP on) are 0.03.
low_weight_rows = [[0.0040]] * 6   # ... through POS_50_59_CP
high_weight_rows = [[0.0300]] * 4  # POS_60_69_CP onward
coef = torch.tensor(low_weight_rows + high_weight_rows, dtype=trainX.dtype)
coef

tensor([[0.0040],
        [0.0040],
        [0.0040],
        [0.0040],
        [0.0040],
        [0.0040],
        [0.0300],
        [0.0300],
        [0.0300],
        [0.0300]], dtype=torch.float64)

In [9]:
#q4
# Predicted number of deaths for the first row of testX: the product of its
# feature vector with coef, unwrapped to a Python float.
torch.matmul(testX[0], coef).item()

9.844

In [10]:
# Predict deaths for every row of testX with one matrix multiply instead of a
# Python loop that multiplies and calls .item() once per row.
predicted_deaths = testX @ coef  # shape (n_rows, 1): one prediction per row

# Sum of all predictions. The tensor reduction may add values in a different
# order than a sequential Python loop, so the last few floating-point digits
# can differ from a row-by-row accumulation.
total_deaths = predicted_deaths.sum().item()

# mean prediction over all rows of testX
average_predicted_deaths = total_deaths / testX.size(0)

In [11]:
#q5
# the average number of predicted deaths over the whole testX dataset
average_predicted_deaths

12.073632183908048

# Part 3: Optimization

In [12]:
#q6
# what is y when x is a tensor containing 0.0
x = torch.tensor(0.0)
y = x ** 2 - 8 * x + 19
float(y)

19.0

In [13]:
#q7
# what x value minimizes y
x = torch.tensor(0.0, requires_grad=True)
optimizer = torch.optim.SGD([x], lr=0.05)

# optimization loop that uses torch.optim.SGD
for epoch in range(5000):
    y = x ** 2 - 8 * x + 19 # define the function
    y.backward() # compute the gradient
    optimizer.step() # perform the optimization step
    optimizer.zero_grad() # zero the gradients after updating

float(x)

3.9999990463256836

# Part 4: Linear Regression

In [14]:
#q8
# the MSE using this vector of zero coefficients
coef = torch.zeros((10, 1), dtype=trainX.dtype)

loss_fn = torch.nn.MSELoss()
loss = loss_fn(trainX @ coef, trainY)
loss.item()

197.8007662835249

In [15]:
# Seed PyTorch's global random number generator; presumably this is what makes
# the shuffled batch order below repeatable on a fresh Restart & Run All.
torch.manual_seed(544)
# Pair each row of trainX with the matching row of trainY.
ds = torch.utils.data.TensorDataset(trainX, trainY)
# Serve the dataset in batches of up to 50 rows, reshuffled on every pass.
dl = torch.utils.data.DataLoader(ds, batch_size=50, shuffle=True)

In [16]:
# Fit a (10, 1) coefficient vector by mini-batch SGD on mean squared error,
# starting from all zeros.
loss_fn = torch.nn.MSELoss()
coef = torch.zeros(10, 1, dtype=trainX.dtype, requires_grad=True)
optimizer = torch.optim.SGD([coef], lr=2e-6)

# 500 passes over the DataLoader, one SGD update per batch
for _ in range(500):
    for features, targets in dl:
        loss = loss_fn(torch.matmul(features, coef), targets)  # batch MSE
        loss.backward()        # gradient of the batch loss w.r.t. coef
        optimizer.step()       # apply the SGD update to coef
        optimizer.zero_grad()  # clear gradients before the next batch

In [17]:
#q9
# MSE over the full training set, using the coefficients produced by the
# training loop.
loss_fn(torch.matmul(trainX, coef), trainY).item()

26.8113940147193

In [18]:
#q10
# MSE over the test set, using the same trained coefficients.
loss_fn(torch.matmul(testX, coef), testY).item()

29.05854692548551