## [DO NOT CHANGE THIS CELL] Import Dependencies
#### Do not use any additional packages! Use only the ones imported below.

In [1]:
import os
import csv
import random
import numpy as np

import torch
import torch.nn as nn
from tqdm.notebook import tqdm

## [DO NOT CHANGE THIS CELL] Define file paths (we use the default sample data from Colab)

In [2]:
# Train/test CSV splits of the California housing data, given as paths
# relative to the current working directory.
train_csv_fp = './sample_data/california_housing_train.csv'
test_csv_fp = './sample_data/california_housing_test.csv'

## [DO NOT CHANGE THIS CELL] Define a function to read CSV files

In [3]:
def read_csv(csv_fp):
    """Read a CSV file with a header row into per-column lists of floats.

    Args:
        csv_fp: Path of the CSV file to open.

    Returns:
        tuple: ``(data, all_names)`` where ``data`` maps each header name to
        the list of that column's values converted with ``float`` and
        ``all_names`` lists the header names in file order. An empty file
        yields ``({}, [])``.

    Raises:
        ValueError: If a cell after the header cannot be converted to float.
        IndexError: If a row has more cells than the header has names.
    """
    columns = {}
    header = []
    with open(csv_fp) as handle:
        rows = csv.reader(handle)

        # The first row names the columns; every later row holds values.
        for name in next(rows, []):
            columns[name] = []
            header.append(name)

        for record in rows:
            for position, cell in enumerate(record):
                columns[header[position]].append(float(cell))

    return columns, header

## [DO NOT CHANGE THIS CELL] Read CSV files as a DICTIONARY

In [4]:
# Load both splits as {column name: list of floats}. `names` keeps the header
# order of the training file; the test file's header is discarded.
train_data, names = read_csv(train_csv_fp)
test_data, _ = read_csv(test_csv_fp)
print('Attributes are: ', names)

Attributes are:  ['longitude', 'latitude', 'housing_median_age', 'total_rooms', 'total_bedrooms', 'population', 'households', 'median_income', 'median_house_value']


## [DO NOT CHANGE THIS CELL] Names of INPUT/OUTPUT

In [5]:
# Every column except the last is a model input; the last column is the
# value the model has to predict.
input_name = names[:-1]
output_name = names[-1]
print('Inputs are: ', input_name)
print('Outputs are: ', output_name)

Inputs are:  ['longitude', 'latitude', 'housing_median_age', 'total_rooms', 'total_bedrooms', 'population', 'households', 'median_income']
Outputs are:  median_house_value


## [DO NOT CHANGE THIS CELL] Normalize the data

In [6]:
def normalize(data, mean=None, std=None):
    """Standardise every column of ``data`` in place to ``(v - mean) / std``.

    Args:
        data: Dict mapping column name to a sequence of numbers. Each value
            is replaced by a NumPy array holding the standardised column.
        mean: Optional dict of per-column means. Statistics are computed from
            ``data`` only when both ``mean`` and ``std`` are ``None``.
        std: Optional dict of per-column standard deviations.

    Returns:
        tuple: ``(data, mean, std)`` - the same ``data`` dict that was passed
        in, plus the statistics that were used (freshly computed or the
        supplied ones).
    """
    compute_stats = mean is None and std is None
    if compute_stats:
        mean, std = {}, {}

    for key in list(data):
        column = data[key]
        if compute_stats:
            mean[key] = np.mean(column)
            std[key] = np.std(column)
        centred = np.array(column) - mean[key]
        data[key] = centred / std[key]

    return data, mean, std

# Standardise the training split with its own statistics, then reuse exactly
# those statistics for the test split. normalize() rewrites each dict in
# place, so re-running this cell standardises already-standardised data.
train_data, mean, std = normalize(train_data)
test_data, _, _ = normalize(test_data, mean, std)

## **[CHANGE ONLY BELOW CELL]** Code for Training

In [7]:
def train(data, seed):
    """Fit a linear model ``y = x @ weight + bias`` with one pass of per-sample Adam.

    The samples are visited once, in their stored order, and the optimiser
    takes one step per sample on the absolute prediction error.

    Args:
        data: Mapping from column name to an equal-length sequence of numbers.
            It must contain every name in the module-level ``input_name`` list
            and the module-level ``output_name`` column.
        seed: Integer used to seed ``random``, NumPy and PyTorch so that the
            random initial parameters are reproducible.

    Returns:
        tuple: ``(weight, bias)`` as float64 NumPy arrays of shape
        ``(len(input_name), 1)`` and ``(1,)``. With an empty dataset the
        randomly initialised parameters are returned unchanged.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

    weight = np.random.rand(len(input_name), 1)
    bias = np.random.rand(1)

    ##### write your code to optimize weight and bias #####
    # Optimiser settings, kept at the values of the original submission.
    learning_rate = 0.00095
    weight_decay = 0.07

    # Build the (num_samples, num_inputs) input matrix and the
    # (num_samples, 1) target column once instead of re-creating tensors from
    # Python lists on every step. float64 matches the NumPy initial values.
    features = torch.tensor(
        np.stack(
            [np.asarray(data[name], dtype=np.float64) for name in input_name],
            axis=1,
        )
    )
    targets = torch.tensor(
        np.asarray(data[output_name], dtype=np.float64)
    ).unsqueeze(1)

    # Keep direct handles on the parameters so they stay available even when
    # the loop below never runs.
    weight_param = nn.Parameter(torch.tensor(weight))
    bias_param = nn.Parameter(torch.tensor(bias))
    opt = torch.optim.Adam(
        [weight_param, bias_param], lr=learning_rate, weight_decay=weight_decay
    )

    # For a single sample the L1 loss equals sqrt(MSE) (= |error|), but its
    # gradient is 0 instead of NaN when the residual is exactly zero.
    loss_fn = nn.L1Loss()

    for sample, target in zip(features, targets):
        opt.zero_grad()
        # (num_inputs,) @ (num_inputs, 1) -> (1,), the same shape as `target`,
        # so the loss never has to broadcast.
        curr_pred = sample @ weight_param + bias_param
        curr_loss = loss_fn(curr_pred, target)
        curr_loss.backward()
        opt.step()

    weight = weight_param.detach().numpy()
    bias = bias_param.detach().numpy()
    #######################################################

    # do not modify this return part.
    return weight, bias

## [DO NOT CHANGE THIS CELL] Code for testing

In [8]:
def test(test_data, weight, bias):
    """Return the mean absolute-style error of a linear model on ``test_data``.

    For every sample the prediction ``x @ weight + bias`` is compared with the
    target and the norm of the difference is averaged over all samples.

    Args:
        test_data: Mapping from column name to an indexable sequence of
            numbers; must hold every column in the module-level ``input_name``
            list and the module-level ``output_name`` column.
        weight: NumPy array multiplied with each sample's input vector.
        bias: NumPy array added to the product.

    Returns:
        The average per-sample ``np.linalg.norm(target - prediction)``;
        ``0.0`` when there are no samples.
    """
    targets = test_data[output_name]
    sample_count = len(targets)

    mean_error = 0.0
    for idx, target in enumerate(targets):
        features = np.asarray([test_data[column][idx] for column in input_name])
        prediction = (features.T @ weight) + bias
        # Accumulate sample by sample so the floating-point summation order
        # stays fixed.
        mean_error += np.linalg.norm(target - prediction) / sample_count

    return mean_error

## [DO NOT CHANGE THIS CELL] Code for running training and testing over multiple seeds

In [9]:
# Number of independent training runs; each run uses its loop index as seed.
max_iter = 50
# Running mean of the per-seed test losses.
total_loss = 0.0
for seed in tqdm(range(max_iter)):
    weight, bias = train(train_data, seed)

    test_loss = test(test_data, weight, bias)

    # test_loss must expose .item() (e.g. a NumPy scalar); a plain Python
    # float would fail here.
    print('[SEED {}] TEST LOSS : {}'.format(seed, test_loss.item()))

    # Dividing every term by max_iter makes the final sum the mean over seeds.
    total_loss += test_loss / max_iter

print('TOTAL TEST LOSS: {}'.format(total_loss))

  0%|          | 0/50 [00:00<?, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


[SEED 0] TEST LOSS : 0.5154484780759528
[SEED 1] TEST LOSS : 0.5125512884112028
[SEED 2] TEST LOSS : 0.5067318741795438
[SEED 3] TEST LOSS : 0.5184961119803139
[SEED 4] TEST LOSS : 0.5238447204190347
[SEED 5] TEST LOSS : 0.5117176241881661
[SEED 6] TEST LOSS : 0.5156074897380245
[SEED 7] TEST LOSS : 0.5080243719273488
[SEED 8] TEST LOSS : 0.5277499027377963
[SEED 9] TEST LOSS : 0.5010860886724302
[SEED 10] TEST LOSS : 0.5157120057323926
[SEED 11] TEST LOSS : 0.49881597701781627
[SEED 12] TEST LOSS : 0.5057891727862137
[SEED 13] TEST LOSS : 0.5163637093702441
[SEED 14] TEST LOSS : 0.5137534366793255
[SEED 15] TEST LOSS : 0.5183361765014282
[SEED 16] TEST LOSS : 0.5063627902873673
[SEED 17] TEST LOSS : 0.5073821332496491
[SEED 18] TEST LOSS : 0.5127220444851831
[SEED 19] TEST LOSS : 0.5031834793450508
[SEED 20] TEST LOSS : 0.520787276321559
[SEED 21] TEST LOSS : 0.4982390302430567
[SEED 22] TEST LOSS : 0.5079522526888457
[SEED 23] TEST LOSS : 0.5186940529772448
[SEED 24] TEST LOSS : 0.52

## [DO NOT CHANGE THIS CELL] Code for Grading

In [10]:
######## DO NOT CHANGE THIS GRADING PART ########
# A mean test loss strictly below THRESHOLD earns the full max_point.
THRESHOLD = 0.5
max_point = 40

if total_loss < THRESHOLD:
    your_point = max_point
else:
    # Otherwise the score decays exponentially with the excess loss:
    # max_point * exp(-0.5 * (total_loss - THRESHOLD)).
    your_point = np.exp( -0.5* (total_loss - THRESHOLD) ) * max_point

print('YOUR POINT : {}/{}'.format(your_point, max_point))
####################################################

YOUR POINT : 39.75377586697804/40
