## [DO NOT CHANGE THIS CELL] Import Dependencies
#### Do not use additional packages! Use ONLY the packages imported below.

In [None]:
import os
import csv
import random
import numpy as np

import torch
import torch.nn as nn
from tqdm.notebook import tqdm

## [DO NOT CHANGE THIS CELL] Define file paths (we use the default sample data from Colab)

In [None]:
train_csv_fp = './sample_data/california_housing_train.csv'
test_csv_fp = './sample_data/california_housing_test.csv'

## [DO NOT CHANGE THIS CELL] Define a function for reading CSV files

In [None]:
def read_csv(csv_fp):
    """Load a numeric CSV file into a column-oriented dictionary.

    The first row is taken as the header. Every later row is parsed cell by
    cell with ``float`` and appended to the column at the same position.

    Args:
        csv_fp: Path of the CSV file to read.

    Returns:
        A tuple ``(data, all_names)``: ``data`` maps each header name to the
        list of float values read for that column, and ``all_names`` lists the
        header names in file order. An empty file yields ``({}, [])``.

    Raises:
        ValueError: If a non-header cell cannot be converted with ``float``.
        IndexError: If a data row has more cells than the header row.
    """
    data = dict()
    all_names = []
    # newline='' leaves line-ending handling to the csv module, so CR/LF
    # characters inside quoted fields are not rewritten by the text layer.
    with open(csv_fp, newline='') as f:
        reader = csv.reader(f)
        header = next(reader, None)
        if header is not None:
            for name in header:
                data[name] = list()
                all_names.append(name)
        for row in reader:
            for ii, val in enumerate(row):
                data[all_names[ii]].append(float(val))

    return data, all_names

## [DO NOT CHANGE THIS CELL] Read CSV files as a DICTIONARY

In [None]:
# Load both splits as {column name: list of floats}; `names` keeps the header order
# of the training file.
train_data, names = read_csv(train_csv_fp)
# The header list returned for the test file is discarded.
test_data, _ = read_csv(test_csv_fp)
print('Attributes are: ', names)

Attributes are:  ['longitude', 'latitude', 'housing_median_age', 'total_rooms', 'total_bedrooms', 'population', 'households', 'median_income', 'median_house_value']


## [DO NOT CHANGE THIS CELL] Names of INPUT/OUTPUT

In [None]:
# Every column except the last is used as a model input; the last column is the
# value the model is trained to predict.
input_name = names[:-1]
output_name = names[-1]
print('Inputs are: ', input_name)
print('Outputs are: ', output_name)

Inputs are:  ['longitude', 'latitude', 'housing_median_age', 'total_rooms', 'total_bedrooms', 'population', 'households', 'median_income']
Outputs are:  median_house_value


## [DO NOT CHANGE THIS CELL] Normalize the data

In [None]:
def normalize(data, mean=None, std=None):
    """Standardize every column of ``data`` to ``(value - mean) / std``.

    When neither ``mean`` nor ``std`` is supplied, both are computed per column
    from ``data`` itself. When both are supplied, they are applied as given,
    which lets one split be scaled with another split's statistics.

    Note:
        ``data`` is updated in place (each column is replaced by a NumPy
        array) and the same dictionary object is returned.

    Args:
        data: Mapping from column name to a sequence of numbers.
        mean: Optional mapping from column name to the mean to subtract.
        std: Optional mapping from column name to the standard deviation to
            divide by.

    Returns:
        A tuple ``(data, mean, std)`` with the standardized columns and the
        statistics that were used.

    Raises:
        ValueError: If exactly one of ``mean`` / ``std`` is supplied.
        KeyError: If supplied statistics lack an entry for a column of ``data``.
    """
    # Half-specified statistics used to fail later with an opaque TypeError on
    # None[...]; reject them up front with an explicit message.
    if (mean is None) != (std is None):
        raise ValueError('mean and std must be given together or both omitted')

    stat_flag = mean is None
    if stat_flag:
        mean = dict()
        std = dict()

    for k, v in data.items():
        values = np.asarray(v)
        if stat_flag:
            mean[k] = np.mean(values)
            std[k] = np.std(values)
        # A constant column has std == 0; dividing by it would give NaN/inf.
        # Fall back to a divisor of 1 so the column is only centred.
        scale = std[k] if std[k] != 0 else 1.0
        data[k] = (values - mean[k]) / scale
    return data, mean, std

# The statistics are computed from the training split ...
train_data, mean, std = normalize(train_data)
# ... and the same training statistics are reused to scale the test split.
test_data, _, _ = normalize(test_data, mean, std)

## **[CHANGE ONLY THE CELL BELOW]** Code for Training

In [None]:
def train(data, seed):
    """Fit a linear model ``pred = sum(x * weight) + bias`` with mini-batch Adam.

    Uses the module-level ``input_name`` (feature columns) and ``output_name``
    (target column) to pick the columns of ``data``.

    Args:
        data: Mapping from column name to a 1-D array-like of per-sample
            values; must contain every name in ``input_name`` and
            ``output_name``.
        seed: Integer used to seed ``random``, NumPy and PyTorch so that the
            initial parameters and the shuffling order are reproducible.

    Returns:
        A tuple ``(weight, bias)`` of NumPy arrays with shapes
        ``(len(input_name), 1)`` and ``(1,)``.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

    weight = np.random.rand(len(input_name), 1)
    bias = np.random.rand(1)

    ##### write your code to optimize weight and bias #####
    weight = nn.Parameter(torch.tensor(weight, dtype=torch.float32), requires_grad=True)
    bias = nn.Parameter(torch.tensor(bias, dtype=torch.float32), requires_grad=True)

    optimizer = torch.optim.Adam([weight, bias], lr=0.001)

    max_epoch = 50
    num_data = len(data[output_name])
    # Target roughly 200 mini-batches per epoch. Clamp to 1 so that fewer than
    # 200 samples do not produce a step of 0 in range() below.
    batch_size = max(1, num_data // 200)

    # Convert the dataset to tensors once, instead of rebuilding a tensor from
    # a Python list of arrays for every mini-batch.
    # all_inputs: (num_features, num_data), all_answers: (num_data,)
    all_inputs = torch.tensor(
        np.stack([np.asarray(data[name]) for name in input_name]),
        dtype=torch.float32,
    )
    all_answers = torch.tensor(np.asarray(data[output_name]), dtype=torch.float32)

    for _ in tqdm(range(max_epoch)):
        # Fresh sample order every epoch, driven by the seeded `random` module.
        rand_idx = np.arange(num_data)
        random.shuffle(rand_idx)
        rand_idx = torch.as_tensor(rand_idx, dtype=torch.long)

        for i in range(0, num_data, batch_size):
            # Slicing past the end is safe; the last batch is simply shorter.
            curr_batch_idx = rand_idx[i:i + batch_size]

            curr_input = all_inputs[:, curr_batch_idx]
            curr_answer = all_answers[curr_batch_idx]

            # (num_features, B) * (num_features, 1) summed over features -> (B,)
            curr_pred = torch.sum(curr_input * weight, dim=0) + bias

            loss = torch.mean((curr_pred - curr_answer) ** 2)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    weight = weight.detach().numpy()
    bias = bias.detach().numpy()
    #######################################################

    # do not modify this return part.
    return weight, bias

## [DO NOT CHANGE THIS CELL] Code for testing

In [None]:
def test(test_data, weight, bias):
    """Average the per-sample prediction error of a linear model.

    For every sample, the feature values named in the module-level
    ``input_name`` are combined as ``features.T @ weight + bias`` and compared
    with the value stored under ``output_name``. The norm of each residual is
    divided by the sample count and accumulated.

    Args:
        test_data: Mapping from column name to an indexable sequence of
            per-sample values.
        weight: Weight array applied to the feature vector with ``@``.
        bias: Bias added to the weighted sum.

    Returns:
        The accumulated average loss; ``0.0`` when there are no samples.
    """
    num_data = len(test_data[output_name])

    avg_loss = 0.0
    for sample_idx in range(num_data):
        # Gather this sample's features in the order given by input_name.
        features = np.asarray([test_data[col][sample_idx] for col in input_name])
        target = test_data[output_name][sample_idx]

        prediction = (features.T @ weight) + bias

        # Each sample contributes its residual norm scaled by 1 / num_data.
        avg_loss += np.linalg.norm(target - prediction) / num_data

    return avg_loss

## [DO NOT CHANGE THIS CELL] Run training and testing for every seed

In [None]:
# Train from scratch once per seed (0 .. max_iter - 1), evaluate each model on
# the test split, and average the test losses into `total_loss`, which the
# grading cell reads.
max_iter = 50
total_loss = 0.0
for seed in tqdm(range(max_iter)):
    weight, bias = train(train_data, seed)

    test_loss = test(test_data, weight, bias)

    print('[SEED {}] TEST LOSS : {}'.format(seed, test_loss.item()))

    # Dividing each term by max_iter makes the running sum end up as the mean.
    total_loss += test_loss / max_iter

print('TOTAL TEST LOSS: {}'.format(total_loss))

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]



[SEED 0] TEST LOSS : 0.43440659836523243


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 1] TEST LOSS : 0.4351249356229076


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 2] TEST LOSS : 0.43512341686187667


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 3] TEST LOSS : 0.43606036284616295


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 4] TEST LOSS : 0.4346856438457184


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 5] TEST LOSS : 0.4348915494708028


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 6] TEST LOSS : 0.4344954495034843


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 7] TEST LOSS : 0.43473580930287636


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 8] TEST LOSS : 0.4333226076956657


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 9] TEST LOSS : 0.4328732262866072


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 10] TEST LOSS : 0.43512975704988877


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 11] TEST LOSS : 0.43525868998366174


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 12] TEST LOSS : 0.4330770435510642


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 13] TEST LOSS : 0.43452423817614305


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 14] TEST LOSS : 0.4341413664127242


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 15] TEST LOSS : 0.43458124439058216


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 16] TEST LOSS : 0.43391402627252257


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 17] TEST LOSS : 0.4345532279219676


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 18] TEST LOSS : 0.4340520562530878


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 19] TEST LOSS : 0.43428994124148007


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 20] TEST LOSS : 0.43429419509778305


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 21] TEST LOSS : 0.4338621463806147


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 22] TEST LOSS : 0.4344003552023613


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 23] TEST LOSS : 0.4349476748971888


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 24] TEST LOSS : 0.4343049459684844


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 25] TEST LOSS : 0.43433361708996104


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 26] TEST LOSS : 0.4335528326945221


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 27] TEST LOSS : 0.43408489704396624


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 28] TEST LOSS : 0.43379517048343874


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 29] TEST LOSS : 0.43376095861037095


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 30] TEST LOSS : 0.43445146300293014


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 31] TEST LOSS : 0.43509363698163595


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 32] TEST LOSS : 0.4362405476280005


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 33] TEST LOSS : 0.43560746780724335


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 34] TEST LOSS : 0.43383778590479544


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 35] TEST LOSS : 0.4349353989569713


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 36] TEST LOSS : 0.4345178235663942


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 37] TEST LOSS : 0.4338402430614829


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 38] TEST LOSS : 0.4341606364935129


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 39] TEST LOSS : 0.4347407383079726


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 40] TEST LOSS : 0.433003513312295


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 41] TEST LOSS : 0.4348356454208853


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 42] TEST LOSS : 0.4337414386631836


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 43] TEST LOSS : 0.4349179789206127


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 44] TEST LOSS : 0.43499678422223337


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 45] TEST LOSS : 0.43338812131875787


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 46] TEST LOSS : 0.4341977244923364


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 47] TEST LOSS : 0.43409807669790335


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 48] TEST LOSS : 0.43466786538089275


  0%|          | 0/50 [00:00<?, ?it/s]

[SEED 49] TEST LOSS : 0.4340550992092771
TOTAL TEST LOSS: 0.43439811947744933


## [DO NOT CHANGE THIS CELL] Code for grading

In [None]:
######## DO NOT CHANGE THIS GRADING PART ########
# Full marks when the averaged test loss is below THRESHOLD; otherwise the
# score decays exponentially with the amount by which the loss exceeds it.
THRESHOLD = 0.5
max_point = 40

if total_loss < THRESHOLD:
    your_point = max_point
else:
    your_point = np.exp( -0.25* (total_loss - THRESHOLD) ) * max_point

print('YOUR POINT : {}/{}'.format(your_point, max_point))
####################################################

YOUR POINT : 40/40
