In [1]:
import torch
from torch import nn, optim
from torch.backends import cudnn
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import Dataset

from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler

from tqdm import tqdm
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random
import math
import csv
import sys
import os


TRAIN_PATH = "../data/train.csv"
VALID_PATH = "../data/test.csv"

In [2]:
def Dataset_Train(path):
    df = pd.read_csv(path, encoding='big5')  ## Read data
    df.drop(columns=df.columns[:3], inplace=True) ## Remove first three columns
    df.replace('NR', 0, inplace=True)             ## Replace NR to 0
    df = df.astype(np.float)
    raw_data = df.to_numpy()
    # raw_data[raw_data<0] = 0

    month_data = {}
    for month in range(12):
        sample = np.empty([18, 480])
        for day in range(20):
            sample[:, day * 24 : (day + 1) * 24] = raw_data[18 * (20 * month + day) : 18 * (20 * month + day + 1), :]
        month_data[month] = sample

    x = np.empty([12 * 471, 18 * 9], dtype = float)
    y = np.empty([12 * 471, 1], dtype = float)
    for month in range(12):
        for day in range(20):
            for hour in range(24):
                if day == 19 and hour > 14:
                    continue
                x[month * 471 + day * 24 + hour, :] = month_data[month][:,day * 24 + hour : day * 24 + hour + 9].reshape(1, -1) #vector dim:18*9 (9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9)
                y[month * 471 + day * 24 + hour, 0] = month_data[month][9, day * 24 + hour + 9] #value

    x = np.insert(x, x.shape[1], values=1, axis=1)

    mean_x = np.mean(x, axis=0) #18 * 9 
    std_x = np.std(x, axis=0) #18 * 9 
    max_x = np.max(x, axis=0)
    min_x = np.min(x, axis=0)

    for i in range(len(x)): #12 * 471
        for j in range(len(x[0])): #18 * 9 
            if std_x[j] != 0:
    #             x[i][j] = (x[i][j] - mean_x[j]) / std_x[j]
                x[i][j] = (x[i][j] - min_x[j]) / (max_x[j] - min_x[j])
            
    return torch.tensor(x, device=device).float(), torch.tensor(y, device=device).float()

In [3]:
def Dataset_Test(path):
    testdata = pd.read_csv(VALID_PATH, header = None, encoding = 'big5')
    test_data = testdata.iloc[:, 2:]
    test_data[test_data == 'NR'] = 0
    test_data = test_data.astype(np.float)
    test_data = test_data.to_numpy()
    test_x = np.empty([240, 18*9], dtype = float)
    for i in range(240):
        test_x[i, :] = test_data[18 * i: 18* (i + 1), :].reshape(1, -1)

    # test_data[test_data < 0] = 0


    for i in range(len(test_x)):
        for j in range(len(test_x[0])):
            if std_x[j] != 0:
    #             test_x[i][j] = (test_x[i][j] - mean_x[j]) / std_x[j]
                test_x[i][j] = (test_x[i][j] - min_x[j]) / (max_x[j] - min_x[j])

    test_x = np.insert(test_x, test_x.shape[1], values=1, axis=1)
    
    return torch.tensor(test_x, device=device).float()

In [4]:
def Save_Weight(file_name):
    with open(file_name, mode='w', newline='') as submit_file:
        csv_writer = csv.writer(submit_file)
        header = ['id', 'value']
        print(header)
        csv_writer.writerow(header)
        for i in range(240):
            row = ['id_' + str(i), ans_y[i][0]]
            csv_writer.writerow(row)
            print(row)

## Model

In [11]:
class My_DNN(nn.Module):
    def __init__(self):
        super(My_DNN, self).__init__()
        self.base = nn.Sequential(
            nn.Linear(163, 512),
            nn.PReLU(),
            nn.Linear(512, 128),
            nn.PReLU(),
            
            nn.Linear(128, 64),
            nn.PReLU(),
            nn.Linear(64, 1)
        )

        self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, std=0.001)

    def forward(self, x):
        x = self.base(x)
        return x        

### Training

In [None]:
## 
cudnn.benchmark = True
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
##

## 超參數
LEARNING_RATE = 0.01
N_SPLIT = 5
N_EPOCHS = 1
##

model = My_DNN()
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
loss_MSE = nn.MSELoss()

X, y = Dataset_Train(TRAIN_PATH)

cv = KFold(n_splits=5, random_state=42, shuffle=True)
n = 1
# for epoch in range(N_EPOCHS):
#     with tqdm(total=N_EPOCHS) as _tqdm:
#         _tqdm.set_description('epoch: {}/{}'.format(epoch + 1, N_EPOCHS))

#         ## Train
#         y_pred = model(X)
#         loss = loss_MSE(y_pred, y)

#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()
        
#         _tqdm.set_postfix(loss='{:.6f}'.format(loss))
#         _tqdm.update(N_EPOCHS)      

#     n += 1



for train_idx, valid_idx in cv.split(X):
    for epoch in range(N_EPOCHS):
        with tqdm(total=N_EPOCHS) as _tqdm:
            _tqdm.set_description('Folder: {} epoch: {}/{}'.format(n, epoch + 1, N_EPOCHS))
            X_train, X_valid = X[train_idx,:].to(device), X[valid_idx,:].to(device)
            y_train, y_valid = y[train_idx].to(device), y[valid_idx].to(device)
            
            
            ## Train
            y_pred = model(X_train)
            loss = loss_MSE(y_pred, y_train)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            ##
            
            ## Valid
            y_pred = model(X_valid)
            loss = loss_MSE(y_pred, X_valid)

            _tqdm.set_postfix(loss='{:.6f}'.format(loss.item()))
            _tqdm.update(1000)        
            
    n += 1

Folder: 1 epoch: 1/100: : 1000it [00:00, 10879.77it/s, loss=0.156499]         
Folder: 1 epoch: 2/100: : 1000it [00:00, 10974.57it/s, loss=0.471378]         
Folder: 1 epoch: 3/100: : 1000it [00:00, 9744.79it/s, loss=26.119038]          
Folder: 1 epoch: 4/100: : 1000it [00:00, 10322.38it/s, loss=403.552216]         
Folder: 1 epoch: 5/100: : 1000it [00:00, 9340.66it/s, loss=3236.964111]          
Folder: 1 epoch: 6/100: : 1000it [00:00, 9201.67it/s, loss=882.587280]          
Folder: 1 epoch: 7/100: : 1000it [00:00, 8334.45it/s, loss=189.529678]          
Folder: 1 epoch: 8/100: : 1000it [00:00, 7751.78it/s, loss=45.526207]          
Folder: 1 epoch: 9/100: : 1000it [00:00, 10064.08it/s, loss=13.596735]         
Folder: 1 epoch: 10/100: : 1000it [00:00, 9445.57it/s, loss=5.506054]          
Folder: 1 epoch: 11/100: : 1000it [00:00, 8926.06it/s, loss=3.633015]          
Folder: 1 epoch: 12/100: : 1000it [00:00, 10479.11it/s, loss=3.684501]         
Folder: 1 epoch: 13/100: : 1000it [00

Folder: 2 epoch: 1/100: : 1000it [00:00, 8820.46it/s, loss=625.751587]          
Folder: 2 epoch: 2/100: : 1000it [00:00, 10490.14it/s, loss=618.957764]         
Folder: 2 epoch: 3/100: : 1000it [00:00, 9509.66it/s, loss=611.420166]          
Folder: 2 epoch: 4/100: : 1000it [00:00, 10189.62it/s, loss=609.541260]         
Folder: 2 epoch: 5/100: : 1000it [00:00, 10339.84it/s, loss=613.754944]         
Folder: 2 epoch: 6/100: : 1000it [00:00, 8956.39it/s, loss=618.609131]          
Folder: 2 epoch: 7/100: : 1000it [00:00, 9202.50it/s, loss=618.259277]          
Folder: 2 epoch: 8/100: : 1000it [00:00, 9510.76it/s, loss=612.530884]          
Folder: 2 epoch: 9/100: : 1000it [00:00, 8435.63it/s, loss=606.812744]          
Folder: 2 epoch: 10/100: : 1000it [00:00, 9571.51it/s, loss=606.275269]          
Folder: 2 epoch: 11/100: : 1000it [00:00, 9411.40it/s, loss=610.792175]          
Folder: 2 epoch: 12/100: : 1000it [00:00, 7363.13it/s, loss=615.324951]          
Folder: 2 epoch: 13/100: 

Folder: 3 epoch: 1/100: : 1000it [00:00, 8223.67it/s, loss=663.194092]          
Folder: 3 epoch: 2/100: : 1000it [00:00, 9804.79it/s, loss=623.354248]          
Folder: 3 epoch: 3/100: : 1000it [00:00, 9635.57it/s, loss=671.970337]          
Folder: 3 epoch: 4/100: : 1000it [00:00, 11004.95it/s, loss=682.037476]         
Folder: 3 epoch: 5/100: : 1000it [00:00, 7946.77it/s, loss=631.137878]          
Folder: 3 epoch: 6/100: : 1000it [00:00, 8135.76it/s, loss=656.458496]          
Folder: 3 epoch: 7/100: : 1000it [00:00, 10555.85it/s, loss=688.171082]         

In [7]:
N_FOLDS  = 5
train_idx, valid_idx = K_Fold(x.shape[0], 9, N_FOLDS)

weight_best = None
loss_best = sys.maxsize

for i in range(N_FOLDS):
    LRGD = LinearRegressionUsingGD(lr=0.06, eps=1e-10,iters=10000)
    
    x_train = np.asarray([x[idx] for idx in train_idx[i]])
    y_train = y[train_idx[i]]
    
    x_valid = np.asarray([x[idx] for idx in valid_idx[i]])
    y_valid = y[valid_idx[i]]
    
    LRGD.fit(x_train, y_train)
    y_pred = LRGD.predict(x_valid)
    loss = np.sqrt(np.mean(np.power(y_valid - y_pred, 2)))#rmse
    
    if loss < loss_best:
        weight_best = LRGD.ω
        loss_best = loss
    
    print("Folder: {}, Loss: {}".format(i, loss))
    
np.save("weight.npy", weight_best)

NameError: name 'K_Fold' is not defined