In [1]:
from torch import nn
import torch
import pandas as pd
import numpy as np
import csv
import random
import sklearn.model_selection
from torch.utils.tensorboard.writer import SummaryWriter
import tqdm
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
import os

torch.backends.cudnn.benchmark = True

  from .autonotebook import tqdm as notebook_tqdm


In [2]:

class LSTM(nn.Module):
    """Sequence-to-sequence forecaster built from two stacked bidirectional LSTMs.

    The first LSTM encodes the input window into a single hidden vector; that
    vector is repeated ``future`` times and decoded by the second LSTM. The
    decoded sequence is projected to one value per step and added to the last
    ``future`` values of input feature 0 (a residual connection), then clamped
    to be non-negative.
    """

    def __init__(self):
        super().__init__()
        # Encoder: 4 input features -> 64 hidden units, 2 layers, both directions.
        self.lstm = nn.LSTM(input_size=4, hidden_size=64, num_layers=2,
                            bidirectional=True, batch_first=True)
        # Decoder: consumes the 64-d summary, emits 2 * 128 = 256 features per step.
        self.lstm1 = nn.LSTM(input_size=64, hidden_size=128, num_layers=2,
                             bidirectional=True, batch_first=True)
        self.fc1 = nn.Linear(256, 64)
        self.fc2 = nn.Linear(64, 1)
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()

        self.delay = 24   # not read anywhere in this class
        self.future = 24  # number of steps produced by forward()

    def forward(self, x):
        """Predict ``future`` steps from the input window.

        Args:
            x: tensor laid out as (batch, time, 4) since the encoder is
               ``batch_first``. The time axis is expected to hold at least
               ``future`` steps so the residual slice lines up with the
               decoder output.

        Returns:
            Tensor of shape (batch, future) with non-negative values.
        """
        _, (final_hidden, _) = self.lstm(x)
        # Last entry of the stacked final hidden states; per the PyTorch LSTM
        # documentation this is the reverse direction of the top layer.
        summary = final_hidden[-1]
        decoder_input = summary.unsqueeze(1).repeat(1, self.future, 1)
        decoded, _ = self.lstm1(decoder_input)
        delta = self.fc2(self.tanh(self.fc1(decoded)))[:, :, 0]
        # Residual: the network predicts an offset from the most recent values
        # of feature 0.
        recent = x[:, -self.future:, 0]
        return self.relu(recent + delta)


class REG(nn.Module):
    """Combine per-station forecasts into one forecast for a target location.

    Each input station is forecast independently by ``predict24``. A small MLP
    turns the (station coordinates, target coordinates) pair into one scalar
    weight per station, and the output is the weighted sum of the station
    forecasts passed through a ReLU.
    """

    def __init__(self, bias=True):
        # ``bias`` is accepted for interface compatibility; it is not used below.
        super().__init__()
        self.predict24 = LSTM()
        self.adj_transform1 = nn.Linear(4, 10)
        self.adj_transform2 = nn.Linear(10, 1)
        # The remaining layers are not used by forward(); they are kept so the
        # module's state dict keeps the same set of keys.
        self.linear1 = nn.Linear(24, 100)
        self.linear2 = nn.Linear(100, 24)
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()

    def forward(self, x, x_l, y_l, return_adj=False):
        """Forecast the target location from all input stations.

        Args:
            x: per-station input windows, forwarded unchanged to ``predict24``.
            x_l: station coordinates, one row of 2 values per station.
            y_l: target coordinates; expanded to one row of 2 values per station.
            return_adj: when True, also return the per-station weights.

        Returns:
            Tensor of shape (1, T), where T is the forecast length produced by
            ``predict24``; with ``return_adj`` a tuple ``(out, adj)`` where
            ``adj`` has shape (stations, 1).
        """
        station_pred = self.predict24(x)
        n_stations, _ = station_pred.shape

        # Pair every station's coordinates with the (broadcast) target coordinates.
        target = y_l.expand(n_stations, 2)
        adj = torch.cat((x_l, target), dim=-1)

        # Two-layer MLP producing one weight per station.
        adj = self.adj_transform2(self.tanh(self.adj_transform1(adj)))

        # (T, stations) @ (stations, 1) -> (T, 1), then back to (1, T).
        out = torch.mm(station_pred.t(), adj).t()
        out = torch.relu(out)

        return (out, adj) if return_adj else out

    def compute_loss(self, x, x_l, y_l, desire):
        """Mean absolute percentage error (as a fraction) against ``desire``.

        Elements of ``desire`` equal to zero lead to a division by zero.
        """
        prediction = self(x, x_l, y_l)
        relative_error = (desire - prediction) / desire
        return relative_error.abs().mean()


In [3]:
# Mean / standard deviation of the training-station coordinates; location()
# uses them to standardise every longitude/latitude pair it reads.
df = pd.read_csv('dataset/data_train/location_input.csv')
lo_mean, lo_std = df['longitude'].mean(), df['longitude'].std()
la_mean, la_std = df['latitude'].mean(), df['latitude'].std()
def location(filename):
    """Read a station-location CSV and return standardised coordinates.

    The first row is treated as a header and skipped. For every other row,
    column 1 is used as the station key, column 2 is standardised with the
    module-level longitude statistics and column 3 with the latitude
    statistics.

    Args:
        filename: path to the CSV file.

    Returns:
        dict mapping station key -> [standardised column 2, standardised column 3].
    """
    station = {}
    with open(filename) as csv_file:
        for line_no, row in enumerate(csv.reader(csv_file, delimiter=',')):
            if line_no == 0:
                continue  # header row
            lon = (np.float64(row[2]) - lo_mean) / lo_std
            lat = (np.float64(row[3]) - la_mean) / la_std
            station[row[1]] = [lon, lat]
    return station


def read_csv(filename):
    """Split a measurement CSV into runs of consecutive complete rows.

    The first row is treated as a header and skipped. A row whose column 2, 3
    or 4 is empty is dropped and closes the current run, so each returned run
    holds only rows with all three values present.

    Each kept row becomes
    ``[col2, col3, col4, hour, col0]`` where the three measurements and col0
    are parsed as float64 and ``hour`` is the text before the first ':' in the
    second whitespace-separated token of column 1, parsed as float32.

    Args:
        filename: path to the CSV file.

    Returns:
        list of runs; each run is a non-empty list of 5-element rows.
    """
    segments = []
    current = []
    with open(filename) as csv_file:
        rows = csv.reader(csv_file, delimiter=',')
        next(rows, None)  # skip the header (no-op for an empty file)
        for row in rows:
            if any(row[col] == '' for col in (2, 3, 4)):
                # A missing measurement ends the current run.
                if current:
                    segments.append(current)
                current = []
                continue
            record = [np.float64(row[col]) for col in (2, 3, 4)]
            record.append(np.float32(row[1].split()[1].split(':')[0]))
            record.append(np.float64(row[0]))
            current.append(record)
    if current:
        segments.append(current)
    return segments


In [5]:
# Rebuild the network and restore the trained weights.
# The model is never moved off the CPU and the prediction cell converts its
# output straight to numpy, so the checkpoint is mapped to the CPU as well;
# mapping it to 'cuda:0' would make loading fail on a machine without a GPU.
model = REG()
checkpoint = torch.load('ckpt/model/checkpoints/mlp_epoch_9.ckpt', map_location='cpu')
model.load_state_dict(checkpoint['model_state_dict'])
# Inference mode; as the last expression this also displays the module summary.
model.eval()

REG(
  (predict24): LSTM(
    (lstm): LSTM(4, 64, num_layers=2, batch_first=True, bidirectional=True)
    (lstm1): LSTM(64, 128, num_layers=2, batch_first=True, bidirectional=True)
    (fc1): Linear(in_features=256, out_features=64, bias=True)
    (fc2): Linear(in_features=64, out_features=1, bias=True)
    (relu): ReLU()
    (tanh): Tanh()
  )
  (adj_transform1): Linear(in_features=4, out_features=10, bias=True)
  (adj_transform2): Linear(in_features=10, out_features=1, bias=True)
  (linear1): Linear(in_features=24, out_features=100, bias=True)
  (linear2): Linear(in_features=100, out_features=24, bias=True)
  (sigmoid): Sigmoid()
  (tanh): Tanh()
)

In [7]:
# Run inference for the 100 public-test folders and write one CSV per
# (folder, target location) pair to prediction/<folder>/res_<folder>_<k>.csv.

delay = 36           # number of most recent rows fed to the model per station
last_row_id = 167    # value of column 0 on the final row of a usable file
                     # NOTE(review): presumably hourly rows 0..167 (7 days) - confirm

# The location tables do not depend on the folder, so read them once.
input_location = location('dataset/data_train/location_input.csv')
output_location = location('dataset/public-test/location.csv')

for i in tqdm.tqdm(range(1, 101)):
    os.makedirs(f'prediction/{i}', exist_ok=True)
    input_dir = f'dataset/public-test/input/{i}'

    inp = []
    inp_local = []
    # Sorted so the station order (and therefore the batch) is reproducible;
    # os.listdir returns entries in arbitrary order.
    for file_name in sorted(os.listdir(input_dir)):
        locate = os.path.basename(file_name).split('.')[0]
        raw = read_csv(os.path.join(input_dir, file_name))
        if len(raw) > 0:
            tail = raw[-1]  # last run of complete rows in the file
            # Keep the station only if that run reaches the final row and
            # started early enough to contain at least `delay` rows.
            if tail[-1][-1] == last_row_id and tail[0][-1] < (last_row_id - delay + 1):
                inp.append(np.stack(tail[-delay:]))
                inp_local.append(input_location[locate])

    if not inp:
        raise ValueError(f'no station in {input_dir} has {delay} complete trailing rows')

    # Drop the last column (the row id) so only the model features remain.
    x = torch.Tensor(np.stack(inp))[:, :, :-1]
    x_l = torch.Tensor(np.stack(inp_local))

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        for index, key in enumerate(output_location.keys(), start=1):
            y_l = torch.Tensor(output_location[key])
            y = model(x, x_l, y_l)
            out = pd.DataFrame({'PM2.5': y.squeeze(0).numpy()})
            out.to_csv(f'prediction/{i}/res_{i}_{index}.csv', index=False)

100%|██████████| 100/100 [00:10<00:00,  9.39it/s]
