# simple DL for finedust prediction

@befreor

In [1]:
import torch
import torch.nn as nn
from torch.nn import functional as F

In [2]:
from PGM.access_data import dbAccess

import numpy as np
import pandas as pd

### Data Preprocessing

In [3]:
# example: 10_2016 - PM10, PM25, SO2, O3
pvId = "10"
year = "2016"
db = dbAccess("PGM/%s_db_%s.csv" % (pvId,year))
db_data = db.get_data("2016-01-01 08:00", "2016-12-31 18:00", "PV", "PM10","PM25","SO2", "O3")
data = pd.DataFrame(db_data)
print(data, len(data))

                     time      PV PM10 PM25    SO2     O3
0     2016-01-01 08:00:00    0.24   50   31  0.002  0.013
1     2016-01-01 09:00:00   115.2   49   28  0.002  0.011
2     2016-01-01 10:00:00  367.92   51   32  0.003  0.014
3     2016-01-01 11:00:00  585.24   64   34  0.003  0.022
4     2016-01-01 12:00:00  669.12   54   31  0.003  0.029
...                   ...     ...  ...  ...    ...    ...
8766  2016-12-31 14:00:00   665.4   62   37  0.022  0.021
8767  2016-12-31 15:00:00   527.4   78   43  0.013  0.027
8768  2016-12-31 16:00:00  303.48   65   29  0.003  0.033
8769  2016-12-31 17:00:00   70.32   39   10  0.003  0.035
8770  2016-12-31 18:00:00    0.96   27   15  0.003  0.034

[8771 rows x 6 columns] 8771


In [4]:
data[:10]

Unnamed: 0,time,PV,PM10,PM25,SO2,O3
0,2016-01-01 08:00:00,0.24,50,31,0.002,0.013
1,2016-01-01 09:00:00,115.2,49,28,0.002,0.011
2,2016-01-01 10:00:00,367.92,51,32,0.003,0.014
3,2016-01-01 11:00:00,585.24,64,34,0.003,0.022
4,2016-01-01 12:00:00,669.12,54,31,0.003,0.029
5,2016-01-01 13:00:00,695.52,59,38,0.005,0.033
6,2016-01-01 14:00:00,660.24,66,35,0.006,0.034
7,2016-01-01 15:00:00,485.88,68,37,0.014,0.035
8,2016-01-01 16:00:00,307.44,76,41,0.008,0.038
9,2016-01-01 17:00:00,80.16,56,37,0.003,0.039


In [5]:
data[53:57]

Unnamed: 0,time,PV,PM10,PM25,SO2,O3
53,2016-01-03 13:00:00,606.12,46,31.0,0.003,0.033
54,2016-01-03 14:00:00,632.88,45,39.0,0.015,0.027
55,2016-01-03 15:00:00,549.36,45,,0.018,0.04
56,2016-01-03 16:00:00,365.28,44,43.0,0.011,0.04


#### remove '' datas

In [6]:
for col in data.columns:
    data = data.loc[data[col] != '']

In [7]:
data = data.loc[data['PV'] != '0'] # remove "PV == 0"

In [8]:
print(data, len(data))

                     time      PV PM10 PM25    SO2     O3
0     2016-01-01 08:00:00    0.24   50   31  0.002  0.013
1     2016-01-01 09:00:00   115.2   49   28  0.002  0.011
2     2016-01-01 10:00:00  367.92   51   32  0.003  0.014
3     2016-01-01 11:00:00  585.24   64   34  0.003  0.022
4     2016-01-01 12:00:00  669.12   54   31  0.003  0.029
...                   ...     ...  ...  ...    ...    ...
8766  2016-12-31 14:00:00   665.4   62   37  0.022  0.021
8767  2016-12-31 15:00:00   527.4   78   43  0.013  0.027
8768  2016-12-31 16:00:00  303.48   65   29  0.003  0.033
8769  2016-12-31 17:00:00   70.32   39   10  0.003  0.035
8770  2016-12-31 18:00:00    0.96   27   15  0.003  0.034

[3989 rows x 6 columns] 3989


In [9]:
data[:10]

Unnamed: 0,time,PV,PM10,PM25,SO2,O3
0,2016-01-01 08:00:00,0.24,50,31,0.002,0.013
1,2016-01-01 09:00:00,115.2,49,28,0.002,0.011
2,2016-01-01 10:00:00,367.92,51,32,0.003,0.014
3,2016-01-01 11:00:00,585.24,64,34,0.003,0.022
4,2016-01-01 12:00:00,669.12,54,31,0.003,0.029
5,2016-01-01 13:00:00,695.52,59,38,0.005,0.033
6,2016-01-01 14:00:00,660.24,66,35,0.006,0.034
7,2016-01-01 15:00:00,485.88,68,37,0.014,0.035
8,2016-01-01 16:00:00,307.44,76,41,0.008,0.038
9,2016-01-01 17:00:00,80.16,56,37,0.003,0.039


In [10]:
data[53:56]

Unnamed: 0,time,PV,PM10,PM25,SO2,O3
147,2016-01-07 11:00:00,604.92,29,12,0.004,0.036
148,2016-01-07 12:00:00,725.76,34,15,0.004,0.037
149,2016-01-07 13:00:00,764.04,31,15,0.004,0.038


In [11]:
n_data = len(data)
num_train = 3000
num_test = 512

assert n_data >= num_train + num_test

In [12]:
# learning data
# xs
pm10 = np.array(data.PM10.values.astype(float))
pm25 = np.array(data.PM25.values.astype(float))
so2 = np.array(data.SO2.values.astype(float))
o3 = np.array(data.O3.values.astype(float))
# y
pv = np.array(data.PV.values.astype(float))

print(n_data, pv.shape)

3989 (3989,)


In [13]:
xs = np.stack([pm10, pm25, so2, o3], axis=1)
ys = pv

In [14]:
train_xs, test_xs = xs[:num_train], xs[num_train:num_train + num_test]
train_ys, test_ys = ys[:num_train], ys[num_train:num_train + num_test]

In [15]:
print(train_xs[:10], test_ys[:10])

[[5.0e+01 3.1e+01 2.0e-03 1.3e-02]
 [4.9e+01 2.8e+01 2.0e-03 1.1e-02]
 [5.1e+01 3.2e+01 3.0e-03 1.4e-02]
 [6.4e+01 3.4e+01 3.0e-03 2.2e-02]
 [5.4e+01 3.1e+01 3.0e-03 2.9e-02]
 [5.9e+01 3.8e+01 5.0e-03 3.3e-02]
 [6.6e+01 3.5e+01 6.0e-03 3.4e-02]
 [6.8e+01 3.7e+01 1.4e-02 3.5e-02]
 [7.6e+01 4.1e+01 8.0e-03 3.8e-02]
 [5.6e+01 3.7e+01 3.0e-03 3.9e-02]] [253.8  422.64 531.36 652.2  637.92 569.52 431.16 276.6  180.96  41.88]


# DL meta for finedust

#### custom lienar

In [17]:
def linear(input, weight, bias=None):
    if bias is None:
        return F.linear(input, weight.cuda())
    else:
        return F.linear(input, weight.cuda(), bias.cuda())

#### DL model

In [18]:
class Encoder(nn.Module):
    def __init__(self, in_dim, hid_dim, out_dim):
        super(Encoder, self).__init__()
        self.in_dim = in_dim
        self.hid_dim = hid_dim
        self.out_dim = out_dim

        self.layer = nn.Sequential(
                        nn.Linear(in_dim, hid_dim),
                        nn.ReLU(),
                        nn.Linear(hid_dim, hid_dim),
                        nn.ReLU(),
                        nn.Linear(hid_dim, out_dim))
        self.relu = nn.ReLU()

    def forward(self, x, params=None):
        if params is None:
            out = self.layer(x)
            out = out.view(out.size(0), -1)
            return out
        else:
            out = linear(x, params['layer.0.weight'], params['layer.0.bias'])
            out = self.relu(out)
            out = linear(out, params['layer.2.weight'], params['layer.2.bias'])
            out = self.relu(out)
            out = linear(out, params['layer.4.weight'], params['layer.4.bias'])
            out = out.view(out.size(0), -1)
            return out

#### config

In [19]:
N = 4 # data dimension number
H = 64 # hidden number
OUT = 1 # output dimension
SUPPORT_SIZE = 16 # support size
QUERY_SIZE = 4 # query size

learning_rate = 1e-4 # learning 
n_iter = 100000

n_train = len(train_xs)
n_test = len(test_xs)
print(n_train, n_test)

3000 512


#### data

In [20]:
# dummy data
'''
prem_x = torch.randn(batch_size, N).cuda()
prem_y = torch.randn(batch_size, 1).cuda()

postm_x = torch.randn(batch_size, N).cuda()
postm_y = torch.randn(batch_size, 1).cuda()
'''

tr_x = torch.tensor(train_xs).cuda()
tr_y = torch.tensor(train_ys).cuda()

te_x = torch.tensor(test_xs).cuda()
te_y = torch.tensor(test_ys).cuda()

N_TRAIN = len(tr_x)
N_TEST = len(te_x)
print(N_TRAIN, N_TEST)

3000 512


In [21]:
def data_gen(dx, dy, i, s, q, l):
    # dx, dy: data, i: iter, s: support size, q: query size, l: len(data)
    cur = (i * (s+q)) % l
    nxt = ((i+1) * (s+q)) % l
    if cur < nxt:
        result_x =  dx[cur: cur+nxt]
        result_y =  dy[cur: cur+nxt]
    elif cur >= nxt and nxt != 0:
        result_x =  torch.cat([dx[cur:], dx[:nxt]], axis=1)
        result_y =  torch.cat([dy[cur:], dy[:nxt]], axis=1)
    else:
        result_x =  dx[cur:]
        result_y =  dy[cur:]
    
    # support data, query data
    return result_x[:s,:].float(), result_y[:s].float(), result_x[s:s+q,:].float(), result_y[s:s+q].float()

#### algo.

In [22]:
model = Encoder(N, H, OUT).cuda()
model.train()

In [23]:
mse = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [24]:
org_param = {}
for k,v in model.named_parameters():
    org_param[k] = v

In [25]:
org_param['layer.0.weight'].type()

'torch.cuda.FloatTensor'

In [26]:
for i in range(n_iter):
    
    prem_x, prem_y, postm_x, postm_y = data_gen(tr_x, tr_y, i, SUPPORT_SIZE, QUERY_SIZE, N_TRAIN)

    prem_y = prem_y.reshape(-1, 1)
    postm_y = postm_y.reshape(-1, 1)
    org_param = {}
    for k,v in model.named_parameters():Q
        org_param[k] = v
    
    # pre-update
    pred_y = model(prem_x)
    pre_loss = mse(pred_y, prem_y)
    
    param_ec = {}

    for k,v in org_param.items():
        _g = torch.autograd.grad(pre_loss, v, create_graph=True)[0]
        param_ec[k] = v - 0.0003 * _g
    
    
    # post-update
    post_pred_y = model(postm_x, param_ec)
    post_loss = mse(post_pred_y, postm_y)
    
    if i % 1000 == 1:
        print(i, post_loss)
    
    optimizer.zero_grad()

    post_loss.backward()
    optimizer.step()
        

1 tensor(2432.2466, device='cuda:0', grad_fn=<MseLossBackward>)
1001 tensor(1965.8884, device='cuda:0', grad_fn=<MseLossBackward>)
2001 tensor(49271368., device='cuda:0', grad_fn=<MseLossBackward>)
3001 tensor(7391.0479, device='cuda:0', grad_fn=<MseLossBackward>)
4001 tensor(2883.6167, device='cuda:0', grad_fn=<MseLossBackward>)
5001 tensor(2469262., device='cuda:0', grad_fn=<MseLossBackward>)
6001 tensor(6790.2344, device='cuda:0', grad_fn=<MseLossBackward>)
7001 tensor(2612.3977, device='cuda:0', grad_fn=<MseLossBackward>)
8001 tensor(449206.6562, device='cuda:0', grad_fn=<MseLossBackward>)
9001 tensor(5503.4556, device='cuda:0', grad_fn=<MseLossBackward>)
10001 tensor(2140.6562, device='cuda:0', grad_fn=<MseLossBackward>)
11001 tensor(7577.3442, device='cuda:0', grad_fn=<MseLossBackward>)
12001 tensor(3833.0347, device='cuda:0', grad_fn=<MseLossBackward>)
13001 tensor(1857.5337, device='cuda:0', grad_fn=<MseLossBackward>)
14001 tensor(287589.6875, device='cuda:0', grad_fn=<MseLossB

## TEST(prediction)

In [None]:
te_x = torch.tensor(test_xs).cuda()
te_y = torch.tensor(test_ys).cuda()

In [30]:
model.eval()
TEST_ITER = 25

total_loss = 0.

for i in range(TEST_ITER):
    prem_x, prem_y, postm_x, postm_y = data_gen(te_x, te_y, i, SUPPORT_SIZE, QUERY_SIZE, N_TEST)
    
    prem_y = prem_y.reshape(-1, 1)
    postm_y = postm_y.reshape(-1, 1)
    org_param = {}
    for k,v in model.named_parameters():
        org_param[k] = v
    
    # pre-update
    pred_y = model(prem_x)
    pre_loss = mse(pred_y, prem_y)
    
    param_ec = {}

    for k,v in org_param.items():
        _g = torch.autograd.grad(pre_loss, v, create_graph=True)[0]
        param_ec[k] = v - 0.0003 * _g
    
    
    # post-update
    post_pred_y = model(postm_x, param_ec)
    post_loss = mse(post_pred_y, postm_y)
    
    total_loss += post_loss
    
total_loss /= TEST_ITER * QUERY_SIZE
print(total_loss)

tensor(14467.8174, device='cuda:0', grad_fn=<DivBackward0>)


## Plain DL model

In [40]:
N = 4 # data dimension number
H = 128 # hidden number
OUT = 1 # output dimension
batch_size = 32 # batch size

learning_rate = 1e-3 # learning rate

# dummy data
#x = torch.randn(batch_size, N)
#y = torch.randn(batch_size, 1)

tr_x = torch.tensor(train_xs).cuda()
tr_y = torch.tensor(train_ys).cuda().reshape(-1, 1)

print(tr_x.shape)

#te_x = torch.tensor(test_xs).cuda()
#te_y = torch.tensor(test_ys).cuda().reshape(-1, 1)

x = tr_x.float()
y = tr_y.float()

model = torch.nn.Sequential(
    torch.nn.Linear(N, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, OUT),
)

model.cuda()
model.train()

loss_fn = torch.nn.MSELoss(reduction='sum')

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for t in range(100000):
    
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    
    optimizer.zero_grad()
    
    if t % 10000 == 1:
        print(t, loss)

    loss.backward()
    optimizer.step()


torch.Size([3000, 4])
1 tensor(4.4789e+08, device='cuda:0', grad_fn=<MseLossBackward>)
10001 tensor(1.3517e+08, device='cuda:0', grad_fn=<MseLossBackward>)
20001 tensor(1.3096e+08, device='cuda:0', grad_fn=<MseLossBackward>)
30001 tensor(1.2944e+08, device='cuda:0', grad_fn=<MseLossBackward>)
40001 tensor(1.2756e+08, device='cuda:0', grad_fn=<MseLossBackward>)
50001 tensor(1.2622e+08, device='cuda:0', grad_fn=<MseLossBackward>)
60001 tensor(1.2458e+08, device='cuda:0', grad_fn=<MseLossBackward>)
70001 tensor(1.2397e+08, device='cuda:0', grad_fn=<MseLossBackward>)
80001 tensor(1.2110e+08, device='cuda:0', grad_fn=<MseLossBackward>)
90001 tensor(1.1944e+08, device='cuda:0', grad_fn=<MseLossBackward>)


In [42]:
te_x = torch.tensor(test_xs).cuda().float()
te_y = torch.tensor(test_ys).cuda().float()

model.eval()

y_pred = model(te_x)
loss = loss_fn(y_pred, te_y)
print(loss)

tensor(1.9487e+10, device='cuda:0', grad_fn=<MseLossBackward>)


  return F.mse_loss(input, target, reduction=self.reduction)
