# Apply CNN for timeseries data

In [28]:
import os
import pickle
import numpy as np
import pandas as pd

In [29]:
input_dir = '../input/'
working_dir = '../working/'
output_dir = '../output/'

## Read data

In [30]:
sleeper_type_dict = {
    1: 'pc',
    2: 'wooden',
    3: 'junction',
    4: 'short',
    5: 'synthetic',
    6: 'synth_junc',
    7: 'symth_short',
    8: 'other'
}

In [31]:
def read_track(line_name='a'):
    track_df = pd.read_csv(os.path.join(input_dir, 'track_fillna_{}.csv'.format(line_name.upper())))
    col_names_track = ['date', 'kilo', 'lev_l', 'lev_r', 'cur_l', 'cur_r', 'cant', 'width', 'speed']
    track_df.columns = col_names_track
    track_df['date'] = pd.to_datetime(track_df['date'])
    track_df['kilo'] = track_df['kilo'].astype('str')
    print('track_{line_name} shape: {shape}'.format(line_name=line_name.upper(), shape=track_df.shape))
    return track_df

In [32]:
def read_equ(line_name='a'):
    equ_df = pd.read_csv(os.path.join(input_dir, 'equipment_{}.csv'.format(line_name.upper())))
    col_names_equ = ['kilo', 'is_ballast', 'is_long', 'sleeper_type', 'is_bridge', 'is_crossing', 'gross_ton', 'radius', 'is_unreliable']
    equ_df.columns = col_names_equ
    equ_df['kilo'] = equ_df['kilo'].astype('str')
    equ_df['sleeper_type'] = equ_df['sleeper_type'].replace(sleeper_type_dict).astype('category')
    print('equ_{line_name} shape: {shape}'.format(line_name=line_name.upper(), shape=equ_df.shape))
    return equ_df

In [33]:
# degrade data types to save memory
def degrade_dtypes(df):
    for col in df.columns:
        if df[col].dtype=='int64':
            df[col] = df[col].astype('int32')
        if df[col].dtype=='float64':
            df[col] = df[col].astype('float32')
    return df

In [34]:
abcd = 'b'

In [35]:
track = read_track(abcd)
track = degrade_dtypes(track)
equ = read_equ(abcd)
equ = degrade_dtypes(equ)

track_B shape: (7815753, 9)
equ_B shape: (21531, 9)


In [36]:
# sample_submit = pd.read_csv(os.path.join(input_dir, 'sample_submit.csv'), header=None)
index_master = pd.read_csv(os.path.join(input_dir, 'index_master.csv'))
index_master.columns = ['id', 'line_name', 'date', 'kilo']
index_master['date'] = pd.to_datetime(index_master['date'])
index_master['kilo'] = index_master['kilo'].astype('str')

## とりあえず、21×21で切り取る。一行分作成。
1列ごとにやったほうが良かったかも

In [37]:
side_len = 29 #must be odd
side_len_half = int((side_len-1)/2)

In [38]:
track_pv = track.pivot(index='date', columns='kilo', values='lev_l')

In [39]:
track_pv.head()

kilo,10000,10001,10002,10003,10004,10005,10006,10007,10008,10009,...,31521,31522,31523,31524,31525,31526,31527,31528,31529,31530
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-04-03,0.22,-2.59,-5.71,-7.0,-5.09,-0.56,4.23,6.54,4.93,0.11,...,1.66,-0.48,-2.69,-3.93,-3.67,-2.2,-0.39,0.89,1.23,0.91
2017-04-04,0.22,-2.59,-5.71,-7.0,-5.09,-0.56,4.23,6.54,4.93,0.11,...,1.66,-0.48,-2.69,-3.93,-3.67,-2.2,-0.39,0.89,1.23,0.91
2017-04-05,-0.4,-3.3,-6.13,-6.83,-4.32,0.5,5.1,6.83,4.56,-0.67,...,1.81,-0.13,-2.33,-3.77,-3.83,-2.6,-0.8,0.69,1.33,1.16
2017-04-06,-0.4,-2.6,-5.71,-6.84,-4.62,0.19,4.95,6.8,4.5,-0.83,...,1.81,-0.13,-2.33,-3.77,-3.83,-2.6,-0.8,0.69,1.33,1.16
2017-04-07,0.04,-2.69,-5.59,-6.61,-4.52,0.01,4.61,6.64,4.78,-0.16,...,1.81,-0.13,-2.33,-3.77,-3.83,-2.6,-0.8,0.69,1.33,1.16


In [40]:
track_pv_tgt = track_pv.iloc[side_len:, side_len_half:-side_len_half]

In [41]:
div_data = np.empty((track_pv_tgt.shape[1], 1, side_len, side_len))
row_num = 0
for col_num in range(track_pv_tgt.shape[1]):
    div_tmp = track_pv.values[row_num:row_num+side_len, col_num:col_num+side_len]
    div_data[col_num] = div_tmp.reshape(1, side_len, side_len)

In [42]:
div_label = track_pv_tgt.values[0,:]

In [43]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
from sklearn import model_selection

In [44]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device", device)

torch.manual_seed(0)

batch_size = 100
num_classes = 1
epochs = 20

device cpu


In [45]:
x_train, x_test, y_train, y_test = model_selection.train_test_split(div_data, div_label, test_size=1/7, random_state=0)

# x_train = x_train.astype('double')
# x_test = x_test.astype('double')
# y_train = y_train.astype('float')
# y_test = y_test.astype('float')

In [46]:
ds_train = data.TensorDataset(torch.from_numpy(x_train), torch.from_numpy(y_train))
ds_test  = data.TensorDataset(torch.from_numpy(x_test), torch.from_numpy(y_test))

In [47]:
dataloader_train = data.DataLoader(dataset=ds_train, batch_size=batch_size, shuffle=True)
dataloader_test = data.DataLoader(dataset=ds_test, batch_size=batch_size, shuffle=False)

In [48]:
class CNNModel (nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
 
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
 
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
 
        self.fc1 = nn.Linear(64*14*14, 128)
        self.fc2 = nn.Linear(128, num_classes)
 
 
    def forward(self, x):
        x = x.float()
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, (2, 2))
        x = self.dropout1(x)
 
        x = x.view(-1, 64*14*14)
 
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        return F.relu(self.fc2(x))

In [49]:
model = CNNModel().to(device)

In [50]:
print(model)

CNNModel(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (dropout1): Dropout2d(p=0.25)
  (dropout2): Dropout2d(p=0.5)
  (fc1): Linear(in_features=12544, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=1, bias=True)
)


In [51]:
criterion = nn.L1Loss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

In [52]:
global_step = 0
 
def train(epoch):#, writer):
    model.train()
    steps = len(ds_train)//batch_size
    for step, (images, labels) in enumerate(dataloader_train, 1):
        global global_step
        global_step += 1
        images, labels = images.to(device), labels.to(device)
        labels = labels.type(torch.LongTensor)
 
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels.float())
        loss.backward()
        optimizer.step()
 
        if step % 100 == 0:
            print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' % (epoch, epochs, step, steps, loss.item()))
#             writer.add_scalar('train/train_loss', loss.item() , global_step)

In [53]:
def eval(epoch):#, writer):
    model.eval()
    abs_err_sum = 0
    total = 0
    with torch.no_grad():
        for (images, labels) in dataloader_test:
            images, labels = images.to(device), labels.to(device)
#             labels = labels.type(torch.LongTensor)
 
            outputs = model(images)
#             _, predicted = torch.max(outputs.data, 1)
            abs_err_sum += abs(outputs - labels).sum()
            total += labels.size(0)
 
    print("Val Acc : %.4f" % (abs_err_sum/total))
#     writer.add_scalar('eval/val_acc', correct*100/total, epoch)

In [54]:
# from tensorboardX import SummaryWriter
# writer = SummaryWriter()
 
for epoch in range(1, epochs+1):
    train(epoch)#, writer)
    eval(epoch)#, writer)
 
# writer.close()

Epoch [1/20], Step [100/184], Loss: 0.9100
Val Acc : 139.2855
Epoch [2/20], Step [100/184], Loss: 1.1500
Val Acc : 139.2855
Epoch [3/20], Step [100/184], Loss: 1.2000
Val Acc : 139.2855
Epoch [4/20], Step [100/184], Loss: 1.1000
Val Acc : 139.2855
Epoch [5/20], Step [100/184], Loss: 1.0000
Val Acc : 139.2855
Epoch [6/20], Step [100/184], Loss: 1.1200
Val Acc : 139.2855
Epoch [7/20], Step [100/184], Loss: 1.1600
Val Acc : 139.2855
Epoch [8/20], Step [100/184], Loss: 0.9300
Val Acc : 139.2855
Epoch [9/20], Step [100/184], Loss: 1.1400
Val Acc : 139.2855
Epoch [10/20], Step [100/184], Loss: 1.3400
Val Acc : 139.2855
Epoch [11/20], Step [100/184], Loss: 0.9400
Val Acc : 139.2855
Epoch [12/20], Step [100/184], Loss: 0.7400
Val Acc : 139.2855
Epoch [13/20], Step [100/184], Loss: 1.0100
Val Acc : 139.2855
Epoch [14/20], Step [100/184], Loss: 0.9400
Val Acc : 139.2855
Epoch [15/20], Step [100/184], Loss: 1.0000
Val Acc : 139.2855
Epoch [16/20], Step [100/184], Loss: 1.1300
Val Acc : 139.2855
E