In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import LabelEncoder
import os.path
import torch.nn.functional as F
from torch.nn import init
from torch import optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import train_test_split
import random
from time import time
from sklearn.metrics import mean_squared_error

In [2]:
date_2016 = pd.date_range(start='2016-01-01',end='2016-12-31',freq='D')
date_2017 = pd.date_range(start='2017-01-01',end='2017-12-31',freq='D')
date_2016_df = pd.DataFrame({'quarter':date_2016.quarter,'ones':np.ones(len(date_2016)),'date':pd.Series(date_2016.date).apply(str)})
date_2017_df = pd.DataFrame({'quarter':date_2017.quarter,'ones':np.ones(len(date_2017)),'date':pd.Series(date_2017.date).apply(str)})

In [3]:
date_2016_df['dayofquarter'] = date_2016_df.groupby('quarter',as_index=False)['ones'].cumsum()
date_2017_df['dayofquarter'] = date_2017_df.groupby('quarter',as_index=False)['ones'].cumsum()

In [4]:
quarter_info = pd.concat([date_2016_df[['date','dayofquarter']],date_2017_df[['date','dayofquarter']]],axis=0)

In [5]:
train_data = pd.read_csv('./data/air_visit_nn.csv')
test_data = pd.read_csv('./data/sample_submission_nn.csv')

In [6]:
train_data.head()

Unnamed: 0,air_store_id,visit_date,visitors,air_genre_name,air_area_name,calendar_date,day_of_week,holiday_flg,year,month,day,dayofyear,quarter
0,air_ba937bf13d40fb24,2016-01-13,25,Dining bar,Tōkyō-to Minato-ku Shibakōen,2016-01-13,Wednesday,0,2016,1,13,13,1
1,air_ba937bf13d40fb24,2016-01-14,32,Dining bar,Tōkyō-to Minato-ku Shibakōen,2016-01-14,Thursday,0,2016,1,14,14,1
2,air_ba937bf13d40fb24,2016-01-15,29,Dining bar,Tōkyō-to Minato-ku Shibakōen,2016-01-15,Friday,0,2016,1,15,15,1
3,air_ba937bf13d40fb24,2016-01-16,22,Dining bar,Tōkyō-to Minato-ku Shibakōen,2016-01-16,Saturday,0,2016,1,16,16,1
4,air_ba937bf13d40fb24,2016-01-18,6,Dining bar,Tōkyō-to Minato-ku Shibakōen,2016-01-18,Monday,0,2016,1,18,18,1


In [7]:
train_data = pd.merge(right=train_data,left=quarter_info,how='right',right_on='visit_date',left_on='date')
test_data = pd.merge(right=test_data,left=quarter_info,how='right',right_on='visit_date',left_on='date')

In [8]:
test_data.head()

Unnamed: 0,date,dayofquarter,id,visitors,air_store_id,visit_date,air_genre_name,air_area_name,calendar_date,day_of_week,holiday_flg,year,month,day,dayofyear,quarter
0,2017-04-23,23.0,air_00a91d42b08b08d9_2017-04-23,0,air_00a91d42b08b08d9,2017-04-23,Italian/French,Tōkyō-to Chiyoda-ku Kudanminami,2017-04-23,Sunday,0,2017,4,23,113,2
1,2017-04-23,23.0,air_0164b9927d20bcc3_2017-04-23,0,air_0164b9927d20bcc3,2017-04-23,Italian/French,Tōkyō-to Minato-ku Shibakōen,2017-04-23,Sunday,0,2017,4,23,113,2
2,2017-04-23,23.0,air_0241aa3964b7f861_2017-04-23,0,air_0241aa3964b7f861,2017-04-23,Izakaya,Tōkyō-to Taitō-ku Higashiueno,2017-04-23,Sunday,0,2017,4,23,113,2
3,2017-04-23,23.0,air_0328696196e46f18_2017-04-23,0,air_0328696196e46f18,2017-04-23,Dining bar,Ōsaka-fu Ōsaka-shi Nakanochō,2017-04-23,Sunday,0,2017,4,23,113,2
4,2017-04-23,23.0,air_034a3d5b40d5b1b1_2017-04-23,0,air_034a3d5b40d5b1b1,2017-04-23,Cafe/Sweets,Ōsaka-fu Ōsaka-shi Ōhiraki,2017-04-23,Sunday,0,2017,4,23,113,2


In [9]:
train_data.head()

Unnamed: 0,date,dayofquarter,air_store_id,visit_date,visitors,air_genre_name,air_area_name,calendar_date,day_of_week,holiday_flg,year,month,day,dayofyear,quarter
0,2016-01-01,1.0,air_d0e8a085d8dc83aa,2016-01-01,8,Cafe/Sweets,Hyōgo-ken Kōbe-shi Sumiyoshi Higashimachi,2016-01-01,Friday,1,2016,1,1,1,1
1,2016-01-01,1.0,air_5c65468938c07fa5,2016-01-01,8,Other,Tōkyō-to Shibuya-ku Shibuya,2016-01-01,Friday,1,2016,1,1,1,1
2,2016-01-01,1.0,air_f690c42545146e0a,2016-01-01,7,Japanese food,Hokkaidō Sapporo-shi Minami 3 Jōnishi,2016-01-01,Friday,1,2016,1,1,1,1
3,2016-01-01,1.0,air_09a845d5b5944b01,2016-01-01,56,Izakaya,Fukuoka-ken Kurume-shi Jōnanmachi,2016-01-01,Friday,1,2016,1,1,1,1
4,2016-01-01,1.0,air_d0a7bd3339c3d12a,2016-01-01,62,Cafe/Sweets,Tōkyō-to Taitō-ku Higashiueno,2016-01-01,Friday,1,2016,1,1,1,1


## get the dictionaries.

In [10]:
store_set = set(train_data.air_store_id)
genre_set = set(train_data.air_genre_name)
area_set = set(train_data.air_area_name)
weekday_set = set(train_data.day_of_week)
holiday_set = set(train_data.holiday_flg)
year_set = set(train_data.year)
month_set =set(train_data.month)
day_set = set(train_data.day)
dayofyear_set = set(train_data.dayofyear)
quarter_set = set(train_data.quarter)
dayofquarter_set = set(train_data.dayofquarter)

In [11]:
store_index_dict = {store:i for i,store in enumerate(store_set)}
genre_index_dict = {genre:i for i,genre in enumerate(genre_set)}
area_index_dict = {area:i for i,area in enumerate(area_set)}
weekday_index_dict = {weekday:i for i,weekday in enumerate(weekday_set)}
holiday_index_dict = {holi:i for i,holi in enumerate(holiday_set)}
year_index_dict = {year:i for i,year in enumerate(year_set)}
month_index_dict = {month:i for i,month in enumerate(month_set)}
day_index_dict = {day:i for i,day in enumerate(day_set)}
dayofyear_index_dict = {dayofyear:i for i,dayofyear in enumerate(dayofyear_set)}
quarter_index_dict = {quarter:i for i,quarter in enumerate(quarter_set)}
dayofquarter_index_dict = {dayofquarter:i for i,dayofquarter in enumerate(dayofquarter_set)}

In [12]:
def store_index(data):
    return store_index_dict[data]
def genre_index(data):
    return genre_index_dict[data]
def area_index(data):
    return area_index_dict[data]
def weekday_index(data):
    return weekday_index_dict[data]
def holiday_index(data):
    return holiday_index_dict[data]
def year_index(data):
    return year_index_dict[data]
def month_index(data):
    return month_index_dict[data]
def day_index(data):
    return day_index_dict[data]
def dayofyear_index(data):
    return dayofyear_index_dict[data]
def quarter_index(data):
    return quarter_index_dict[data]
def dayofquarter_index(data):
    return dayofquarter_index_dict[data]

## set up data sets

In [13]:
test_data.head()

Unnamed: 0,date,dayofquarter,id,visitors,air_store_id,visit_date,air_genre_name,air_area_name,calendar_date,day_of_week,holiday_flg,year,month,day,dayofyear,quarter
0,2017-04-23,23.0,air_00a91d42b08b08d9_2017-04-23,0,air_00a91d42b08b08d9,2017-04-23,Italian/French,Tōkyō-to Chiyoda-ku Kudanminami,2017-04-23,Sunday,0,2017,4,23,113,2
1,2017-04-23,23.0,air_0164b9927d20bcc3_2017-04-23,0,air_0164b9927d20bcc3,2017-04-23,Italian/French,Tōkyō-to Minato-ku Shibakōen,2017-04-23,Sunday,0,2017,4,23,113,2
2,2017-04-23,23.0,air_0241aa3964b7f861_2017-04-23,0,air_0241aa3964b7f861,2017-04-23,Izakaya,Tōkyō-to Taitō-ku Higashiueno,2017-04-23,Sunday,0,2017,4,23,113,2
3,2017-04-23,23.0,air_0328696196e46f18_2017-04-23,0,air_0328696196e46f18,2017-04-23,Dining bar,Ōsaka-fu Ōsaka-shi Nakanochō,2017-04-23,Sunday,0,2017,4,23,113,2
4,2017-04-23,23.0,air_034a3d5b40d5b1b1_2017-04-23,0,air_034a3d5b40d5b1b1,2017-04-23,Cafe/Sweets,Ōsaka-fu Ōsaka-shi Ōhiraki,2017-04-23,Sunday,0,2017,4,23,113,2


In [14]:
len(set(test_data.visit_date))

39

In [15]:
len(set(train_data.visit_date))

478

In [16]:
len(list(set(train_data.visit_date))[-39:])

39

## skip following lines the mask takes too much time

In [17]:
def valid_mask(data):
    if data.visit_date in list(set(train_data.visit_date))[-39:]:
        return True
    else:
        return False
def train_mask(data):
    if data.visit_date in list(set(train_data.visit_date))[-39:]:
        return False
    else:
        return True

In [18]:
train_data.iloc[:3].apply(valid_mask,axis=1)

0    False
1    False
2    False
dtype: bool

In [None]:
data_valid = train_data[train_data.apply(valid_mask,axis=1)]

In [25]:
data_train = train_data[train_data.apply(train_mask,axis=1)]

In [26]:
data_valid.shape

(20535, 15)

In [27]:
with open('train_valid_doq.pickle','wb') as f:
    pickle.dump((data_train,data_valid),f)

## start from here

In [20]:
with open('train_valid_doq.pickle','rb') as f:
    data_train,data_valid = pickle.load(f)

In [21]:
data_train.head()

Unnamed: 0,date,dayofquarter,air_store_id,visit_date,visitors,air_genre_name,air_area_name,calendar_date,day_of_week,holiday_flg,year,month,day,dayofyear,quarter
0,2016-01-01,1.0,air_d0e8a085d8dc83aa,2016-01-01,8,Cafe/Sweets,Hyōgo-ken Kōbe-shi Sumiyoshi Higashimachi,2016-01-01,Friday,1,2016,1,1,1,1
1,2016-01-01,1.0,air_5c65468938c07fa5,2016-01-01,8,Other,Tōkyō-to Shibuya-ku Shibuya,2016-01-01,Friday,1,2016,1,1,1,1
2,2016-01-01,1.0,air_f690c42545146e0a,2016-01-01,7,Japanese food,Hokkaidō Sapporo-shi Minami 3 Jōnishi,2016-01-01,Friday,1,2016,1,1,1,1
3,2016-01-01,1.0,air_09a845d5b5944b01,2016-01-01,56,Izakaya,Fukuoka-ken Kurume-shi Jōnanmachi,2016-01-01,Friday,1,2016,1,1,1,1
4,2016-01-01,1.0,air_d0a7bd3339c3d12a,2016-01-01,62,Cafe/Sweets,Tōkyō-to Taitō-ku Higashiueno,2016-01-01,Friday,1,2016,1,1,1,1


In [30]:
data_valid.head()

Unnamed: 0,date,dayofquarter,air_store_id,visit_date,visitors,air_genre_name,air_area_name,calendar_date,day_of_week,holiday_flg,year,month,day,dayofyear,quarter
48,2016-01-02,2.0,air_5c65468938c07fa5,2016-01-02,8,Other,Tōkyō-to Shibuya-ku Shibuya,2016-01-02,Saturday,1,2016,1,2,2,1
49,2016-01-02,2.0,air_9fc607777ad76b26,2016-01-02,16,Other,Tōkyō-to Meguro-ku Kamimeguro,2016-01-02,Saturday,1,2016,1,2,2,1
50,2016-01-02,2.0,air_326ca454ef3558bc,2016-01-02,6,Izakaya,Shizuoka-ken Shizuoka-shi Ōtemachi,2016-01-02,Saturday,1,2016,1,2,2,1
51,2016-01-02,2.0,air_f690c42545146e0a,2016-01-02,2,Japanese food,Hokkaidō Sapporo-shi Minami 3 Jōnishi,2016-01-02,Saturday,1,2016,1,2,2,1
52,2016-01-02,2.0,air_54d6c25d33f5260e,2016-01-02,32,Izakaya,Fukuoka-ken Fukuoka-shi Shiobaru,2016-01-02,Saturday,1,2016,1,2,2,1


In [22]:
# normalize y
'''
y_train = pd.concat([data_train.visitors,data_valid.visitors],axis=0).values
max_log_y = np.max(np.log(y_train))
def get_y(data):
    return np.log(data)/max_log_y
'''
y_train = data_train.visitors.apply(np.log1p)
y_valid = data_valid.visitors.apply(np.log1p)

## set up data for train

In [23]:
xtrain_dict = {
    'store':data_train.air_store_id.apply(store_index).values,
    'genre':data_train.air_genre_name.apply(genre_index).values,
    'area':data_train.air_area_name.apply(area_index).values,
    'weekday':data_train.day_of_week.apply(weekday_index).values,
    'holiday':data_train.holiday_flg.apply(holiday_index).values,
    'year':data_train.year.apply(year_index).values,
    'month':data_train.month.apply(month_index).values,
    'day':data_train.day.apply(day_index).values,
    'dayofyear':data_train.dayofyear.apply(dayofyear_index).values,
    'quarter':data_train.quarter.apply(quarter_index).values,
    'dayofquarter':data_train.dayofquarter.apply(dayofquarter_index).values
}
xval_dict = {
    'store':data_valid.air_store_id.apply(store_index).values,
    'genre':data_valid.air_genre_name.apply(genre_index).values,
    'area':data_valid.air_area_name.apply(area_index).values,
    'weekday':data_valid.day_of_week.apply(weekday_index).values,
    'holiday':data_valid.holiday_flg.apply(holiday_index).values,
    'year':data_valid.year.apply(year_index).values,
    'month':data_valid.month.apply(month_index).values,
    'day':data_valid.day.apply(day_index).values,
    'dayofyear':data_valid.dayofyear.apply(dayofyear_index).values,
    'quarter':data_valid.quarter.apply(quarter_index).values,
    'dayofquarter':data_valid.dayofquarter.apply(dayofquarter_index).values
}
xval_dict_for_val = {
    'store':Variable(torch.LongTensor(data_valid.air_store_id.apply(store_index).values),requires_grad=False),
    'genre':Variable(torch.LongTensor(data_valid.air_genre_name.apply(genre_index).values),requires_grad=False),
    'area':Variable(torch.LongTensor(data_valid.air_area_name.apply(area_index).values),requires_grad=False),
    'weekday':Variable(torch.LongTensor(data_valid.day_of_week.apply(weekday_index).values),requires_grad=False),
    'holiday':Variable(torch.LongTensor(data_valid.holiday_flg.apply(holiday_index).values),requires_grad=False),
    'year':Variable(torch.LongTensor(data_valid.year.apply(year_index).values),requires_grad=False),
    'month':Variable(torch.LongTensor(data_valid.month.apply(month_index).values),requires_grad=False),
    'day':Variable(torch.LongTensor(data_valid.day.apply(day_index).values),requires_grad=False),
    'dayofyear':Variable(torch.LongTensor(data_valid.dayofyear.apply(dayofyear_index).values),requires_grad=False),
    'quarter':Variable(torch.LongTensor(data_valid.quarter.apply(quarter_index).values),requires_grad=False),
    'dayofquarter':Variable(torch.LongTensor(data_valid.dayofquarter.apply(dayofquarter_index).values),requires_grad=False)
}
xtest_dict = {
    'store':Variable(torch.LongTensor(test_data.air_store_id.apply(store_index).values),requires_grad=False),
    'genre':Variable(torch.LongTensor(test_data.air_genre_name.apply(genre_index).values),requires_grad=False),
    'area':Variable(torch.LongTensor(test_data.air_area_name.apply(area_index).values),requires_grad=False),
    'weekday':Variable(torch.LongTensor(test_data.day_of_week.apply(weekday_index).values),requires_grad=False),
    'holiday':Variable(torch.LongTensor(test_data.holiday_flg.apply(holiday_index).values),requires_grad=False),
    'year':Variable(torch.LongTensor(test_data.year.apply(year_index).values),requires_grad=False),
    'month':Variable(torch.LongTensor(test_data.month.apply(month_index).values),requires_grad=False),
    'day':Variable(torch.LongTensor(test_data.day.apply(day_index).values),requires_grad=False),
    'dayofyear':Variable(torch.LongTensor(test_data.dayofyear.apply(dayofyear_index).values),requires_grad=False),
    'quarter':Variable(torch.LongTensor(test_data.quarter.apply(quarter_index).values),requires_grad=False),
    'dayofquarter':Variable(torch.LongTensor(test_data.dayofquarter.apply(dayofquarter_index).values),requires_grad=False)
}

In [24]:
xval_df = pd.concat([pd.DataFrame(xval_dict),y_valid.reset_index(drop=True)],axis=1)
xtrain_df = pd.DataFrame(xtrain_dict)

In [25]:
xval_df.head()

Unnamed: 0,area,day,dayofquarter,dayofyear,genre,holiday,month,quarter,store,weekday,year,visitors
0,92,1,1,1,7,1,0,0,126,6,0,2.197225
1,88,1,1,1,7,1,0,0,18,6,0,2.833213
2,78,1,1,1,9,1,0,0,23,6,0,1.94591
3,76,1,1,1,3,1,0,0,330,6,0,1.098612
4,52,1,1,1,9,1,0,0,699,6,0,3.496508


In [26]:
def get_valid_data():
    data = xval_df.sample(100)
    y = data.visitors.values
    x = {
    'store':Variable(torch.LongTensor(data.store.values),requires_grad=False),
    'genre':Variable(torch.LongTensor(data.genre.values),requires_grad=False),
    'area':Variable(torch.LongTensor(data.area.values),requires_grad=False),
    'weekday':Variable(torch.LongTensor(data.weekday.values),requires_grad=False),
    'holiday':Variable(torch.LongTensor(data.holiday.values),requires_grad=False),
    'year':Variable(torch.LongTensor(data.year.values),requires_grad=False),
    'month':Variable(torch.LongTensor(data.month.values),requires_grad=False),
    'day':Variable(torch.LongTensor(data.day.values),requires_grad=False),
    'dayofyear':Variable(torch.LongTensor(data.dayofyear.values),requires_grad=False),
    'quarter':Variable(torch.LongTensor(data.quarter.values),requires_grad=False),
    'dayofquarter':Variable(torch.LongTensor(data.dayofquarter.values),requires_grad=False)
    }
    return x,y

In [27]:
def batch_up(batch_size,features,labels):
    assert len(features) == len(labels)
    output_batch = []
    
    sample_size = len(features)
    for start_i in range(0,sample_size,batch_size):
        end_i = start_i + batch_size
        batch_feature = features[start_i:end_i]
        batch_label = labels[start_i:end_i]
        output_batch.append((batch_feature,batch_label))
        
    return output_batch

batched_train_data = batch_up(128,xtrain_df,y_train.values)

In [28]:
len(batched_train_data)

1810

In [29]:
def get_x_dict(data):
    diction = {
    'store':Variable(torch.LongTensor(data.store.values)),
    'genre':Variable(torch.LongTensor(data.genre.values)),
    'area':Variable(torch.LongTensor(data.area.values)),
    'weekday':Variable(torch.LongTensor(data.weekday.values)),
    'holiday':Variable(torch.LongTensor(data.holiday.values)),
    'year':Variable(torch.LongTensor(data.year.values)),
    'month':Variable(torch.LongTensor(data.month.values)),
    'day':Variable(torch.LongTensor(data.day.values)),
    'dayofyear':Variable(torch.LongTensor(data.dayofyear.values)),
    'quarter':Variable(torch.LongTensor(data.quarter.values)),
    'dayofquarter':Variable(torch.LongTensor(data.dayofquarter.values))
    }
    return diction

In [30]:
def retrive_y(data):
    return np.exp(data)-1
def rmsle(preds,real):
    return np.sqrt(mean_squared_error(real,preds))
def valid_full(model):
    ypreds = model(xval_dict_for_val).detach().numpy()
    #ypreds = pd.Series(ypreds.reshape((-1,))).apply(retrive_y).values
    #yreal = xval_df.visitors.values
    return rmsle(ypreds,y_valid.values)

In [31]:
def write_result(model,name='sub1.csv'):
    ypreds = model(xtest_dict).detach().numpy()
    ypreds = pd.Series(ypreds.reshape((-1,))).apply(retrive_y).values
    test_data['visitor'] = ypreds
    test_data[['id','visitor']].rename(columns={'visitor':'visitors'}).to_csv(name,index=False)

## define network

In [32]:
print(len(store_set),len(genre_set),len(area_set),len(weekday_set),len(holiday_set),len(year_set),\
     len(month_set),len(day_set),len(dayofyear_set),len(quarter_set),len(dayofquarter_set))

829 14 103 7 2 2 12 31 366 4 92


In [36]:
class Regressor(nn.Module):
    def __init__(self):
        super(Regressor,self).__init__()
        
        self.store_embeddor = nn.Embedding(len(store_set),15)
        self.genre_embeddor = nn.Embedding(len(genre_set),6)
        self.area_embeddor = nn.Embedding(len(area_set),10)
        self.weekday_embeddor = nn.Embedding(len(weekday_set),3)
        self.holiday_embeddor = nn.Embedding(len(holiday_set),2)
        self.year_embeddor = nn.Embedding(len(year_set),2)
        self.month_embeddor = nn.Embedding(len(month_set),6)
        self.day_embeddor = nn.Embedding(len(day_set),8)
        self.dayofyear_embeddor = nn.Embedding(len(dayofyear_set),10)
        self.quarter_embeddor = nn.Embedding(len(quarter_set),2)
        self.dayofquarter_embeddor = nn.Embedding(len(dayofquarter_set),10)
        
        self.e2h = nn.Linear(74,2500)
        self.bn0 = nn.BatchNorm1d(2500)
        self.h2h1 = nn.Linear(2500,1800)
        self.bn1 = nn.BatchNorm1d(1800)
        self.h2h2 = nn.Linear(1800,1500)
        self.bn2 = nn.BatchNorm1d(1500)
        self.h2h3 = nn.Linear(1500,1200)
        self.bn3 = nn.BatchNorm1d(1200)
        self.h2h4 = nn.Linear(1200,880)
        self.bn4 = nn.BatchNorm1d(880)
        self.h2h5 = nn.Linear(880,580)
        self.bn5 = nn.BatchNorm1d(580)
        self.h2h6 = nn.Linear(580,300)
        self.bn6 = nn.BatchNorm1d(300)
        self.h2h7 = nn.Linear(300,100)
        self.bn7 = nn.BatchNorm1d(100)
        self.h2o = nn.Linear(100,1)
        
        self.dropout = nn.Dropout(0.1)
        
        for layer in [self.e2h,self.h2h1,self.h2h2,self.h2h3,self.h2h4,self.h2h5,self.h2h6,self.h2h7,self.h2o]:
            self.init_wb(layer)
            
    def init_wb(self,layer):
        init.normal(layer.weight,mean=0.,std=0.1)
        init.constant(layer.bias,0)
        
    def forward(self,inputs):
        store_embeds = self.store_embeddor(inputs['store']).view(-1,15)
        genre_embeds = self.genre_embeddor(inputs['genre']).view(-1,6)
        area_embeds = self.area_embeddor(inputs['area']).view(-1,10)
        weekday_embeds = self.weekday_embeddor(inputs['weekday']).view(-1,3)
        holiday_embeds = self.holiday_embeddor(inputs['holiday']).view(-1,2)
        year_embeds = self.year_embeddor(inputs['year']).view(-1,2)
        month_embeds = self.month_embeddor(inputs['month']).view(-1,6)
        day_embeds = self.day_embeddor(inputs['day']).view(-1,8)
        dayofyear_embeds = self.dayofyear_embeddor(inputs['dayofyear']).view(-1,10)
        quarter_embeds = self.quarter_embeddor(inputs['quarter']).view(-1,2)
        dayofquarter_embeds = self.dayofquarter_embeddor(inputs['dayofquarter']).view(-1,10)
        
        merged = torch.cat([store_embeds,genre_embeds,area_embeds,weekday_embeds,\
                        holiday_embeds,year_embeds,month_embeds,day_embeds,\
                        dayofyear_embeds,quarter_embeds,dayofquarter_embeds],1)
        goods = self.dropout(self.bn0(self.e2h(merged)).clamp(min=0))
        goods = self.dropout(self.bn1(self.h2h1(goods)).clamp(min=0))
        goods = self.dropout(self.bn2(self.h2h2(goods)).clamp(min=0))
        goods = self.dropout(self.bn3(self.h2h3(goods)).clamp(min=0))
        goods = self.dropout(self.bn4(self.h2h4(goods)).clamp(min=0))
        goods = self.dropout(self.bn5(self.h2h5(goods)).clamp(min=0))
        goods = self.dropout(self.bn6(self.h2h6(goods)).clamp(min=0))
        goods = self.dropout(self.bn7(self.h2h7(goods)).clamp(min=0))
        
        output = self.h2o(goods)
        return output

In [37]:
def train(epochs=5,lr=0.01,optimizer='Adamax'):
    print('\ntrain with optimizer {} and learning rate of {}'.format(optimizer,lr))
    train_mse_loss = []
    val_mse_loss = []
    val_rmsle_loss = []
    model = Regressor()
    loss_fn = nn.MSELoss()
    if optimizer == 'Adam':
        optimizer = optim.Adam(model.parameters(),lr=lr)
    elif optimizer == 'Adamax':
        optimizer = optim.Adamax(model.parameters(),lr=lr)
    elif optimizer == 'Adadelta':
        optimizer = optim.Adadelta(model.parameters(),lr=lr)
    elif optimizer == 'Adagrad':
        optimizer = optim.Adagrad(model.parameters(),lr=lr)
    elif optimizer == 'ASGD':
        optimizer = optim.ASGD(model.parameters(),lr=lr)
    elif optimizer == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(),lr=lr)
    elif optimizer == 'Rprop':
        optimizer = optim.Rprop(model.parameters(),lr=lr)
    elif optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(),lr=lr,momentum=0.8)
    data = batched_train_data
    for epoch in range(epochs):
        start = time()
        val_mse = []
        val_rmsle = []
        train_loss = []
        for i,(x,y) in enumerate(data):
            inputs = get_x_dict(x)
            outputs = Variable(torch.FloatTensor(y))
            preds = model(inputs)
            loss = loss_fn(preds,outputs)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            def validate_model(x,y):
                ypreds = model(x).detach().numpy()
                yreal = y
                return rmsle(ypreds,yreal)
            def mse_(x,y):
                ypreds = model(x).detach().numpy()
                return mean_squared_error(y,ypreds)
            x_,y_ = get_valid_data()
            val_mse.append(mse_(x_,y_))
            val_rmsle.append(validate_model(x_,y_))
            train_loss.append(loss.data[0])
            if i%100 == 0:
                train_mse_loss.append(np.mean(train_loss))
                val_mse_loss.append(np.mean(val_mse))
                val_rmsle_loss.append(np.mean(val_rmsle))
                train_loss = []
                val_mae = []
                val_rmsle = []
        print('epoch {} losses are {:.4f},{:.4f},and {:.4f} this epoch takes {:.2f} minutes'.format(\
                                                    epoch,train_mse_loss[-1],\
                                                   val_mse_loss[-1],\
                                                   val_rmsle_loss[-1],(time()-start)/60))
    return model,train_mse_loss,val_mse_loss,val_rmsle_loss

In [38]:
valid_full(model1)

NameError: name 'model1' is not defined

In [63]:
write_result(model1)

In [39]:
result = {op+'_'+str(lr):{} for op in ['Adamax','Adadelta','Adagrad','Adam','ASGD',\
                                       'RMSprop','Rprop','SGD'] for lr in [0.05,0.01,0.002,0.001]}

In [41]:
for op in ['Adamax','Adadelta','Adagrad','Adam','ASGD','RMSprop','Rprop','SGD']:
    for lr in [0.05,0.01,0.002,0.001]:
        _,train_mse_loss,val_mse_loss,val_rmsle_loss = train(epochs=10,lr=lr,optimizer=op)
        result[op+'_'+str(lr)]['train_loss'] = train_mse_loss
        result[op+'_'+str(lr)]['val_loss'] = val_mse_loss
        result[op+'_'+str(lr)]['val_mae_loss'] = val_rmsle_loss


train with optimizer Adamax and learning rate of 0.05
epoch 0 losses are 0.3800,0.5932,and 0.7158 this epoch takes 8.27 minutes
epoch 1 losses are 0.4106,0.4939,and 0.6844 this epoch takes 8.75 minutes
epoch 2 losses are 0.4085,0.4514,and 0.6729 this epoch takes 9.37 minutes
epoch 3 losses are 0.4043,0.4439,and 0.6710 this epoch takes 10.36 minutes
epoch 4 losses are 0.4009,0.4407,and 0.6583 this epoch takes 11.45 minutes
epoch 5 losses are 0.3992,0.4408,and 0.6637 this epoch takes 12.81 minutes
epoch 6 losses are 0.3991,0.4427,and 0.6761 this epoch takes 13.75 minutes
epoch 7 losses are 0.3931,0.4463,and 0.6730 this epoch takes 14.36 minutes
epoch 8 losses are 0.3967,0.4491,and 0.6681 this epoch takes 14.71 minutes
epoch 9 losses are 0.3894,0.4522,and 0.6716 this epoch takes 14.90 minutes

train with optimizer Adamax and learning rate of 0.01
epoch 0 losses are 0.4180,0.6210,and 0.7303 this epoch takes 7.87 minutes
epoch 1 losses are 0.4126,0.5369,and 0.7269 this epoch takes 8.14 min

epoch 3 losses are 0.4719,0.6492,and 0.8018 this epoch takes 4.87 minutes
epoch 4 losses are 0.4676,0.6400,and 0.7815 this epoch takes 4.87 minutes
epoch 5 losses are 0.4629,0.6251,and 0.7843 this epoch takes 4.86 minutes
epoch 6 losses are 0.4566,0.6183,and 0.7849 this epoch takes 5.22 minutes
epoch 7 losses are 0.4468,0.6143,and 0.7754 this epoch takes 6.08 minutes
epoch 8 losses are 0.4473,0.6060,and 0.7789 this epoch takes 5.95 minutes
epoch 9 losses are 0.4385,0.6041,and 0.7748 this epoch takes 6.09 minutes

train with optimizer Adagrad and learning rate of 0.001
epoch 0 losses are 0.7667,1.8395,and 0.9436 this epoch takes 6.09 minutes
epoch 1 losses are 0.5824,0.7718,and 0.8459 this epoch takes 6.11 minutes
epoch 2 losses are 0.5474,0.7193,and 0.8525 this epoch takes 6.10 minutes
epoch 3 losses are 0.5336,0.7048,and 0.8323 this epoch takes 6.08 minutes
epoch 4 losses are 0.5191,0.7005,and 0.8364 this epoch takes 6.03 minutes
epoch 5 losses are 0.5051,0.6850,and 0.8260 this epoch 

epoch 7 losses are 0.3371,0.4761,and 0.7065 this epoch takes 16.00 minutes
epoch 8 losses are 0.3399,0.4768,and 0.6922 this epoch takes 16.07 minutes
epoch 9 losses are 0.3382,0.4844,and 0.7111 this epoch takes 16.13 minutes

train with optimizer RMSprop and learning rate of 0.01
epoch 0 losses are 0.3472,0.5778,and 0.6941 this epoch takes 12.55 minutes
epoch 1 losses are 0.3348,0.4588,and 0.6753 this epoch takes 12.56 minutes
epoch 2 losses are 0.3325,0.4464,and 0.6605 this epoch takes 12.57 minutes
epoch 3 losses are 0.3266,0.4417,and 0.6698 this epoch takes 12.96 minutes
epoch 4 losses are 0.3203,0.4433,and 0.6696 this epoch takes 14.37 minutes
epoch 5 losses are 0.3164,0.4487,and 0.6707 this epoch takes 14.68 minutes
epoch 6 losses are 0.3125,0.4558,and 0.6736 this epoch takes 14.98 minutes
epoch 7 losses are 0.3105,0.4663,and 0.6758 this epoch takes 15.03 minutes
epoch 8 losses are 0.3078,0.4722,and 0.6722 this epoch takes 15.16 minutes
epoch 9 losses are 0.3069,0.4803,and 0.6946 

epoch 0 losses are 0.6876,0.8228,and 0.8494 this epoch takes 11.34 minutes
epoch 1 losses are 0.6357,0.7076,and 0.8381 this epoch takes 8.51 minutes
epoch 2 losses are 0.5901,0.6983,and 0.8204 this epoch takes 5.52 minutes
epoch 3 losses are 0.5625,0.6873,and 0.8236 this epoch takes 6.81 minutes
epoch 4 losses are 0.5352,0.6872,and 0.8296 this epoch takes 11.61 minutes
epoch 5 losses are 0.5156,0.6778,and 0.8133 this epoch takes 11.60 minutes
epoch 6 losses are 0.4998,0.6739,and 0.8182 this epoch takes 11.59 minutes
epoch 7 losses are 0.4914,0.6698,and 0.8188 this epoch takes 11.55 minutes
epoch 8 losses are 0.4759,0.6708,and 0.8006 this epoch takes 11.61 minutes
epoch 9 losses are 0.4719,0.6567,and 0.8074 this epoch takes 11.66 minutes


In [61]:
xtest_dict.keys()

dict_keys(['store', 'genre', 'area', 'weekday', 'holiday', 'year', 'month', 'day', 'dayofyear', 'quarter', 'dayof_quarter'])

In [64]:
result = pd.read_csv('sub1.csv')

In [65]:
result.head()

Unnamed: 0,id,visitors
0,air_00a91d42b08b08d9_2017-04-23,37.720238
1,air_0164b9927d20bcc3_2017-04-23,7.191416
2,air_0241aa3964b7f861_2017-04-23,10.825824
3,air_0328696196e46f18_2017-04-23,13.505601
4,air_034a3d5b40d5b1b1_2017-04-23,25.329669
