# RNN

## Imports

### Useful Libraries

In [19]:
import torch
import torchvision
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import dataloader

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

### Source files

In [3]:
%load_ext autoreload
%autoreload 2

from RNN import RNNClassification

              datetime   Vancouver     ...        Montreal      Boston
0  2012-10-01 13:00:00  284.630000     ...      285.830000  287.170000
1  2012-10-01 14:00:00  284.629041     ...      285.834650  287.186092
2  2012-10-01 15:00:00  284.626998     ...      285.847790  287.231672
3  2012-10-01 16:00:00  284.624955     ...      285.860929  287.277251
4  2012-10-01 17:00:00  284.622911     ...      285.874069  287.322831

[5 rows x 31 columns]


NameError: name 'X' is not defined

## Script

In [47]:
def missing_ratio(data):
    """ Returns a new dataframe indexed by feature name with the
    percentage (0-100) of missing values of each feature.

    Only features that have at least one missing value are listed,
    sorted from the most to the least incomplete. A dataframe without
    rows yields an empty result instead of a column of NaN.

    :param data: dataframe
    :returns: dataframe with a single 'Missing Ratio' column holding the
        percentage of missing values of each incomplete feature
    :rtype: pandas.DataFrame

    """

    n_rows = len(data)
    if n_rows == 0:
        # 0 / 0 would report every feature as NaN; with no rows there is
        # nothing that can be missing.
        return pd.DataFrame({'Missing Ratio': pd.Series(dtype=float)})

    data_na = (data.isnull().sum() / n_rows) * 100
    # Keep only the features that actually have gaps, worst first.
    all_data_na = data_na[data_na > 0].sort_values(ascending=False)
    missing_data = pd.DataFrame({'Missing Ratio': all_data_na})

    return missing_data

## Loading Data

In [48]:
# Load the training CSV into a dataframe and preview its first three rows.
train_csv = 'data/tempAMAL_train.csv'
data = pd.read_csv(train_csv)
data.head(3)

Unnamed: 0,datetime,Vancouver,Portland,San Francisco,Seattle,Los Angeles,San Diego,Las Vegas,Phoenix,Albuquerque,Denver,San Antonio,Dallas,Houston,Kansas City,Minneapolis,Saint Louis,Chicago,Nashville,Indianapolis,Atlanta,Detroit,Jacksonville,Charlotte,Miami,Pittsburgh,Toronto,Philadelphia,New York,Montreal,Boston
0,2012-10-01 13:00:00,284.63,282.08,289.48,281.8,291.87,291.53,293.41,296.6,285.12,284.61,289.29,289.74,288.27,289.98,286.87,286.18,284.01,287.41,283.85,294.03,284.03,298.17,288.65,299.72,281.0,286.26,285.63,288.22,285.83,287.17
1,2012-10-01 14:00:00,284.629041,282.083252,289.474993,281.797217,291.868186,291.533501,293.403141,296.608509,285.154558,284.607306,289.303649,289.762974,288.297576,289.997635,286.893636,286.185246,284.054691,287.42136,283.889394,294.035341,284.069789,298.20523,288.650172,299.732518,281.024767,286.262541,285.663208,288.247676,285.83465,287.186092
2,2012-10-01 15:00:00,284.626998,282.091866,289.460618,281.789833,291.862844,291.543355,293.392177,296.631487,285.233952,284.599918,289.338497,289.830767,288.334343,290.038151,286.951401,286.199194,284.177412,287.454637,283.941919,294.049702,284.173965,298.299595,288.650582,299.766579,281.088319,286.269518,285.756824,288.32694,285.84779,287.231672


## Missing Values

- Are there any missing values?

In [49]:
# True as soon as a single cell anywhere in the frame is missing.
data.isna().to_numpy().any()

True

- Show the first rows with a missing value

In [50]:
# Flag every row that has at least one missing cell, then preview the first few.
has_gap = data.isna().any(axis='columns')
data.loc[has_gap].head()

Unnamed: 0,datetime,Vancouver,Portland,San Francisco,Seattle,Los Angeles,San Diego,Las Vegas,Phoenix,Albuquerque,Denver,San Antonio,Dallas,Houston,Kansas City,Minneapolis,Saint Louis,Chicago,Nashville,Indianapolis,Atlanta,Detroit,Jacksonville,Charlotte,Miami,Pittsburgh,Toronto,Philadelphia,New York,Montreal,Boston
3858,2013-03-11 07:00:00,,277.776667,281.998,277.216667,,281.816667,283.353333,,271.568,267.978,288.288,,,280.114667,,285.099667,,285.596333,,285.011333,276.903,285.389667,,,,275.726667,,274.036667,,
3859,2013-03-11 08:00:00,,276.903333,280.756,276.633333,,281.563333,282.786667,,268.966,266.846,283.626,,,275.289333,,281.969333,,285.042667,,285.812667,277.946,286.139333,,,,275.783333,,273.603333,,
6917,2013-07-16 18:00:00,293.42,293.94,285.53,292.04,297.64,298.71,311.48,310.93,293.15,294.38,298.15,294.68,302.04,304.48,303.15,306.48,299.82,304.26,,,298.71,303.71,301.95,,303.971,303.49,306.72,307.37,303.71,304.26
6918,2013-07-16 19:00:00,294.5,294.782,288.79,294.73,299.015,300.332,312.65,312.745,291.755,293.94,299.505,295.105,302.61,303.59,303.165,305.61,301.135,304.95,,,301.02,303.27,301.332,,303.182,303.9,306.51,307.275,304.505,303.47
6946,2013-07-17 23:00:00,301.97,303.74,292.21,,300.38,296.26,312.59,310.79,296.34,297.09,298.036,304.27,299.06,305.68,304.82,307.04,305.07,306.48,306.036,,302.54,302.04,303.5755,,298.71,303.71,304.26,305.63,304.15,298.15


- Percentage of missing values per column

In [52]:
# Per-column percentage of missing values, most incomplete first;
# columns without any missing value are left out.
missing_ratio(data)

Unnamed: 0,Missing Ratio
Miami,0.107962
Indianapolis,0.053981
Atlanta,0.044984
Dallas,0.026991
Boston,0.017994
Minneapolis,0.017994
Seattle,0.017994
Los Angeles,0.017994
Phoenix,0.017994
Houston,0.017994


- Replace each missing value with the last valid value in the same column (forward fill)

In [35]:
# Forward fill: each gap takes the last valid value above it in the same column.
# `fillna(method='ffill')` is deprecated in recent pandas; `ffill()` is the
# direct equivalent.
data = data.ffill()

## Trying tensorboard

In [2]:
# Smoke test for TensorBoard logging: write 100 steps of random values under
# four tags. No real training happens here.
writer = SummaryWriter()

for n_iter in range(100):
    for tag in ('Loss/train', 'Loss/test', 'Accuracy/train', 'Accuracy/test'):
        writer.add_scalar(tag, np.random.random(), n_iter)

# Push buffered events to disk so they show up in TensorBoard right away.
writer.flush()