### Data Processing & Feature Engineering

In [1]:
import pandas as pd

# Load the raw Jena climate CSV; the relative path is resolved against the
# kernel's current working directory.
ten_minute_df = pd.read_csv(filepath_or_buffer="jena_climate_2009_2016.csv")

In [20]:
# Keep every sixth row starting at position 5 (rows 5, 11, 17, ...). Judging by
# the variable names this reduces 10-minute readings to hourly ones -- TODO
# confirm the sampling interval against the raw file.
hourly_df = ten_minute_df.iloc[5::6]

# Index the rows by their parsed timestamp. The key is passed as a Series, so
# 'Date Time' also remains available as a regular column.
hourly_df = hourly_df.set_index(
    pd.to_datetime(hourly_df['Date Time'], format='%d.%m.%Y %H:%M:%S')
)


In [10]:
# List the available column labels before choosing which features to keep.
hourly_df.columns

Index(['Date Time', 'p (mbar)', 'T (degC)', 'Tpot (K)', 'Tdew (degC)',
       'rh (%)', 'VPmax (mbar)', 'VPact (mbar)', 'VPdef (mbar)', 'sh (g/kg)',
       'H2OC (mmol/mol)', 'rho (g/m**3)', 'wv (m/s)', 'max. wv (m/s)',
       'wd (deg)'],
      dtype='object')

In [21]:
# Shorten the verbose column labels, then keep only the columns used as model
# features (plus 'Date Time').
#
# The rename runs BEFORE the selection on purpose: DataFrame.rename silently
# ignores labels that are not present, so re-running this cell after the
# columns have already been renamed is a no-op instead of a KeyError.
hourly_df = (
    hourly_df
    .rename(columns={
        'p (mbar)': 'p',
        'T (degC)': 'temp',
        'Tdew (degC)': 'tdew',
        'rh (%)': 'rh',
        'VPmax (mbar)': 'vpm',
    })
    [['Date Time', 'p', 'temp', 'tdew', 'rh', 'vpm']]
)

In [22]:
# Preview the first five rows of hourly_df.
hourly_df.head()

Unnamed: 0_level_0,Date Time,p,temp,tdew,rh,vpm
Date Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-01-01 01:00:00,01.01.2009 01:00:00,996.5,-8.05,-8.78,94.4,3.33
2009-01-01 02:00:00,01.01.2009 02:00:00,996.62,-8.88,-9.77,93.2,3.12
2009-01-01 03:00:00,01.01.2009 03:00:00,996.84,-8.81,-9.66,93.5,3.13
2009-01-01 04:00:00,01.01.2009 04:00:00,996.99,-9.05,-10.02,92.6,3.07
2009-01-01 05:00:00,01.01.2009 05:00:00,997.46,-9.63,-10.65,92.2,2.94


In [26]:
# Positional split with no shuffling: the first 60,000 rows are used for
# training, the next 5,000 for validation and everything after that for testing.
# NOTE(review): this is a chronological split only if the rows are sorted by
# time -- confirm against the raw file.
train_df = hourly_df.iloc[:60000]
valid_df = hourly_df.iloc[60000:65000]
test_df = hourly_df.iloc[65000:]

In [23]:
import numpy as np

def build_feature_target_vectors(df, input_window, pred_window):
    """
    Build sliding-window input/target arrays for model training.

    Every column of ``df`` except 'Date Time' is used as an input feature; the
    target is the 'temp' column over the rows that immediately follow each
    input window. Window starts advance 24 rows at a time, and both window
    lengths are given in days and converted to rows at 24 rows per day, so
    ``df`` is assumed to hold one row per hour.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Date Time' column (dropped) and a 'temp' column.
    input_window : int
        Length of each input sequence, in days.
    pred_window : int
        Length of each target sequence, in days.

    Returns
    -------
    X : numpy.ndarray, shape (n_windows, input_window * 24, n_features)
        Input sequences.
    Y : numpy.ndarray, shape (n_windows, pred_window * 24)
        'temp' values for the rows following each input sequence.
        When ``df`` is too short to hold a single window, n_windows is 0 but
        the trailing dimensions are still populated.

    Raises
    ------
    ValueError
        If either window length is negative.
    """
    if input_window < 0 or pred_window < 0:
        raise ValueError("input_window and pred_window must be non-negative")

    input_hours = input_window * 24
    prediction_hours = pred_window * 24

    features_df = df.drop('Date Time', axis=1)

    # Convert to NumPy once up front; slicing the DataFrame on every loop
    # iteration is far slower than slicing plain arrays.
    feature_values = features_df.values
    target_values = features_df['temp'].values

    window_starts = range(0, len(features_df) - input_hours - prediction_hours + 1, 24)

    # Pre-sized outputs keep their full rank even when there are no windows,
    # so downstream code can rely on X being 3-D and Y being 2-D.
    X = np.empty(
        (len(window_starts), input_hours, feature_values.shape[1]),
        dtype=feature_values.dtype,
    )
    Y = np.empty((len(window_starts), prediction_hours), dtype=target_values.dtype)

    for row, start in enumerate(window_starts):
        target_start = start + input_hours
        X[row] = feature_values[start:target_start]
        Y[row] = target_values[target_start:target_start + prediction_hours]

    return X, Y


In [30]:
# Given 30 days of feature data, predict the hourly temp for the next 7 days.

input_window = 30  # days of history in each input sequence
pred_window = 7  # days of hourly temp values to predict

In [27]:
# Build (inputs, targets) arrays for each split with the same window settings.
# The generator is consumed in order, so the calls run train -> valid -> test.
(X_train, Y_train), (X_valid, Y_valid), (X_test, Y_test) = (
    build_feature_target_vectors(split_df, input_window, pred_window)
    for split_df in (train_df, valid_df, test_df)
)

### Model Architecture & Training