## In this Notebook

## 1. Importing Libraries and Data

## 2. Data Wrangling

## 3. Create Train and Test Sets

## 4. CNN Model

## 5. RNN Model

### 1. Importing Libraries and Data

In [1]:
# Import libraries

import pandas as pd
import numpy as np
import seaborn as sns
import os

In [2]:
# Set Path

path = r'C:\Users\Josh Wattay\anaconda3\ClimateWins Analysis\01 - Data'

In [3]:
# Read unscaled weather data

df = pd.read_csv(os.path.join(path, 'weather_data.csv'))

In [4]:
df.shape

(22950, 170)

In [5]:
df.head()

Unnamed: 0,DATE,MONTH,BASEL_cloud_cover,BASEL_wind_speed,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_snow_depth,BASEL_sunshine,...,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_snow_depth,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,19600101,1,7,2.1,0.85,1.018,0.32,0.09,0,0.7,...,5,0.88,1.0003,0.45,0.34,0,4.7,8.5,6.0,10.9
1,19600102,1,6,2.1,0.84,1.018,0.36,1.05,0,1.1,...,7,0.91,1.0007,0.25,0.84,0,0.7,8.9,5.6,12.1
2,19600103,1,8,2.1,0.9,1.018,0.18,0.3,0,0.0,...,7,0.91,1.0096,0.17,0.08,0,0.1,10.5,8.1,12.9
3,19600104,1,3,2.1,0.92,1.018,0.58,0.0,0,4.1,...,7,0.86,1.0184,0.13,0.98,0,0.0,7.4,7.3,10.6
4,19600105,1,6,2.1,0.95,1.018,0.65,0.14,0,5.4,...,3,0.8,1.0328,0.46,0.0,0,5.7,5.7,3.0,8.4


In [6]:
# Read pleasant weather data

df2 = pd.read_csv(os.path.join(path, 'pleasant_weather.csv'))

In [7]:
df2.shape

(22950, 16)

In [8]:
df2.head()

Unnamed: 0,DATE,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,19600101,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,19600102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,19600103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,19600104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,19600105,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### 2. Data Wrangling

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22950 entries, 0 to 22949
Columns: 170 entries, DATE to VALENTIA_temp_max
dtypes: float64(145), int64(25)
memory usage: 29.8 MB


In [10]:
pd.set_option('display.max_info_columns', 200) # to see all columns

In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22950 entries, 0 to 22949
Data columns (total 170 columns):
 #    Column                       Non-Null Count  Dtype  
---   ------                       --------------  -----  
 0    DATE                         22950 non-null  int64  
 1    MONTH                        22950 non-null  int64  
 2    BASEL_cloud_cover            22950 non-null  int64  
 3    BASEL_wind_speed             22950 non-null  float64
 4    BASEL_humidity               22950 non-null  float64
 5    BASEL_pressure               22950 non-null  float64
 6    BASEL_global_radiation       22950 non-null  float64
 7    BASEL_precipitation          22950 non-null  float64
 8    BASEL_snow_depth             22950 non-null  int64  
 9    BASEL_sunshine               22950 non-null  float64
 10   BASEL_temp_mean              22950 non-null  float64
 11   BASEL_temp_min               22950 non-null  float64
 12   BASEL_temp_max               22950 non-null  float64
 13  

In [12]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22950 entries, 0 to 22949
Data columns (total 16 columns):
 #   Column                       Non-Null Count  Dtype
---  ------                       --------------  -----
 0   DATE                         22950 non-null  int64
 1   BASEL_pleasant_weather       22950 non-null  int64
 2   BELGRADE_pleasant_weather    22950 non-null  int64
 3   BUDAPEST_pleasant_weather    22950 non-null  int64
 4   DEBILT_pleasant_weather      22950 non-null  int64
 5   DUSSELDORF_pleasant_weather  22950 non-null  int64
 6   HEATHROW_pleasant_weather    22950 non-null  int64
 7   KASSEL_pleasant_weather      22950 non-null  int64
 8   LJUBLJANA_pleasant_weather   22950 non-null  int64
 9   MAASTRICHT_pleasant_weather  22950 non-null  int64
 10  MADRID_pleasant_weather      22950 non-null  int64
 11  MUNCHENB_pleasant_weather    22950 non-null  int64
 12  OSLO_pleasant_weather        22950 non-null  int64
 13  SONNBLICK_pleasant_weather   22950 non-null  i

In [13]:
# Remove all the columns for: GDANSK, ROMA, TOURS 
# Not enough data for these cities to include them in the pleasant weather data set
# Create a pattern 
pattern=r'(GDANSK|ROMA|TOURS)'

# Filter the columns to remove
columns_to_remove=df.filter(regex=pattern).columns

In [14]:
columns_to_remove.shape

(21,)

In [15]:
columns_to_remove #check to ensure no errors

Index(['GDANSK_cloud_cover', 'GDANSK_humidity', 'GDANSK_precipitation',
       'GDANSK_snow_depth', 'GDANSK_temp_mean', 'GDANSK_temp_min',
       'GDANSK_temp_max', 'ROMA_cloud_cover', 'ROMA_wind_speed',
       'ROMA_humidity', 'ROMA_pressure', 'ROMA_sunshine', 'ROMA_temp_mean',
       'TOURS_wind_speed', 'TOURS_humidity', 'TOURS_pressure',
       'TOURS_global_radiation', 'TOURS_precipitation', 'TOURS_temp_mean',
       'TOURS_temp_min', 'TOURS_temp_max'],
      dtype='object')

In [16]:
# Subset by dropping these columns

df3 = df.drop(columns = columns_to_remove, inplace = False)

In [17]:
# Now our Cities match in both df2 and df3

In [18]:
df3.shape

(22950, 149)

In [19]:
# Check for Null or Missing Values

df3.isnull().sum()

DATE                   0
MONTH                  0
BASEL_cloud_cover      0
BASEL_wind_speed       0
BASEL_humidity         0
                      ..
VALENTIA_snow_depth    0
VALENTIA_sunshine      0
VALENTIA_temp_mean     0
VALENTIA_temp_min      0
VALENTIA_temp_max      0
Length: 149, dtype: int64

In [20]:
# No missing values!

In [21]:
# Snow Depth and Wind Speed are not included variables for each city
# We will subset again by using a pattern to idenitfy these variables and drop them when making a new dataframe

pattern_snow_wind = r'(wind_speed|snow_depth)'

# Filter the columns to remove

columns_to_remove_snow_wind = df3.filter(regex=pattern_snow_wind).columns 

In [22]:
columns_to_remove_snow_wind.shape

(15,)

In [23]:
columns_to_remove_snow_wind # inspect to ensure no erroneous columns

Index(['BASEL_wind_speed', 'BASEL_snow_depth', 'DEBILT_wind_speed',
       'DUSSELDORF_wind_speed', 'DUSSELDORF_snow_depth', 'HEATHROW_snow_depth',
       'KASSEL_wind_speed', 'LJUBLJANA_wind_speed', 'MAASTRICHT_wind_speed',
       'MADRID_wind_speed', 'MUNCHENB_snow_depth', 'OSLO_wind_speed',
       'OSLO_snow_depth', 'SONNBLICK_wind_speed', 'VALENTIA_snow_depth'],
      dtype='object')

In [24]:
# Subset for dropping these columns

df4 = df3.drop(columns = columns_to_remove_snow_wind, inplace=False)

In [25]:
df4.shape

(22950, 134)

In [26]:
df4.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22950 entries, 0 to 22949
Data columns (total 134 columns):
 #    Column                       Non-Null Count  Dtype  
---   ------                       --------------  -----  
 0    DATE                         22950 non-null  int64  
 1    MONTH                        22950 non-null  int64  
 2    BASEL_cloud_cover            22950 non-null  int64  
 3    BASEL_humidity               22950 non-null  float64
 4    BASEL_pressure               22950 non-null  float64
 5    BASEL_global_radiation       22950 non-null  float64
 6    BASEL_precipitation          22950 non-null  float64
 7    BASEL_sunshine               22950 non-null  float64
 8    BASEL_temp_mean              22950 non-null  float64
 9    BASEL_temp_min               22950 non-null  float64
 10   BASEL_temp_max               22950 non-null  float64
 11   BELGRADE_cloud_cover         22950 non-null  int64  
 12   BELGRADE_humidity            22950 non-null  float64
 13  

In [27]:
df4.head()

Unnamed: 0,DATE,MONTH,BASEL_cloud_cover,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,...,STOCKHOLM_temp_max,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,19600101,1,7,0.85,1.018,0.32,0.09,0.7,6.5,0.8,...,4.9,5,0.88,1.0003,0.45,0.34,4.7,8.5,6.0,10.9
1,19600102,1,6,0.84,1.018,0.36,1.05,1.1,6.1,3.3,...,5.0,7,0.91,1.0007,0.25,0.84,0.7,8.9,5.6,12.1
2,19600103,1,8,0.9,1.018,0.18,0.3,0.0,8.5,5.1,...,4.1,7,0.91,1.0096,0.17,0.08,0.1,10.5,8.1,12.9
3,19600104,1,3,0.92,1.018,0.58,0.0,4.1,6.3,3.8,...,2.3,7,0.86,1.0184,0.13,0.98,0.0,7.4,7.3,10.6
4,19600105,1,6,0.95,1.018,0.65,0.14,5.4,3.0,-0.7,...,4.3,3,0.8,1.0328,0.46,0.0,5.7,5.7,3.0,8.4


In [28]:
# Subset to Drop DATE and MONTH

df5 = df4.drop(columns=['DATE', 'MONTH'], inplace=False)

In [29]:
df5.shape

(22950, 132)

In [30]:
df5.head()

Unnamed: 0,BASEL_cloud_cover,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,BASEL_temp_max,BELGRADE_cloud_cover,...,STOCKHOLM_temp_max,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,7,0.85,1.018,0.32,0.09,0.7,6.5,0.8,10.9,1,...,4.9,5,0.88,1.0003,0.45,0.34,4.7,8.5,6.0,10.9
1,6,0.84,1.018,0.36,1.05,1.1,6.1,3.3,10.1,6,...,5.0,7,0.91,1.0007,0.25,0.84,0.7,8.9,5.6,12.1
2,8,0.9,1.018,0.18,0.3,0.0,8.5,5.1,9.9,6,...,4.1,7,0.91,1.0096,0.17,0.08,0.1,10.5,8.1,12.9
3,3,0.92,1.018,0.58,0.0,4.1,6.3,3.8,10.6,8,...,2.3,7,0.86,1.0184,0.13,0.98,0.0,7.4,7.3,10.6
4,6,0.95,1.018,0.65,0.14,5.4,3.0,-0.7,6.0,8,...,4.3,3,0.8,1.0328,0.46,0.0,5.7,5.7,3.0,8.4


In [31]:
# Subset to Drop DATE from the Pleasant Weather (df2)

df6 = df2.drop(columns=['DATE'], inplace=False)

In [32]:
df6.shape

(22950, 15)

In [33]:
df6.head()

Unnamed: 0,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [34]:
# Export wrangled weather data set

df5.to_csv(os.path.join(path, 'weather_wrangled.csv'))

In [35]:
# Export wrangled pleasant weather data set

df6.to_csv(os.path.join(path, 'pleasant_weather_wrangled.csv'))

### 3. Create Train and Test Sets

In [36]:
# Import additional libraries

import pandas as pd
import numpy as np
import seaborn as sns
import os
import operator
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from numpy import reshape
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Conv1D, Conv2D, Dense, BatchNormalization, Flatten, MaxPooling1D, Dense, Dropout
from keras.utils import np_utils

ImportError: cannot import name 'np_utils' from 'keras.utils' (C:\Users\Josh Wattay\anaconda3\Lib\site-packages\keras\api\utils\__init__.py)

In [37]:
# Read wrangled Weather Data

df_wrangled = pd.read_csv(os.path.join(path, 'weather_wrangled.csv'))

In [38]:
df_wrangled.shape

(22950, 133)

In [39]:
df_wrangled.head()

Unnamed: 0.1,Unnamed: 0,BASEL_cloud_cover,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,BASEL_temp_max,...,STOCKHOLM_temp_max,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,0,7,0.85,1.018,0.32,0.09,0.7,6.5,0.8,10.9,...,4.9,5,0.88,1.0003,0.45,0.34,4.7,8.5,6.0,10.9
1,1,6,0.84,1.018,0.36,1.05,1.1,6.1,3.3,10.1,...,5.0,7,0.91,1.0007,0.25,0.84,0.7,8.9,5.6,12.1
2,2,8,0.9,1.018,0.18,0.3,0.0,8.5,5.1,9.9,...,4.1,7,0.91,1.0096,0.17,0.08,0.1,10.5,8.1,12.9
3,3,3,0.92,1.018,0.58,0.0,4.1,6.3,3.8,10.6,...,2.3,7,0.86,1.0184,0.13,0.98,0.0,7.4,7.3,10.6
4,4,6,0.95,1.018,0.65,0.14,5.4,3.0,-0.7,6.0,...,4.3,3,0.8,1.0328,0.46,0.0,5.7,5.7,3.0,8.4


In [42]:
df_wrangled = df_wrangled.drop(columns='Unnamed: 0')

In [43]:
df_wrangled.head()

Unnamed: 0,BASEL_cloud_cover,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,BASEL_temp_max,BELGRADE_cloud_cover,...,STOCKHOLM_temp_max,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,7,0.85,1.018,0.32,0.09,0.7,6.5,0.8,10.9,1,...,4.9,5,0.88,1.0003,0.45,0.34,4.7,8.5,6.0,10.9
1,6,0.84,1.018,0.36,1.05,1.1,6.1,3.3,10.1,6,...,5.0,7,0.91,1.0007,0.25,0.84,0.7,8.9,5.6,12.1
2,8,0.9,1.018,0.18,0.3,0.0,8.5,5.1,9.9,6,...,4.1,7,0.91,1.0096,0.17,0.08,0.1,10.5,8.1,12.9
3,3,0.92,1.018,0.58,0.0,4.1,6.3,3.8,10.6,8,...,2.3,7,0.86,1.0184,0.13,0.98,0.0,7.4,7.3,10.6
4,6,0.95,1.018,0.65,0.14,5.4,3.0,-0.7,6.0,8,...,4.3,3,0.8,1.0328,0.46,0.0,5.7,5.7,3.0,8.4


In [44]:
# Read wrangled Pleasant Weather Data

df_pleasant = pd.read_csv(os.path.join(path, 'pleasant_weather_wrangled.csv'))

In [46]:
df_pleasant.shape

(22950, 16)

In [47]:
df_pleasant.head()

Unnamed: 0.1,Unnamed: 0,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [48]:
df_pleasant = df_pleasant.drop(columns='Unnamed: 0')

In [49]:
df_pleasant.shape

(22950, 15)

In [50]:
df_pleasant.head()

Unnamed: 0,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [51]:
# Create an 'X' matrix

X = df_wrangled
y = df_pleasant

In [52]:
X = np.array(X)
y = np.array(y)

In [53]:
X = X.reshape(-1,15,9)

In [55]:
print(X.shape)
print(y.shape)

(22440, 15, 9)
(22950, 15)


In [56]:
total_elements = X.size
print(total_elements % 135)  # This should be 0 for a perfect reshape

0


In [57]:
# Adjust y to match reshaped X

y = y[:X.shape[0]]

In [58]:
print(X.shape)
print(y.shape)

(22440, 15, 9)
(22440, 15)


In [59]:
#Split data into training and testing sets.

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)

In [60]:
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(17952, 15, 9) (17952, 15)
(4488, 15, 9) (4488, 15)


In [61]:
X_train

array([[[ 7.9000e+00,  5.2000e+00,  1.1000e+01, ...,  5.5000e-01,
          0.0000e+00,  0.0000e+00],
        [ 8.2000e+00,  6.4000e+00,  1.1500e+01, ...,  1.8200e+00,
          0.0000e+00,  6.6000e+00],
        [ 7.6000e+00,  4.4000e+00,  8.5000e+00, ...,  2.4400e+00,
          0.0000e+00,  5.8000e+00],
        ...,
        [ 3.0000e+00,  5.0000e-01,  1.0147e+00, ...,  1.4200e+01,
          8.2000e+00,  2.0200e+01],
        [ 6.0000e+00,  7.7000e-01,  1.0122e+00, ...,  1.0100e+01,
          7.1000e+00,  1.4300e+01],
        [ 6.0000e+00,  7.5000e-01,  1.0147e+00, ...,  6.1000e+00,
          4.1000e+00,  9.8000e+00]],

       [[ 1.8900e+00,  0.0000e+00,  8.9000e+00, ...,  5.0000e+00,
          8.2000e-01,  1.0142e+00],
        [ 1.8800e+00,  4.1000e-01,  3.4000e+00, ...,  8.0000e+00,
          8.2000e-01,  1.0059e+00],
        [ 7.7000e-01,  3.1100e+00,  2.5000e+00, ...,  1.0000e+00,
          3.8000e-01,  1.0130e+00],
        ...,
        [ 5.5000e+00,  1.2500e+01,  8.0000e+00, ...,  

In [62]:
len(X_train[0])

15

In [63]:
len(X_train[0][0])

9

In [64]:
y_train

array([[0, 1, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 1, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [1, 1, 0, ..., 0, 0, 1]], dtype=int64)

In [65]:
len(y_train[0])

15

### 4. CNN Model

In [66]:
# Define Stations

stations = {
    0: 'BASEL',
    1: 'BELGRADE',
    2: 'BUDAPEST',
    3: 'DEBILT',
    4: 'DUSSELDORF',
    5: 'HEATHROW',
    6: 'KASSEL',
    7: 'LJUBLJANA',
    8: 'MAASTRICHT',
    9: 'MADRID',
   10: 'MUNCHENB',
   11: 'OSLO',
   12: 'SONNBLICK',
   13: 'STOCKHOLM',
   14: 'VALENTIA',
}

In [67]:
# Create a Keras leyered model. Use initial hyperparameters: 8, 16, 32, softmax
epochs = 8
batch_size = 16
n_hidden = 32

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [68]:
model.compile(loss='categorical_crossentropy', 
              optimizer='adam', 
              metrics=['accuracy'])

In [69]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/8
1122/1122 - 6s - 5ms/step - accuracy: 0.1034 - loss: 4109.7939
Epoch 2/8
1122/1122 - 3s - 3ms/step - accuracy: 0.0926 - loss: 16371.8750
Epoch 3/8
1122/1122 - 3s - 3ms/step - accuracy: 0.0948 - loss: 21831.8066
Epoch 4/8
1122/1122 - 3s - 3ms/step - accuracy: 0.0917 - loss: 34837.9805
Epoch 5/8
1122/1122 - 3s - 3ms/step - accuracy: 0.0940 - loss: 53474.6445
Epoch 6/8
1122/1122 - 3s - 3ms/step - accuracy: 0.0938 - loss: 69176.7266
Epoch 7/8
1122/1122 - 3s - 3ms/step - accuracy: 0.0909 - loss: 95894.3828
Epoch 8/8
1122/1122 - 3s - 3ms/step - accuracy: 0.0917 - loss: 99429.1250


<keras.src.callbacks.history.History at 0x190594412d0>

In [70]:
# Create a Keras leyered model. Use hyperparameters: 16, 32, 64, softmax
epochs = 16
batch_size = 32
n_hidden = 64

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax'))

In [72]:
model.compile(loss='categorical_crossentropy', 
              optimizer='adam', 
              metrics=['accuracy'])

In [73]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/16
561/561 - 5s - 10ms/step - accuracy: 0.0956 - loss: 1688.6029
Epoch 2/16
561/561 - 2s - 3ms/step - accuracy: 0.0965 - loss: 8821.6807
Epoch 3/16
561/561 - 2s - 3ms/step - accuracy: 0.0952 - loss: 15359.5596
Epoch 4/16
561/561 - 2s - 3ms/step - accuracy: 0.0917 - loss: 27884.2246
Epoch 5/16
561/561 - 2s - 3ms/step - accuracy: 0.0934 - loss: 41676.1992
Epoch 6/16
561/561 - 2s - 4ms/step - accuracy: 0.0894 - loss: 47221.6758
Epoch 7/16
561/561 - 2s - 3ms/step - accuracy: 0.0903 - loss: 61723.9727
Epoch 8/16
561/561 - 2s - 3ms/step - accuracy: 0.0917 - loss: 70095.1641
Epoch 9/16
561/561 - 2s - 3ms/step - accuracy: 0.0930 - loss: 96362.1953
Epoch 10/16
561/561 - 2s - 4ms/step - accuracy: 0.0919 - loss: 111852.8828
Epoch 11/16
561/561 - 2s - 4ms/step - accuracy: 0.0931 - loss: 122734.4297
Epoch 12/16
561/561 - 2s - 4ms/step - accuracy: 0.0949 - loss: 140448.1562
Epoch 13/16
561/561 - 2s - 4ms/step - accuracy: 0.0950 - loss: 162840.9531
Epoch 14/16
561/561 - 2s - 4ms/step - accura

<keras.src.callbacks.history.History at 0x190671bbf10>

In [74]:
# Create a Keras leyered model. Use hyperparameters: 32, 64, 128, softmax
epochs = 32
batch_size = 64
n_hidden = 128

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax'))

In [75]:
model.compile(loss='categorical_crossentropy', 
              optimizer='adam', 
              metrics=['accuracy'])

In [76]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/32
281/281 - 5s - 16ms/step - accuracy: 0.1272 - loss: 899.4208
Epoch 2/32
281/281 - 1s - 4ms/step - accuracy: 0.1219 - loss: 7204.4907
Epoch 3/32
281/281 - 1s - 4ms/step - accuracy: 0.1216 - loss: 16735.2734
Epoch 4/32
281/281 - 1s - 4ms/step - accuracy: 0.1112 - loss: 23676.1875
Epoch 5/32
281/281 - 1s - 4ms/step - accuracy: 0.0891 - loss: 33413.7500
Epoch 6/32
281/281 - 1s - 4ms/step - accuracy: 0.0915 - loss: 38968.4648
Epoch 7/32
281/281 - 1s - 4ms/step - accuracy: 0.0975 - loss: 53235.7812
Epoch 8/32
281/281 - 1s - 4ms/step - accuracy: 0.0955 - loss: 61612.8633
Epoch 9/32
281/281 - 1s - 4ms/step - accuracy: 0.0948 - loss: 72111.8125
Epoch 10/32
281/281 - 1s - 4ms/step - accuracy: 0.0936 - loss: 92367.4688
Epoch 11/32
281/281 - 1s - 4ms/step - accuracy: 0.0919 - loss: 103530.9844
Epoch 12/32
281/281 - 1s - 4ms/step - accuracy: 0.0982 - loss: 110239.8750
Epoch 13/32
281/281 - 1s - 4ms/step - accuracy: 0.0904 - loss: 125018.2422
Epoch 14/32
281/281 - 1s - 4ms/step - accuracy

<keras.src.callbacks.history.History at 0x19067b61b90>

In [77]:
# Create confusion matrix
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [78]:
# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Pred        BASEL  BELGRADE  HEATHROW  MUNCHENB
True                                           
BASEL        2895        28         2         2
BELGRADE      804         8         1         0
BUDAPEST      149         1         1         0
DEBILT         71         0         0         0
DUSSELDORF     31         0         0         0
HEATHROW       82         0         0         0
KASSEL          8         0         0         0
LJUBLJANA      46         0         0         0
MAASTRICHT      6         0         0         0
MADRID        331         8         0         0
MUNCHENB        5         0         0         0
OSLO            6         0         0         0
STOCKHOLM       2         0         0         0
VALENTIA        1         0         0         0


In [79]:
# Create a Keras leyered model. Use hyperparameters: 50, 64, 128, relu
epochs = 50
batch_size = 64
n_hidden = 128

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='relu'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [80]:
model.compile(loss='categorical_crossentropy', 
              optimizer='adam', 
              metrics=['accuracy'])

In [81]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/50
281/281 - 4s - 15ms/step - accuracy: 0.0848 - loss: 13.0639
Epoch 2/50
281/281 - 1s - 4ms/step - accuracy: 0.0754 - loss: 10.8423
Epoch 3/50
281/281 - 1s - 4ms/step - accuracy: 0.0760 - loss: 10.8175
Epoch 4/50
281/281 - 1s - 4ms/step - accuracy: 0.0755 - loss: 10.8067
Epoch 5/50
281/281 - 1s - 4ms/step - accuracy: 0.0751 - loss: 10.8014
Epoch 6/50
281/281 - 1s - 4ms/step - accuracy: 0.0749 - loss: 10.7978
Epoch 7/50
281/281 - 1s - 4ms/step - accuracy: 0.0749 - loss: 10.7953
Epoch 8/50
281/281 - 1s - 4ms/step - accuracy: 0.0749 - loss: 10.7931
Epoch 9/50
281/281 - 1s - 4ms/step - accuracy: 0.0749 - loss: 10.7917
Epoch 10/50
281/281 - 1s - 4ms/step - accuracy: 0.0749 - loss: 10.7906
Epoch 11/50
281/281 - 1s - 4ms/step - accuracy: 0.0749 - loss: 10.7893
Epoch 12/50
281/281 - 1s - 4ms/step - accuracy: 0.0749 - loss: 10.7882
Epoch 13/50
281/281 - 1s - 4ms/step - accuracy: 0.0749 - loss: 10.7877
Epoch 14/50
281/281 - 1s - 4ms/step - accuracy: 0.0749 - loss: 10.7868
Epoch 15/50
28

<keras.src.callbacks.history.History at 0x190680a0650>

In [82]:
# Create a Keras leyered model. Use hyperparameters: 64, 64, 128, softmax
epochs = 64
batch_size = 64
n_hidden = 128

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax'))

In [83]:
model.compile(loss='categorical_crossentropy', 
              optimizer='adam', 
              metrics=['accuracy'])

In [84]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/64
281/281 - 4s - 14ms/step - accuracy: 0.1152 - loss: 710.9915
Epoch 2/64
281/281 - 1s - 4ms/step - accuracy: 0.0993 - loss: 6110.6890
Epoch 3/64
281/281 - 1s - 4ms/step - accuracy: 0.0986 - loss: 14500.4434
Epoch 4/64
281/281 - 1s - 4ms/step - accuracy: 0.0948 - loss: 22939.4473
Epoch 5/64
281/281 - 1s - 4ms/step - accuracy: 0.0875 - loss: 26222.8887
Epoch 6/64
281/281 - 1s - 4ms/step - accuracy: 0.0935 - loss: 39679.2031
Epoch 7/64
281/281 - 1s - 5ms/step - accuracy: 0.0919 - loss: 42456.1914
Epoch 8/64
281/281 - 1s - 4ms/step - accuracy: 0.0948 - loss: 65105.4609
Epoch 9/64
281/281 - 1s - 4ms/step - accuracy: 0.0944 - loss: 63187.1875
Epoch 10/64
281/281 - 1s - 4ms/step - accuracy: 0.0927 - loss: 89625.6719
Epoch 11/64
281/281 - 1s - 4ms/step - accuracy: 0.0928 - loss: 84993.8203
Epoch 12/64
281/281 - 1s - 4ms/step - accuracy: 0.0989 - loss: 103493.1875
Epoch 13/64
281/281 - 1s - 4ms/step - accuracy: 0.0960 - loss: 127553.3438
Epoch 14/64
281/281 - 1s - 4ms/step - accuracy:

<keras.src.callbacks.history.History at 0x1906a5e5350>

In [85]:
# Create confusion matrix
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [86]:
# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Pred        BASEL  BELGRADE  KASSEL  LJUBLJANA
True                                          
BASEL          16        24      12       2875
BELGRADE        1        10       1        801
BUDAPEST        0         0       0        151
DEBILT          1         1       0         69
DUSSELDORF      0         0       0         31
HEATHROW        1         2       0         79
KASSEL          0         0       0          8
LJUBLJANA       0         0       0         46
MAASTRICHT      0         1       0          5
MADRID          4         4       1        330
MUNCHENB        0         0       0          5
OSLO            0         0       0          6
STOCKHOLM       0         0       0          2
VALENTIA        0         0       0          1


### 5. RNN Model

In [87]:
# Create a Keras leyered model. Use initial hyperparameters: 8, 16, 32, softmax
epochs = 8
batch_size = 16
n_hidden = 32

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
model.add(Dropout(0.5))
model.add(Dense(n_classes, activation='softmax')) #Don't use relu here!

  super().__init__(**kwargs)


In [88]:
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [89]:
model.fit(X_train,
          y_train,
          batch_size=batch_size,
          validation_data=(X_test, y_test),
          epochs=epochs)

Epoch 1/8
[1m1122/1122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.1366 - loss: 10.0794 - val_accuracy: 0.0758 - val_loss: 9.4079
Epoch 2/8
[1m1122/1122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.1151 - loss: 10.5950 - val_accuracy: 0.0760 - val_loss: 9.7343
Epoch 3/8
[1m1122/1122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.1019 - loss: 10.4840 - val_accuracy: 0.0758 - val_loss: 10.1375
Epoch 4/8
[1m1122/1122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.1015 - loss: 11.0031 - val_accuracy: 0.0758 - val_loss: 10.5062
Epoch 5/8
[1m1122/1122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.0871 - loss: 10.7622 - val_accuracy: 0.0758 - val_loss: 10.8642
Epoch 6/8
[1m1122/1122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.0808 - loss: 11.1825 - val_accuracy: 0.0755 - val_loss: 11.3190
Epoch 7/8


<keras.src.callbacks.history.History at 0x1906acd3810>

In [90]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [91]:
# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
Pred        KASSEL  MADRID  OSLO  SONNBLICK  VALENTIA
True                                                 
BASEL            1    2914     1         10         1
BELGRADE         0     811     0          2         0
BUDAPEST         0     151     0          0         0
DEBILT           0      71     0          0         0
DUSSELDORF       0      31     0          0         0
HEATHROW         0      81     0          1         0
KASSEL           0       8     0          0         0
LJUBLJANA        0      46     0          0         0
MAASTRICHT       0       6     0          0         0
MADRID           0     339     0          0         0
MUNCHENB         0       5     0          0         0
OSLO             0       6     0          0         0
STOCKHOLM        0       2     0          0         0
VALENTIA         0       1     0          0         0


In [92]:
# Create a Keras leyered model. Change hyperparameters: 20, 32, 128, softmax
epochs = 20
batch_size = 32
n_hidden = 128

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
model.add(Dropout(0.5))
model.add(Dense(n_classes, activation='softmax')) #Don't use relu here!

  super().__init__(**kwargs)


In [93]:
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [94]:
model.fit(X_train,
          y_train,
          batch_size=batch_size,
          validation_data=(X_test, y_test),
          epochs=epochs)

Epoch 1/20
[1m561/561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 15ms/step - accuracy: 0.1030 - loss: 10.9150 - val_accuracy: 0.0755 - val_loss: 9.4673
Epoch 2/20
[1m561/561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13ms/step - accuracy: 0.0993 - loss: 11.7378 - val_accuracy: 0.0755 - val_loss: 10.2060
Epoch 3/20
[1m561/561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 14ms/step - accuracy: 0.1019 - loss: 12.0216 - val_accuracy: 0.0755 - val_loss: 10.9141
Epoch 4/20
[1m561/561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13ms/step - accuracy: 0.0977 - loss: 12.6308 - val_accuracy: 0.0753 - val_loss: 11.6025
Epoch 5/20
[1m561/561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13ms/step - accuracy: 0.1012 - loss: 12.8973 - val_accuracy: 0.0760 - val_loss: 12.2399
Epoch 6/20
[1m561/561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13ms/step - accuracy: 0.0974 - loss: 13.3442 - val_accuracy: 0.0755 - val_loss: 12.7750
Epoch 7/20

<keras.src.callbacks.history.History at 0x1906c391b90>

In [95]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])
    
    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [96]:
# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step
Pred        BELGRADE  BUDAPEST  MAASTRICHT  MADRID  SONNBLICK  VALENTIA
True                                                                   
BASEL              2         1           1    2920          3         0
BELGRADE           0         0           0     811          2         0
BUDAPEST           0         0           0     151          0         0
DEBILT             0         0           0      71          0         0
DUSSELDORF         0         0           0      31          0         0
HEATHROW           0         0           0      82          0         0
KASSEL             0         0           0       8          0         0
LJUBLJANA          0         0           0      46          0         0
MAASTRICHT         0         0           0       6          0         0
MADRID             0         0           0     338          0         1
MUNCHENB           0         0           0       5          

In [97]:
# Create a Keras leyered model. Change activation type: 32, 32, 128, sigmoid
epochs = 32
batch_size = 32
n_hidden = 128

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
model.add(Dropout(0.5))
model.add(Dense(n_classes, activation='sigmoid')) #Don't use relu here!

  super().__init__(**kwargs)


In [98]:
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [99]:
model.fit(X_train,
          y_train,
          batch_size=batch_size,
          validation_data=(X_test, y_test),
          epochs=epochs)

Epoch 1/32
[1m561/561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 14ms/step - accuracy: 0.0823 - loss: 10.8632 - val_accuracy: 0.0758 - val_loss: 9.5703
Epoch 2/32
[1m561/561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 13ms/step - accuracy: 0.1045 - loss: 11.4658 - val_accuracy: 0.0755 - val_loss: 10.2723
Epoch 3/32
[1m561/561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13ms/step - accuracy: 0.1054 - loss: 11.7448 - val_accuracy: 0.0755 - val_loss: 10.7868
Epoch 4/32
[1m561/561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 14ms/step - accuracy: 0.1011 - loss: 12.1066 - val_accuracy: 0.0755 - val_loss: 11.5119
Epoch 5/32
[1m561/561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13ms/step - accuracy: 0.0991 - loss: 12.6186 - val_accuracy: 0.0755 - val_loss: 12.1782
Epoch 6/32
[1m561/561[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 14ms/step - accuracy: 0.0999 - loss: 13.3221 - val_accuracy: 0.0755 - val_loss: 12.8080
Epoch 7/32

<keras.src.callbacks.history.History at 0x1906c451b90>

In [100]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [101]:
# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step
Pred        BASEL  SONNBLICK
True                        
BASEL        2925          2
BELGRADE      812          1
BUDAPEST      151          0
DEBILT         71          0
DUSSELDORF     31          0
HEATHROW       82          0
KASSEL          8          0
LJUBLJANA      46          0
MAASTRICHT      6          0
MADRID        339          0
MUNCHENB        5          0
OSLO            6          0
STOCKHOLM       2          0
VALENTIA        1          0
