### Keras Part 1

### 1. Importing Libraries and Data

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import operator
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from numpy import unique
from numpy import reshape
from keras.models import Sequential
from keras.layers import Conv1D, Conv2D, Dense, BatchNormalization, Flatten, MaxPooling1D, Dropout
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import warnings
warnings.filterwarnings("ignore")

In [2]:
path = r'C:\Users\dmksk\OneDrive\Desktop\CF\Macine Learning with Python\Ex 2.2\Data'

In [3]:
# Load the CSV file with the correct column names
pleasant = pd.read_csv(os.path.join(path, 'Pleasant_Weather.csv'), header=0, index_col= False)

# Display the first few rows to confirm
pleasant.head()

Unnamed: 0,DATE,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,19600101,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,19600102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,19600103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,19600104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,19600105,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [4]:
pleasant.shape

(22950, 16)

In [5]:
# Load the CSV file with the correct column names
unscaled = pd.read_csv(os.path.join(path, 'Dataset_Weather.csv'), header=0, index_col= False)

# Display the first few rows to confirm
unscaled.head()

Unnamed: 0,DATE,MONTH,BASEL_cloud_cover,BASEL_wind_speed,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_snow_depth,BASEL_sunshine,...,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_snow_depth,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,19600101,1,7,2.1,0.85,1.018,0.32,0.09,0,0.7,...,5,0.88,1.0003,0.45,0.34,0,4.7,8.5,6.0,10.9
1,19600102,1,6,2.1,0.84,1.018,0.36,1.05,0,1.1,...,7,0.91,1.0007,0.25,0.84,0,0.7,8.9,5.6,12.1
2,19600103,1,8,2.1,0.9,1.018,0.18,0.3,0,0.0,...,7,0.91,1.0096,0.17,0.08,0,0.1,10.5,8.1,12.9
3,19600104,1,3,2.1,0.92,1.018,0.58,0.0,0,4.1,...,7,0.86,1.0184,0.13,0.98,0,0.0,7.4,7.3,10.6
4,19600105,1,6,2.1,0.95,1.018,0.65,0.14,0,5.4,...,3,0.8,1.0328,0.46,0.0,0,5.7,5.7,3.0,8.4


### 2. Data Wrangling

In [6]:
# Drop the columns related to Tours, Gdansk and Rome from the unscaled dataset
# Every set of observations needs to be the same length. Three of the weather stations don’t have enough and need to be dropped from the data set. You’ll find them already removed from the predictions data set.

unscaled = unscaled.drop(['GDANSK_cloud_cover', 'GDANSK_humidity', 'GDANSK_precipitation', 'GDANSK_snow_depth', 'GDANSK_temp_mean', 'GDANSK_temp_min', 'GDANSK_temp_max',
                        'ROMA_cloud_cover', 'ROMA_wind_speed', 'ROMA_humidity', 'ROMA_pressure', 'ROMA_sunshine', 'ROMA_temp_mean',
                        'TOURS_wind_speed', 'TOURS_humidity', 'TOURS_pressure', 'TOURS_global_radiation', 'TOURS_precipitation', 'TOURS_temp_mean', 'TOURS_temp_min', 'TOURS_temp_max'], axis=1)

In [7]:
unscaled.shape

(22950, 149)

In [8]:
# Extract the different observation types

observation_types = ['cloud_cover', 'wind_speed', 'humidity', 'pressure',
                     'global_radiation', 'precipitation', 'snow_depth', 
                     'sunshine', 'temp_mean', 'temp_min', 'temp_max']

In [9]:
# Two types of observations are missing multiple years for most weather stations. Remove them.
# Create a dictionary to store the count of stations for each observation type
station_counts = {}

for obs in observation_types:
    # Select columns related to the current observation type
    columns = [col for col in unscaled.columns if col.endswith(obs)]
    
    # Count the number of stations (i.e., the number of columns) for the current observation type
    station_counts[obs] = len(columns)

# Print the count of stations for each observation type
print("Number of stations covered by each observation type:")
for obs, count in station_counts.items():
    print(f"{obs}: {count} stations")

Number of stations covered by each observation type:
cloud_cover: 14 stations
wind_speed: 9 stations
humidity: 14 stations
pressure: 14 stations
global_radiation: 15 stations
precipitation: 15 stations
snow_depth: 6 stations
sunshine: 15 stations
temp_mean: 15 stations
temp_min: 15 stations
temp_max: 15 stations


In [10]:
# Drop columns that end with wind_speed and snow_depth from the dataset

columns_to_drop = unscaled.filter(regex='(_wind_speed|_snow_depth)$').columns
columns_to_drop

Index(['BASEL_wind_speed', 'BASEL_snow_depth', 'DEBILT_wind_speed',
       'DUSSELDORF_wind_speed', 'DUSSELDORF_snow_depth', 'HEATHROW_snow_depth',
       'KASSEL_wind_speed', 'LJUBLJANA_wind_speed', 'MAASTRICHT_wind_speed',
       'MADRID_wind_speed', 'MUNCHENB_snow_depth', 'OSLO_wind_speed',
       'OSLO_snow_depth', 'SONNBLICK_wind_speed', 'VALENTIA_snow_depth'],
      dtype='object')

In [11]:
unscaled = unscaled.drop(columns=columns_to_drop)

In [12]:
unscaled.shape

(22950, 134)

In [13]:
# Create a list of all unique station names in the dataset

all_stations = set([col.split('_')[0] for col in unscaled.columns if '_' in col])
all_stations

{'BASEL',
 'BELGRADE',
 'BUDAPEST',
 'DEBILT',
 'DUSSELDORF',
 'HEATHROW',
 'KASSEL',
 'LJUBLJANA',
 'MAASTRICHT',
 'MADRID',
 'MUNCHENB',
 'OSLO',
 'SONNBLICK',
 'STOCKHOLM',
 'VALENTIA'}

In [14]:
# There are three individual observations that need to be filled in. Assume nearby stations have similar weather, then pick one to copy the data from (Ljubljana is near Kassel, Sonnblick is near Munchen, and Olso is close enough to Stockholm).

In [15]:
observation_types = ['cloud_cover', 'humidity', 'pressure']

missing_stations_by_observation = {}

for obs in observation_types:
    # Select columns related to the current observation type
    columns = [col for col in unscaled.columns if col.endswith(obs)]
    
    # Extract station names by removing the observation type from the column names
    station_names = set([col.replace(f'_{obs}', '') for col in columns])
    
    # Identify stations that are in all_stations but missing from the current observation type
    missing_stations = all_stations - station_names
    
    # Store the missing station names in the dictionary
    missing_stations_by_observation[obs] = missing_stations

# Print the missing station names for each observation type
for obs, missing_stations in missing_stations_by_observation.items():
    print(f"\nStations missing from {obs}:")
    if missing_stations:
        for station in missing_stations:
            print(station)
    else:
        print("None")


Stations missing from cloud_cover:
KASSEL

Stations missing from humidity:
STOCKHOLM

Stations missing from pressure:
MUNCHENB


##### 1 entry is missing for multiple observations: cloud_cover, humidity, and pressure. 


In [16]:
unscaled.shape

(22950, 134)

In [17]:
unscaled.head()

Unnamed: 0,DATE,MONTH,BASEL_cloud_cover,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,...,STOCKHOLM_temp_max,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,19600101,1,7,0.85,1.018,0.32,0.09,0.7,6.5,0.8,...,4.9,5,0.88,1.0003,0.45,0.34,4.7,8.5,6.0,10.9
1,19600102,1,6,0.84,1.018,0.36,1.05,1.1,6.1,3.3,...,5.0,7,0.91,1.0007,0.25,0.84,0.7,8.9,5.6,12.1
2,19600103,1,8,0.9,1.018,0.18,0.3,0.0,8.5,5.1,...,4.1,7,0.91,1.0096,0.17,0.08,0.1,10.5,8.1,12.9
3,19600104,1,3,0.92,1.018,0.58,0.0,4.1,6.3,3.8,...,2.3,7,0.86,1.0184,0.13,0.98,0.0,7.4,7.3,10.6
4,19600105,1,6,0.95,1.018,0.65,0.14,5.4,3.0,-0.7,...,4.3,3,0.8,1.0328,0.46,0.0,5.7,5.7,3.0,8.4


In [18]:
# Display all column names in the DataFrame
print("Column names in the DataFrame:")
print(unscaled.columns.tolist())

# Display the first few rows of the DataFrame
print("\nFirst few rows of the DataFrame:")
print(unscaled.head())


Column names in the DataFrame:
['DATE', 'MONTH', 'BASEL_cloud_cover', 'BASEL_humidity', 'BASEL_pressure', 'BASEL_global_radiation', 'BASEL_precipitation', 'BASEL_sunshine', 'BASEL_temp_mean', 'BASEL_temp_min', 'BASEL_temp_max', 'BELGRADE_cloud_cover', 'BELGRADE_humidity', 'BELGRADE_pressure', 'BELGRADE_global_radiation', 'BELGRADE_precipitation', 'BELGRADE_sunshine', 'BELGRADE_temp_mean', 'BELGRADE_temp_min', 'BELGRADE_temp_max', 'BUDAPEST_cloud_cover', 'BUDAPEST_humidity', 'BUDAPEST_pressure', 'BUDAPEST_global_radiation', 'BUDAPEST_precipitation', 'BUDAPEST_sunshine', 'BUDAPEST_temp_mean', 'BUDAPEST_temp_min', 'BUDAPEST_temp_max', 'DEBILT_cloud_cover', 'DEBILT_humidity', 'DEBILT_pressure', 'DEBILT_global_radiation', 'DEBILT_precipitation', 'DEBILT_sunshine', 'DEBILT_temp_mean', 'DEBILT_temp_min', 'DEBILT_temp_max', 'DUSSELDORF_cloud_cover', 'DUSSELDORF_humidity', 'DUSSELDORF_pressure', 'DUSSELDORF_global_radiation', 'DUSSELDORF_precipitation', 'DUSSELDORF_sunshine', 'DUSSELDORF_temp_m

In [19]:
# Mapping of new column names to existing columns (nearest stations)
column_mapping = {
    'KASSEL_cloud_cover': 'LJUBLJANA_cloud_cover',
    'STOCKHOLM_humidity': 'OSLO_humidity',
    'MUNCHENB_pressure': 'SONNBLICK_pressure'
}

# Copy data to the new columns
for new_column, source_column in column_mapping.items():
    unscaled[new_column] = unscaled[source_column]

# Create a DataFrame with only the newly added columns
new_columns_df = unscaled[list(column_mapping.keys())]

# Display the DataFrame with only the new columns
print("Updated DataFrame with only the newly created columns:")
print(new_columns_df.head())




Updated DataFrame with only the newly created columns:
   KASSEL_cloud_cover  STOCKHOLM_humidity  MUNCHENB_pressure
0                   8                0.98             1.0304
1                   6                0.62             1.0292
2                   8                0.69             1.0320
3                   6                0.98             1.0443
4                   7                0.96             1.0430


In [20]:
# Mapping of new column names to existing columns (nearest stations)
column_mapping = {
    'KASSEL_cloud_cover': 'LJUBLJANA_cloud_cover',
    'STOCKHOLM_humidity': 'OSLO_humidity',
    'MUNCHENB_pressure': 'SONNBLICK_pressure'
}

# Create new columns and also create source columns indicating where the data was copied from
for new_column, source_column in column_mapping.items():
    unscaled[new_column] = unscaled[source_column]
    unscaled[f"{new_column}_source"] = source_column

# Create a DataFrame with only the new columns and their sources
new_columns_with_sources = unscaled[[col for pair in column_mapping.items() for col in pair]]

# Display the DataFrame with the new columns and their source columns
print("Updated DataFrame with new columns and their sources:")
print(new_columns_with_sources.head())


Updated DataFrame with new columns and their sources:
   KASSEL_cloud_cover  LJUBLJANA_cloud_cover  STOCKHOLM_humidity  \
0                   8                      8                0.98   
1                   6                      6                0.62   
2                   8                      8                0.69   
3                   6                      6                0.98   
4                   7                      7                0.96   

   OSLO_humidity  MUNCHENB_pressure  SONNBLICK_pressure  
0           0.98             1.0304              1.0304  
1           0.62             1.0292              1.0292  
2           0.69             1.0320              1.0320  
3           0.98             1.0443              1.0443  
4           0.96             1.0430              1.0430  


In [21]:
unscaled.shape # observations dataset has the correct shape before drop 'DATE','MONTH'

(22950, 140)

In [22]:
unscaled.head()

Unnamed: 0,DATE,MONTH,BASEL_cloud_cover,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,...,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max,KASSEL_cloud_cover,STOCKHOLM_humidity,MUNCHENB_pressure,KASSEL_cloud_cover_source,STOCKHOLM_humidity_source,MUNCHENB_pressure_source
0,19600101,1,7,0.85,1.018,0.32,0.09,0.7,6.5,0.8,...,4.7,8.5,6.0,10.9,8,0.98,1.0304,LJUBLJANA_cloud_cover,OSLO_humidity,SONNBLICK_pressure
1,19600102,1,6,0.84,1.018,0.36,1.05,1.1,6.1,3.3,...,0.7,8.9,5.6,12.1,6,0.62,1.0292,LJUBLJANA_cloud_cover,OSLO_humidity,SONNBLICK_pressure
2,19600103,1,8,0.9,1.018,0.18,0.3,0.0,8.5,5.1,...,0.1,10.5,8.1,12.9,8,0.69,1.032,LJUBLJANA_cloud_cover,OSLO_humidity,SONNBLICK_pressure
3,19600104,1,3,0.92,1.018,0.58,0.0,4.1,6.3,3.8,...,0.0,7.4,7.3,10.6,6,0.98,1.0443,LJUBLJANA_cloud_cover,OSLO_humidity,SONNBLICK_pressure
4,19600105,1,6,0.95,1.018,0.65,0.14,5.4,3.0,-0.7,...,5.7,5.7,3.0,8.4,7,0.96,1.043,LJUBLJANA_cloud_cover,OSLO_humidity,SONNBLICK_pressure


In [23]:
# Display all column names in the DataFrame
print("Column names in the DataFrame:")
print(unscaled.columns.tolist())

Column names in the DataFrame:
['DATE', 'MONTH', 'BASEL_cloud_cover', 'BASEL_humidity', 'BASEL_pressure', 'BASEL_global_radiation', 'BASEL_precipitation', 'BASEL_sunshine', 'BASEL_temp_mean', 'BASEL_temp_min', 'BASEL_temp_max', 'BELGRADE_cloud_cover', 'BELGRADE_humidity', 'BELGRADE_pressure', 'BELGRADE_global_radiation', 'BELGRADE_precipitation', 'BELGRADE_sunshine', 'BELGRADE_temp_mean', 'BELGRADE_temp_min', 'BELGRADE_temp_max', 'BUDAPEST_cloud_cover', 'BUDAPEST_humidity', 'BUDAPEST_pressure', 'BUDAPEST_global_radiation', 'BUDAPEST_precipitation', 'BUDAPEST_sunshine', 'BUDAPEST_temp_mean', 'BUDAPEST_temp_min', 'BUDAPEST_temp_max', 'DEBILT_cloud_cover', 'DEBILT_humidity', 'DEBILT_pressure', 'DEBILT_global_radiation', 'DEBILT_precipitation', 'DEBILT_sunshine', 'DEBILT_temp_mean', 'DEBILT_temp_min', 'DEBILT_temp_max', 'DUSSELDORF_cloud_cover', 'DUSSELDORF_humidity', 'DUSSELDORF_pressure', 'DUSSELDORF_global_radiation', 'DUSSELDORF_precipitation', 'DUSSELDORF_sunshine', 'DUSSELDORF_temp_m

In [24]:
# List of columns to drop
columns_to_drop = ['KASSEL_cloud_cover_source', 'STOCKHOLM_humidity_source', 'MUNCHENB_pressure_source']

# Drop the specified columns
unscaled = unscaled.drop(columns=columns_to_drop, errors='ignore')

# Display the updated DataFrame
print("Updated DataFrame after dropping the specified columns:")
print(unscaled.head())



Updated DataFrame after dropping the specified columns:
       DATE  MONTH  BASEL_cloud_cover  BASEL_humidity  BASEL_pressure  \
0  19600101      1                  7            0.85           1.018   
1  19600102      1                  6            0.84           1.018   
2  19600103      1                  8            0.90           1.018   
3  19600104      1                  3            0.92           1.018   
4  19600105      1                  6            0.95           1.018   

   BASEL_global_radiation  BASEL_precipitation  BASEL_sunshine  \
0                    0.32                 0.09             0.7   
1                    0.36                 1.05             1.1   
2                    0.18                 0.30             0.0   
3                    0.58                 0.00             4.1   
4                    0.65                 0.14             5.4   

   BASEL_temp_mean  BASEL_temp_min  ...  VALENTIA_pressure  \
0              6.5             0.8  ...       

In [25]:
unscaled.shape

(22950, 137)

In [26]:
# Drop unnecessary columns

unscaled.drop(['DATE', 'MONTH'], axis=1, inplace=True)

In [27]:
unscaled.shape # observations dataset has the correct shape

(22950, 135)

In [28]:
pleasant.shape

(22950, 16)

In [29]:
pleasant.drop(columns = 'DATE', inplace = True)

In [30]:
pleasant.shape

(22950, 15)

In [31]:
# Export cleaned dataset

unscaled.to_csv(os.path.join(path, 'X_cleaned.csv'), index=False)

### 3. Reshaping for modeling
- Ensure the layers can be fed to the deep learning model correctly.
- You’ll need to split the observations (X) into 15 groups of 9 types of observations, and your labels (y) should also be in 15 groups (it doesn’t need to be transformed or reshaped).
- The final shapes should be X = (22950, 15, 9) and y = (22950, 15).

In [32]:
X = pd.read_csv(os.path.join(path, 'X_cleaned.csv'), index_col = False)

In [33]:
y = pleasant

In [34]:
y.shape

(22950, 15)

In [35]:
# Turn X and y from a df to arrays

X = np.array(X)
y = np.array(y)

In [36]:
X = X.reshape(-1,15,9)

In [37]:
X.shape

(22950, 15, 9)

In [38]:
# Verify shape

X

array([[[ 7.0000e+00,  8.5000e-01,  1.0180e+00, ...,  6.5000e+00,
          8.0000e-01,  1.0900e+01],
        [ 1.0000e+00,  8.1000e-01,  1.0195e+00, ...,  3.7000e+00,
         -9.0000e-01,  7.9000e+00],
        [ 4.0000e+00,  6.7000e-01,  1.0170e+00, ...,  2.4000e+00,
         -4.0000e-01,  5.1000e+00],
        ...,
        [ 1.0304e+00,  4.8000e-01,  1.0000e-02, ..., -3.2000e+00,
          5.0000e+00,  1.0114e+00],
        [ 5.0000e-02,  3.2000e-01,  0.0000e+00, ...,  5.0000e+00,
          8.8000e-01,  1.0003e+00],
        [ 4.5000e-01,  3.4000e-01,  4.7000e+00, ...,  8.0000e+00,
          9.8000e-01,  1.0304e+00]],

       [[ 6.0000e+00,  8.4000e-01,  1.0180e+00, ...,  6.1000e+00,
          3.3000e+00,  1.0100e+01],
        [ 6.0000e+00,  8.4000e-01,  1.0172e+00, ...,  2.9000e+00,
          2.2000e+00,  4.4000e+00],
        [ 4.0000e+00,  6.7000e-01,  1.0170e+00, ...,  2.3000e+00,
          1.4000e+00,  3.1000e+00],
        ...,
        [ 1.0292e+00,  2.1000e-01,  6.1000e-01, ..., -

### 4. Data Split 

In [39]:
# Split data into train and test sets

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state = 42)

In [40]:
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(17212, 15, 9) (17212, 15)
(5738, 15, 9) (5738, 15)


### 5. Creating Keras Model

In [41]:
epochs = 30
batch_size = 16
n_hidden = 32

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) # Options: sigmoid, tanh, softmax, relu

In [42]:
model.summary()

### 6. Compiling and Running

In [43]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [44]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 5s - 5ms/step - accuracy: 0.1125 - loss: 6222.5947
Epoch 2/30
1076/1076 - 4s - 4ms/step - accuracy: 0.1235 - loss: 60038.9570
Epoch 3/30
1076/1076 - 4s - 4ms/step - accuracy: 0.1278 - loss: 212273.4531
Epoch 4/30
1076/1076 - 5s - 5ms/step - accuracy: 0.1277 - loss: 450068.5625
Epoch 5/30
1076/1076 - 4s - 4ms/step - accuracy: 0.1299 - loss: 783681.8750
Epoch 6/30
1076/1076 - 4s - 4ms/step - accuracy: 0.1272 - loss: 1208232.2500
Epoch 7/30
1076/1076 - 5s - 5ms/step - accuracy: 0.1279 - loss: 1777504.3750
Epoch 8/30
1076/1076 - 6s - 5ms/step - accuracy: 0.1265 - loss: 2383678.2500
Epoch 9/30
1076/1076 - 6s - 5ms/step - accuracy: 0.1310 - loss: 3163902.5000
Epoch 10/30
1076/1076 - 5s - 4ms/step - accuracy: 0.1310 - loss: 4151258.5000
Epoch 11/30
1076/1076 - 4s - 4ms/step - accuracy: 0.1263 - loss: 5187986.5000
Epoch 12/30
1076/1076 - 5s - 5ms/step - accuracy: 0.1293 - loss: 6438430.5000
Epoch 13/30
1076/1076 - 5s - 5ms/step - accuracy: 0.1269 - loss: 7805362.0000
Epo

<keras.src.callbacks.history.History at 0x14310df5cd0>

### 7. Creating Confusion Matrix

In [45]:
# Define list of stations names

stations = {
0: 'BASEL',
1: 'BELGRADE',
2: 'BUDAPEST',
3: 'DEBILT',
4: 'DUSSELDORF',
5: 'HEATHROW',
6: 'KASSEL',
7: 'LJUBLJANA',
8: 'MAASTRICHT',
9: 'MADRID',
10: 'MUNCHENB',
11: 'OSLO',
12: 'SONNBLICK',
13: 'STOCKHOLM',
14: 'VALENTIA'

}

In [46]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [47]:
# Evaluate

print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step
Pred        BELGRADE  DEBILT  DUSSELDORF  HEATHROW  KASSEL  LJUBLJANA  \
True                                                                    
BASEL            364      36          54       404      14       2027   
BELGRADE         149       0           0        18       0        889   
BUDAPEST          20       0           0         7       0        174   
DEBILT             1       1           0         8       0         58   
DUSSELDORF         0       0           0         3       0         22   
HEATHROW           1       0           0        16       0         52   
KASSEL             0       1           0         1       0          8   
LJUBLJANA          2       0           0         1       0         56   
MAASTRICHT         0       1           0         1       0          6   
MADRID            39       3           0        80       0        231   
MUNCHENB           0       0           0         

In [48]:
from sklearn.metrics import accuracy_score

# Replace `model` with the actual instance of your trained model
y_pred = model.predict(X_test)  # Generate predictions from your model

# Convert one-hot encoded arrays to label indices
true_labels = np.argmax(y_test, axis=1)
predicted_labels = np.argmax(y_pred, axis=1)

# Calculate and print accuracy
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"\nAccuracy: {accuracy * 100:.2f}%")

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step

Accuracy: 5.54%


### Keras Model Retrials 2

In [63]:
epochs = 30
batch_size = 16
n_hidden = 4

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='relu')) # Options: sigmoid, tanh, softmax, relu

In [64]:
model.summary()

In [65]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [66]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 9s - 8ms/step - accuracy: 0.1357 - loss: 34.9197
Epoch 2/30
1076/1076 - 7s - 6ms/step - accuracy: 0.0765 - loss: 27.5187
Epoch 3/30
1076/1076 - 10s - 10ms/step - accuracy: 0.0751 - loss: 27.4742
Epoch 4/30
1076/1076 - 10s - 9ms/step - accuracy: 0.0926 - loss: 27.4104
Epoch 5/30
1076/1076 - 11s - 10ms/step - accuracy: 0.0910 - loss: 27.1038
Epoch 6/30
1076/1076 - 11s - 10ms/step - accuracy: 0.0944 - loss: 23.1832
Epoch 7/30
1076/1076 - 7s - 7ms/step - accuracy: 0.5650 - loss: nan
Epoch 8/30
1076/1076 - 7s - 6ms/step - accuracy: 0.6440 - loss: nan
Epoch 9/30
1076/1076 - 9s - 9ms/step - accuracy: 0.6440 - loss: nan
Epoch 10/30
1076/1076 - 11s - 11ms/step - accuracy: 0.6440 - loss: nan
Epoch 11/30
1076/1076 - 7s - 6ms/step - accuracy: 0.6440 - loss: nan
Epoch 12/30
1076/1076 - 10s - 10ms/step - accuracy: 0.6440 - loss: nan
Epoch 13/30
1076/1076 - 11s - 10ms/step - accuracy: 0.6440 - loss: nan
Epoch 14/30
1076/1076 - 7s - 7ms/step - accuracy: 0.6440 - loss: nan
Epoch 

<keras.src.callbacks.history.History at 0x14325f2da10>

In [67]:
# Define list of stations names

stations = {
0: 'BASEL',
1: 'BELGRADE',
2: 'BUDAPEST',
3: 'DEBILT',
4: 'DUSSELDORF',
5: 'HEATHROW',
6: 'KASSEL',
7: 'LJUBLJANA',
8: 'MAASTRICHT',
9: 'MADRID',
10: 'MUNCHENB',
11: 'OSLO',
12: 'SONNBLICK',
13: 'STOCKHOLM',
14: 'VALENTIA'

}

In [68]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [69]:
# Evaluate

print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Pred        BASEL
True             
BASEL        3682
BELGRADE     1092
BUDAPEST      214
DEBILT         82
DUSSELDORF     29
HEATHROW       82
KASSEL         11
LJUBLJANA      61
MAASTRICHT      9
MADRID        458
MUNCHENB        8
OSLO            5
STOCKHOLM       4
VALENTIA        1


In [70]:
from sklearn.metrics import accuracy_score

# Replace `model` with the actual instance of your trained model
y_pred = model.predict(X_test)  # Generate predictions from your model

# Convert one-hot encoded arrays to label indices
true_labels = np.argmax(y_test, axis=1)
predicted_labels = np.argmax(y_pred, axis=1)

# Calculate and print accuracy
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"\nAccuracy: {accuracy * 100:.2f}%")

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step

Accuracy: 64.17%


### Keras Model Retrials 3

In [71]:
epochs = 30
batch_size = 16
n_hidden = 64

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='tanh')) # Options: sigmoid, tanh, softmax, relu

In [72]:
model.summary()

In [73]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [74]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 11s - 10ms/step - accuracy: 0.4043 - loss: 28.2702
Epoch 2/30
1076/1076 - 8s - 7ms/step - accuracy: 0.4705 - loss: 28.3303
Epoch 3/30
1076/1076 - 7s - 7ms/step - accuracy: 0.4713 - loss: 28.3321
Epoch 4/30
1076/1076 - 11s - 10ms/step - accuracy: 0.4722 - loss: 28.3349
Epoch 5/30
1076/1076 - 10s - 9ms/step - accuracy: 0.4726 - loss: 28.3359
Epoch 6/30
1076/1076 - 10s - 10ms/step - accuracy: 0.4729 - loss: 28.3378
Epoch 7/30
1076/1076 - 10s - 9ms/step - accuracy: 0.4728 - loss: 28.3378
Epoch 8/30
1076/1076 - 8s - 7ms/step - accuracy: 0.4728 - loss: 28.3377
Epoch 9/30
1076/1076 - 8s - 7ms/step - accuracy: 0.4726 - loss: 28.3377
Epoch 10/30
1076/1076 - 9s - 8ms/step - accuracy: 0.4726 - loss: 28.3387
Epoch 11/30
1076/1076 - 5s - 5ms/step - accuracy: 0.4725 - loss: 28.3378
Epoch 12/30
1076/1076 - 8s - 7ms/step - accuracy: 0.4724 - loss: 28.3377
Epoch 13/30
1076/1076 - 8s - 7ms/step - accuracy: 0.4723 - loss: 28.3378
Epoch 14/30
1076/1076 - 11s - 11ms/step - accuracy: 

<keras.src.callbacks.history.History at 0x14326435790>

In [75]:
# Evaluate

print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Pred        BASEL  DEBILT  DUSSELDORF  LJUBLJANA  MAASTRICHT  MADRID
True                                                                
BASEL        2661       1          16         15         857     132
BELGRADE     1049       1           1          9          30       2
BUDAPEST      212       1           0          1           0       0
DEBILT         82       0           0          0           0       0
DUSSELDORF     29       0           0          0           0       0
HEATHROW       80       0           0          0           2       0
KASSEL         11       0           0          0           0       0
LJUBLJANA      61       0           0          0           0       0
MAASTRICHT      9       0           0          0           0       0
MADRID        425       0           0          1          21      11
MUNCHENB        8       0           0          0           0       0
OSLO            5       0   

In [76]:
from sklearn.metrics import accuracy_score

# Replace `model` with the actual instance of your trained model
y_pred = model.predict(X_test)  # Generate predictions from your model

# Convert one-hot encoded arrays to label indices
true_labels = np.argmax(y_test, axis=1)
predicted_labels = np.argmax(y_pred, axis=1)

# Calculate and print accuracy
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"\nAccuracy: {accuracy * 100:.2f}%")

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step

Accuracy: 46.57%
