In [1]:
import numpy as np
from sklearn import preprocessing
import random

#Loads the yearly CSV files and builds the X (input) and Y (target) datasets.
#nb_days: how many days ahead the prediction should be, e.g. nb_days = 1 means tomorrow.
#Returns the tuple (X, Y) that the model is trained and evaluated on.
def _build_windows(dataset, window, source):
    """Turn one file's daily rows into (inputs, targets) without crossing file borders.

    inputs[i]  = row i followed by row i + 1
    targets[i] = mean of columns 0 and 1 of row i + window
    """
    if dataset.shape[1] < 2:
        raise ValueError("%s needs at least 2 columns, found %d" % (source, dataset.shape[1]))

    n_samples = len(dataset) - window
    if n_samples < 0:
        # A negative count used to shrink the output arrays and shift the
        # offsets of the following file instead of failing.
        raise ValueError("%s has %d rows but a window of %d days needs at least %d"
                         % (source, len(dataset), window, window))

    # window >= 1, so row i + 1 always exists for every i < n_samples.
    inputs = np.hstack((dataset[:n_samples], dataset[1:n_samples + 1]))
    targets = (dataset[window:, 0] + dataset[window:, 1]) / 2
    return inputs, targets


def load_data(nb_days = 1, paths = ('budapest_2018_daily.csv', 'budapest_2019_daily.csv')):
    """Load the daily CSV files and build the supervised (X, Y) datasets.

    Each sample uses two consecutive days of one file as input and the day
    lying ``nb_days`` after the second input day as target. Samples are built
    per file, so no window mixes rows of two different files; the per-file
    results are concatenated in the order of ``paths``.

    Parameters
    ----------
    nb_days : int
        How many days forward the prediction should be, e.g. ``nb_days = 1``
        means tomorrow. Must be >= 0.
    paths : str or sequence of str
        Comma-separated numeric files, one row per day. Defaults to the 2018
        and 2019 Budapest fall season files used by this notebook.

    Returns
    -------
    (X, Y) : tuple of numpy.ndarray
        ``X`` has shape (n_samples, 2 * n_columns): row i is day i followed by
        day i + 1. ``Y`` has shape (n_samples,): the mean of columns 0 and 1
        (max and min temperature according to the notebook's feature
        description) of day i + nb_days + 1.

    Raises
    ------
    ValueError
        If ``nb_days`` is negative, no path is given, a file has fewer than
        two columns, or a file has fewer rows than ``nb_days + 1``.
    """
    window = nb_days + 1
    if window < 1:
        raise ValueError("nb_days must be >= 0, got %r" % (nb_days,))

    if isinstance(paths, str):
        paths = (paths,)
    if len(paths) == 0:
        raise ValueError("at least one data file is required")

    input_parts = []
    target_parts = []
    for path in paths:
        # ndmin=2 keeps a one-row file two-dimensional so row/column indexing works.
        dataset = np.loadtxt(path, delimiter=",", ndmin=2)
        inputs, targets = _build_windows(dataset, window, path)
        input_parts.append(inputs)
        target_parts.append(targets)

    X = np.vstack(input_parts)
    Y = np.concatenate(target_parts)
    return (X, Y)

In [2]:
#shuffling the datasets
def shuffleXY(X, Y, seed=123):
    """Shuffle X and Y with one shared, reproducible permutation.

    The same row permutation is applied to both arrays so that every input
    row stays paired with its target.

    Parameters
    ----------
    X, Y : numpy.ndarray
        Arrays with the same number of rows (first axis).
    seed : int
        Seed of the permutation. The default 123 reproduces the ordering this
        notebook has always used.

    Returns
    -------
    (X, Y) : tuple of numpy.ndarray
        Shuffled copies; the arguments themselves are not modified.

    Raises
    ------
    ValueError
        If X and Y do not have the same length.
    """
    if len(X) != len(Y):
        raise ValueError("X and Y must have the same length, got %d and %d" % (len(X), len(Y)))

    # A private RandomState draws the same permutation as
    # np.random.seed(seed) + np.random.permutation, but leaves the global
    # NumPy generator untouched for the rest of the notebook.
    rng = np.random.RandomState(seed)
    randperm = rng.permutation(len(X))
    return (X[randperm], Y[randperm])

In [3]:
#splitting the data into train, valid and test datasets
def split_data(features=None, targets=None, valid=0.2, test=0.1):
    """Split the data into consecutive train, validation and test parts.

    The first ``1 - valid - test`` share of the rows becomes the training
    set, the next ``valid`` share the validation set and the remaining rows
    the test set. Boundaries are truncated to whole rows. The rows are taken
    in their current order, so shuffle beforehand if needed.

    Parameters
    ----------
    features, targets : array-like, optional
        Inputs and expected outputs with the same number of rows. When
        omitted, the notebook-level globals ``X`` and ``Y`` are used, which
        keeps the original zero-argument call working.
    valid : float
        Fraction of the rows used for validation.
    test : float
        Fraction of the rows used for testing.

    Returns
    -------
    tuple
        ``(X_train, X_valid, X_test, Y_train, Y_valid, Y_test)``.

    Raises
    ------
    ValueError
        If the fractions are negative or leave no room for training data, or
        if ``features`` and ``targets`` differ in length.
    """
    if features is None:
        features = X  # notebook global, same source as the original implementation
    if targets is None:
        targets = Y  # notebook global

    if valid < 0 or test < 0 or valid + test >= 1:
        raise ValueError("valid and test must be >= 0 and sum to less than 1, got %r and %r"
                         % (valid, test))
    if len(features) != len(targets):
        raise ValueError("features and targets must have the same length, got %d and %d"
                         % (len(features), len(targets)))

    nb_samples = len(features)
    # int() truncates, exactly like the original casts, so existing splits stay the same.
    train_end = int(nb_samples * (1 - valid - test))
    valid_end = int(nb_samples * (1 - test))

    X_train = features[:train_end]
    X_valid = features[train_end:valid_end]
    X_test = features[valid_end:]

    Y_train = targets[:train_end]
    Y_valid = targets[train_end:valid_end]
    Y_test = targets[valid_end:]

    print(len(X_train))
    print(len(X_valid))
    print(len(X_test))
    # Shape of a single sample; unlike features[0].shape this also works on empty input.
    print(np.shape(features)[1:])

    return (X_train, X_valid, X_test, Y_train, Y_valid, Y_test)

In [22]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.callbacks import Callback, TensorBoard
from tensorflow.keras.optimizers import SGD
from keras.callbacks import EarlyStopping, ModelCheckpoint
import matplotlib.pyplot as plt
import random
from tensorflow.compat.v1 import set_random_seed
from keras.models import load_model

#Builds the neural network: a simple fully connected network trained with the
#MSE loss function and the Adam optimizer.
#Early stopping is used to limit overfitting, and ModelCheckpoint saves the best
#model so that it can be loaded back after training.
def learn():
    """Build, train and return the fully connected regression network.

    Reads the notebook-level globals ``X``, ``X_train``, ``Y_train``,
    ``X_valid`` and ``Y_valid``. The network has two hidden ReLU layers
    (16 and 8 units) and one linear output unit, and is trained with MSE
    loss and the Adam optimizer.

    The checkpoint callback writes to 'weights.hdf5' with save_best_only
    enabled, and the model returned is the one loaded back from that file
    rather than the in-memory model at the end of training.
    """
    # Fix the NumPy and TensorFlow seeds before any layer is created.
    np.random.seed(123)
    set_random_seed(125)

    network = Sequential([
        Dense(16, activation='relu', input_shape=X[0].shape),
        Dense(8, activation='relu'),
        Dense(1),
    ])
    network.compile(loss='mse', optimizer='Adam')

    # Stop once 20 epochs pass without improvement of the monitored metric.
    stop_when_stalled = EarlyStopping(patience=20, verbose=1)
    # Every call writes to the same file, so a later training run overwrites it.
    keep_best = ModelCheckpoint(filepath='weights.hdf5', save_best_only=True, verbose=1)

    network.fit(
        X_train,
        Y_train,
        batch_size=8,
        epochs=150,
        verbose=1,
        validation_data=(X_valid, Y_valid),
        shuffle=True,
        callbacks=[keep_best, stop_when_stalled],
    )

    return load_model('weights.hdf5')

In [10]:
#printing out the Mean Squared Error for the test dataset
def test_error(model):
    """Print the mean squared error of ``model`` on the test split.

    Reads the notebook-level globals ``X_test`` and ``Y_test``; nothing is
    returned, the score is only printed.
    """
    from sklearn.metrics import mean_squared_error

    predictions = model.predict(X_test)
    mse_on_test = mean_squared_error(Y_test, predictions)
    print("Test MSE: %f" % (mse_on_test))

In [25]:
import warnings
# Suppress every warning from here on to keep the training output readable.
# NOTE(review): this also hides deprecation warnings from the libraries in use.
warnings.filterwarnings("ignore")

In [23]:
# Train and evaluate the one-day-ahead model.
# nb_days is how many days forward the prediction should be, e.g. nb_days = 1 means tomorrow.
# X, Y and the six split arrays must keep exactly these module-level names:
# split_data(), learn() and test_error() read them as globals, so the
# statements below have to run in this order.
X, Y = load_data(nb_days = 1)
X, Y = shuffleXY(X, Y)
X_train, X_valid, X_test, Y_train, Y_valid, Y_test = split_data()
model_tomorrow = learn()
test_error(model_tomorrow)

123
35
18
(14,)
Epoch 1/150
 1/16 [>.............................] - ETA: 0s - loss: 597.3494
Epoch 00001: val_loss improved from inf to 209.67537, saving model to weights.hdf5
Epoch 2/150
 1/16 [>.............................] - ETA: 0s - loss: 236.9298
Epoch 00002: val_loss improved from 209.67537 to 76.70982, saving model to weights.hdf5
Epoch 3/150
 1/16 [>.............................] - ETA: 0s - loss: 63.9154
Epoch 00003: val_loss improved from 76.70982 to 48.11173, saving model to weights.hdf5
Epoch 4/150
 1/16 [>.............................] - ETA: 0s - loss: 61.7821
Epoch 00004: val_loss improved from 48.11173 to 22.97309, saving model to weights.hdf5
Epoch 5/150
 1/16 [>.............................] - ETA: 0s - loss: 17.6085
Epoch 00005: val_loss improved from 22.97309 to 11.76945, saving model to weights.hdf5
Epoch 6/150
 1/16 [>.............................] - ETA: 0s - loss: 22.6185
Epoch 00006: val_loss improved from 11.76945 to 7.05499, saving model to weights.hdf5
Ep

 1/16 [>.............................] - ETA: 0s - loss: 3.9456
Epoch 00036: val_loss did not improve from 5.11894
Epoch 37/150
 1/16 [>.............................] - ETA: 0s - loss: 6.1574
Epoch 00037: val_loss did not improve from 5.11894
Epoch 38/150
 1/16 [>.............................] - ETA: 0s - loss: 5.8815
Epoch 00038: val_loss improved from 5.11894 to 5.11131, saving model to weights.hdf5
Epoch 39/150
 1/16 [>.............................] - ETA: 0s - loss: 3.4424
Epoch 00039: val_loss did not improve from 5.11131
Epoch 40/150
 1/16 [>.............................] - ETA: 0s - loss: 0.9104
Epoch 00040: val_loss did not improve from 5.11131
Epoch 41/150
 1/16 [>.............................] - ETA: 0s - loss: 1.9322
Epoch 00041: val_loss did not improve from 5.11131
Epoch 42/150
 1/16 [>.............................] - ETA: 0s - loss: 3.8598
Epoch 00042: val_loss did not improve from 5.11131
Epoch 43/150
 1/16 [>.............................] - ETA: 0s - loss: 8.8870
Epoch 

Test MSE: 7.170969


In [26]:
# The input holds, for day 1, the following values as listed by the author:
# max_temperature, min_temperature, sunny_hours, feel_like_temperature,
# max_temperature, precipitation (mm), number of the day in fall
# (e.g. 09.01 means 1, 09.03 means 3, 10.01 means 32),
# followed by the same values for day 2.
# NOTE(review): max_temperature is listed twice above - confirm what the 4th and 5th columns hold.
# NOTE(review): September has 30 days, so if 09.01 is 1 then 10.01 would be 31, not 32 - confirm against the CSV.
# reshape(1, -1) turns the 14 values into a single-row batch for predict().
test_array = np.asarray([18, 11, 7.1, 11, 18, 0, 58, 15, 8, 0, 5.2, 15, 0, 59]).reshape(1, -1)
print("The predicton for tomorrow: " + str(model_tomorrow.predict([test_array])[0][0]) + "°C degree")
# this prediction is for the 28th of October

The predicton for tomorrow: 11.327475°C degree


In [13]:
# Train and evaluate the seven-days-ahead model.
# This overwrites the globals X, Y and the train/valid/test splits that
# split_data(), learn() and test_error() read, so the order of the statements matters.
X, Y = load_data(nb_days = 7)
X, Y = shuffleXY(X, Y)
X_train, X_valid, X_test, Y_train, Y_valid, Y_test = split_data()
model_week_forward = learn()
test_error(model_week_forward)

114
33
17
(14,)
Epoch 1/150

Epoch 00001: val_loss improved from inf to 265.69254, saving model to weights.hdf5
Epoch 2/150
 1/15 [=>............................] - ETA: 0s - loss: 169.3785
Epoch 00002: val_loss improved from 265.69254 to 97.70886, saving model to weights.hdf5
Epoch 3/150
 1/15 [=>............................] - ETA: 0s - loss: 32.4063
Epoch 00003: val_loss improved from 97.70886 to 73.80709, saving model to weights.hdf5
Epoch 4/150
 1/15 [=>............................] - ETA: 0s - loss: 165.1502
Epoch 00004: val_loss improved from 73.80709 to 49.36137, saving model to weights.hdf5
Epoch 5/150
 1/15 [=>............................] - ETA: 0s - loss: 40.9992
Epoch 00005: val_loss improved from 49.36137 to 36.13688, saving model to weights.hdf5
Epoch 6/150
 1/15 [=>............................] - ETA: 0s - loss: 37.1112
Epoch 00006: val_loss improved from 36.13688 to 27.96544, saving model to weights.hdf5
Epoch 7/150
 1/15 [=>............................] - ETA: 0s - lo

Epoch 36/150
 1/15 [=>............................] - ETA: 0s - loss: 37.3002
Epoch 00036: val_loss did not improve from 20.14436
Epoch 37/150
 1/15 [=>............................] - ETA: 0s - loss: 39.9487
Epoch 00037: val_loss improved from 20.14436 to 19.64133, saving model to weights.hdf5
Epoch 38/150
 1/15 [=>............................] - ETA: 0s - loss: 19.8927
Epoch 00038: val_loss did not improve from 19.64133
Epoch 39/150
 1/15 [=>............................] - ETA: 0s - loss: 13.5800
Epoch 00039: val_loss did not improve from 19.64133
Epoch 40/150
 1/15 [=>............................] - ETA: 0s - loss: 6.0235
Epoch 00040: val_loss did not improve from 19.64133
Epoch 41/150
 1/15 [=>............................] - ETA: 0s - loss: 26.1176
Epoch 00041: val_loss did not improve from 19.64133
Epoch 42/150
 1/15 [=>............................] - ETA: 0s - loss: 16.3936
Epoch 00042: val_loss did not improve from 19.64133
Epoch 43/150
 1/15 [=>............................] - ET

Epoch 73/150
 1/15 [=>............................] - ETA: 0s - loss: 13.4972
Epoch 00073: val_loss did not improve from 18.72513
Epoch 74/150
 1/15 [=>............................] - ETA: 0s - loss: 23.0997
Epoch 00074: val_loss did not improve from 18.72513
Epoch 75/150
 1/15 [=>............................] - ETA: 0s - loss: 19.1961
Epoch 00075: val_loss did not improve from 18.72513
Epoch 76/150
 1/15 [=>............................] - ETA: 0s - loss: 14.8490
Epoch 00076: val_loss did not improve from 18.72513
Epoch 77/150
 1/15 [=>............................] - ETA: 0s - loss: 27.9310
Epoch 00077: val_loss did not improve from 18.72513
Epoch 78/150
 1/15 [=>............................] - ETA: 0s - loss: 5.6364
Epoch 00078: val_loss did not improve from 18.72513
Epoch 79/150
 1/15 [=>............................] - ETA: 0s - loss: 30.9102
Epoch 00079: val_loss did not improve from 18.72513
Epoch 80/150
 1/15 [=>............................] - ETA: 0s - loss: 15.1720
Epoch 00080: 

In [14]:
# Same two-day input row as for the one-day-ahead prediction, reshaped into a single-row batch.
test_array = np.asarray([18, 11, 7.1, 11, 18, 0, 58, 15, 8, 0, 5.2, 15, 0, 59]).reshape(1, -1)
print("The predicton for a week forward: " + str(model_week_forward.predict([test_array])[0][0]) + "°C degree")
# this prediction is for the 3rd of November

The predicton for a week forward: 8.846021°C degree


In [21]:
# Train and evaluate the 28-days-ahead model.
# This overwrites the globals X, Y and the train/valid/test splits that
# split_data(), learn() and test_error() read, so the order of the statements matters.
X, Y = load_data(nb_days = 28)
X, Y = shuffleXY(X, Y)
X_train, X_valid, X_test, Y_train, Y_valid, Y_test = split_data()
model_month_forward = learn()
test_error(model_month_forward)

85
24
13
(14,)
Epoch 1/150

Epoch 00001: val_loss improved from inf to 265.38562, saving model to weights.hdf5
Epoch 2/150
 1/11 [=>............................] - ETA: 0s - loss: 311.9648
Epoch 00002: val_loss improved from 265.38562 to 103.05072, saving model to weights.hdf5
Epoch 3/150
 1/11 [=>............................] - ETA: 0s - loss: 105.8429
Epoch 00003: val_loss improved from 103.05072 to 49.26326, saving model to weights.hdf5
Epoch 4/150
 1/11 [=>............................] - ETA: 0s - loss: 30.6740
Epoch 00004: val_loss did not improve from 49.26326
Epoch 5/150
 1/11 [=>............................] - ETA: 0s - loss: 22.6017
Epoch 00005: val_loss improved from 49.26326 to 43.39244, saving model to weights.hdf5
Epoch 6/150
 1/11 [=>............................] - ETA: 0s - loss: 61.0176
Epoch 00006: val_loss improved from 43.39244 to 30.66170, saving model to weights.hdf5
Epoch 7/150
 1/11 [=>............................] - ETA: 0s - loss: 19.8088
Epoch 00007: val_loss 

Epoch 34/150
 1/11 [=>............................] - ETA: 0s - loss: 23.9933
Epoch 00034: val_loss improved from 13.99224 to 13.91194, saving model to weights.hdf5
Epoch 35/150
 1/11 [=>............................] - ETA: 0s - loss: 9.6200
Epoch 00035: val_loss did not improve from 13.91194
Epoch 36/150
 1/11 [=>............................] - ETA: 0s - loss: 15.9669
Epoch 00036: val_loss did not improve from 13.91194
Epoch 37/150
 1/11 [=>............................] - ETA: 0s - loss: 26.4769
Epoch 00037: val_loss improved from 13.91194 to 13.42880, saving model to weights.hdf5
Epoch 38/150
 1/11 [=>............................] - ETA: 0s - loss: 16.7033
Epoch 00038: val_loss did not improve from 13.42880
Epoch 39/150
 1/11 [=>............................] - ETA: 0s - loss: 11.3323
Epoch 00039: val_loss did not improve from 13.42880
Epoch 40/150
 1/11 [=>............................] - ETA: 0s - loss: 25.3439
Epoch 00040: val_loss improved from 13.42880 to 13.25949, saving model to

Epoch 69/150
 1/11 [=>............................] - ETA: 0s - loss: 19.6362
Epoch 00069: val_loss did not improve from 11.36961
Epoch 70/150
 1/11 [=>............................] - ETA: 0s - loss: 9.9891
Epoch 00070: val_loss improved from 11.36961 to 11.34611, saving model to weights.hdf5
Epoch 71/150
 1/11 [=>............................] - ETA: 0s - loss: 22.7161
Epoch 00071: val_loss did not improve from 11.34611
Epoch 72/150
 1/11 [=>............................] - ETA: 0s - loss: 20.8947
Epoch 00072: val_loss improved from 11.34611 to 11.16740, saving model to weights.hdf5
Epoch 73/150
 1/11 [=>............................] - ETA: 0s - loss: 14.1124
Epoch 00073: val_loss did not improve from 11.16740
Epoch 74/150
 1/11 [=>............................] - ETA: 0s - loss: 24.3159
Epoch 00074: val_loss did not improve from 11.16740
Epoch 75/150
 1/11 [=>............................] - ETA: 0s - loss: 12.0629
Epoch 00075: val_loss did not improve from 11.16740
Epoch 76/150
 1/11 [=

Epoch 00105: early stopping
Test MSE: 13.950715


In [18]:
# Same two-day input row as for the one-day-ahead prediction, reshaped into a single-row batch.
test_array = np.asarray([18, 11, 7.1, 11, 18, 0, 58, 15, 8, 0, 5.2, 15, 0, 59]).reshape(1, -1)
print("The predicton for a month forward: " + str(model_month_forward.predict([test_array])[0][0]) + "°C degree")
# this prediction is for the 24th of November

The predicton for a month forward: 3.6434593°C degree


In [None]:
# Set your custom nb_days: how many days forward you want the prediction to be.
# The value below is only a placeholder (a set containing a string); replace the
# whole {...} expression with an integer, otherwise load_data raises a TypeError.
X, Y = load_data(nb_days = {'insert your number here'})
X, Y = shuffleXY(X, Y)
X_train, X_valid, X_test, Y_train, Y_valid, Y_test = split_data()
model_custom = learn()
test_error(model_custom)

In [None]:
# Two-day input row (14 values) reshaped into a single-row batch; requires
# model_custom from the custom training cell.
test_array = np.asarray([18, 11, 7.1, 11, 18, 0, 58, 15, 8, 0, 5.2, 15, 0, 59]).reshape(1, -1)
print("The predicton for custom prediction: " + str(model_custom.predict([test_array])[0][0]) + "°C degree")