In [83]:
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import accuracy_score
from sklearn.metrics import balanced_accuracy_score

from data_processing_functions import *

## 1. Read Data

In [84]:
data_train, data_test = get_train_and_test_data('./data', amount_of_days=3, wind_border=8, convert_str_variable_flag=True)
x_data_train, y_data_wind_train, y_data_temperature_train = data_train
x_data_test, y_data_wind_test, y_data_temperature_test = data_test

In [99]:
model = Sequential()
for layer_size in (128,8):
    model.add(Dense(layer_size, activation='relu', kernel_initializer='he_normal'))
model.add(Dense(len(np.unique(y_data_wind_train)), activation='softmax'))
model.compile(loss=['sparse_categorical_crossentropy'], optimizer='adam')


In [100]:
model.fit(x_data_train, y_data_wind_train, epochs=5, batch_size=32, verbose=2)
yhat = model.predict(x_data_test)
yhat = np.argmax(yhat, axis=-1).astype('int')
acc = accuracy_score(y_data_wind_test, yhat)

Epoch 1/5
1376/1376 - 1s - loss: 1.0624
Epoch 2/5
1376/1376 - 0s - loss: 0.4137
Epoch 3/5
1376/1376 - 0s - loss: 0.4133
Epoch 4/5
1376/1376 - 0s - loss: 0.4133
Epoch 5/5
1376/1376 - 0s - loss: 0.4133


In [101]:
acc

0.7496385044412311

## 2. Wind classification

In [18]:
clf = MLPClassifier(alpha=1e-3, hidden_layer_sizes=(500), random_state=1)

In [20]:
clf.fit(x_data_train, y_data_wind_train)

MLPClassifier(alpha=0.001, hidden_layer_sizes=500, random_state=1)

In [21]:
accuracy_score(clf.predict(x_data_test), y_data_wind_test)

0.8554036251306046

In [11]:
clf = MLPClassifier(alpha=1e-5, solver='adam', hidden_layer_sizes=(100, 20), random_state=1, n_iter_no_change= 100)
clf.fit(x_data_train, y_data_wind_train)
predicted = clf.predict(x_data_test)
print("Accuracy: {:.5f}".format(accuracy_score(y_data_wind_test, predicted)))
print("Resampled balanced accuracy: {:.5f}".format(balanced_accuracy_score(y_data_wind_test, predicted)))

Accuracy: 0.85538
Resampled balanced accuracy: 0.50000




In [12]:
np.unique(predicted)

array([0], dtype=uint8)

## 3. Temperature prediction

In [23]:
regr = MLPRegressor(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(100, 20), random_state=1)

In [24]:
regr.fit(x_data_train, y_data_temperature_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


MLPRegressor(alpha=1e-05, hidden_layer_sizes=(100, 20), random_state=1,
             solver='lbfgs')

In [25]:
err = abs(regr.predict(x_data_test) - y_data_temperature_test)
MAE = round(np.mean(err),2)
RMSE = round(np.sqrt(((err)**2).mean()),2)
print("MAE:", MAE, "RMSE:", RMSE)

MAE: 5.05 RMSE: 6.92


In [36]:
regr = MLPRegressor(alpha=1e-6, hidden_layer_sizes=(100, 20), random_state=1)
regr.fit(x_data_train, y_data_temperature_train)
err = abs(regr.predict(x_data_test) - y_data_temperature_test)
MAE = round(np.mean(err),2)
RMSE = round(np.sqrt(((err)**2).mean()),2)
print("MAE:", MAE, "RMSE:", RMSE)

MAE: 5.16 RMSE: 6.92


# Study issue IV

### Zastąpienie null średnią

In [7]:
data_train, data_test = get_train_and_test_data('./data', amount_of_days=3, wind_border=8, convert_str_variable_flag=True, remove_nulls = 2)
x_data_train, y_data_wind_train, y_data_temperature_train = data_train
x_data_test, y_data_wind_test, y_data_temperature_test = data_test

  df[df["datetime"].str.startswith(date)] = df_tmp.fillna(df_tmp.mean())


In [10]:
clf = MLPClassifier(alpha=1e-5, hidden_layer_sizes=(100, 20), random_state=1)
clf.fit(x_data_train, y_data_wind_train)
predicted = clf.predict(x_data_test)
print("Accuracy: {:.5f}".format(accuracy_score(y_data_wind_test, predicted)))
print("Resampled balanced accuracy: {:.5f}".format(balanced_accuracy_score(y_data_wind_test, predicted)))

Accuracy: 0.73477
Resampled balanced accuracy: 0.50000


In [11]:
regr = MLPRegressor(alpha=1e-6, hidden_layer_sizes=(100, 20), random_state=1)
regr.fit(x_data_train, y_data_temperature_train)
err = abs(regr.predict(x_data_test) - y_data_temperature_test)
MAE = round(np.mean(err),2)
RMSE = round(np.sqrt(((err)**2).mean()),2)
print("MAE:", MAE, "RMSE:", RMSE)

MAE: 5.05 RMSE: 6.64


### Zastąpienie null wartościami powyżej i poniżej

In [11]:
data_train, data_test = get_train_and_test_data('./data', amount_of_days=3, wind_border=8, convert_str_variable_flag=True, remove_nulls = 3)
x_data_train, y_data_wind_train, y_data_temperature_train = data_train
x_data_test, y_data_wind_test, y_data_temperature_test = data_test

In [12]:
clf = MLPClassifier(alpha=1e-5, hidden_layer_sizes=(100, 20), random_state=1)
clf.fit(x_data_train, y_data_wind_train)
predicted = clf.predict(x_data_test)
print("Accuracy: {:.5f}".format(accuracy_score(y_data_wind_test, predicted)))
print("Resampled balanced accuracy: {:.5f}".format(balanced_accuracy_score(y_data_wind_test, predicted)))

Accuracy: 0.73942
Resampled balanced accuracy: 0.50000


In [13]:
regr = MLPRegressor(alpha=1e-6, hidden_layer_sizes=(100, 20), random_state=1)
regr.fit(x_data_train, y_data_temperature_train)
err = abs(regr.predict(x_data_test) - y_data_temperature_test)
MAE = round(np.mean(err),2)
RMSE = round(np.sqrt(((err)**2).mean()),2)
print("MAE:", MAE, "RMSE:", RMSE)

MAE: 5.24 RMSE: 6.77


# Study issue V

In [4]:
data_train, dest_test = get_train_and_test_data_with_separate_day('./data', amount_of_days=3, wind_border=8, convert_str_variable_flag=True)
x_data_train, y_data_wind_train1, y_data_temperature_train1, y_data_wind_train2, y_data_temperature_train2 = data_train
x_data_test, y_data_wind_test1, y_data_temperature_test1, y_data_wind_test2, y_data_temperature_test2 = data_train

In [28]:
clf1 = MLPClassifier(alpha=1e-5, hidden_layer_sizes=(100, 20), random_state=1)
clf2 = MLPClassifier(alpha=1e-5, hidden_layer_sizes=(100, 20), random_state=1)
clf1.fit(x_data_train, y_data_wind_train1)
predicted1 = clf1.predict(x_data_train)
clf2.fit(predicted1.reshape(-1, 1), y_data_wind_train2)

predicted2 = clf1.predict(x_data_test)
predicted = clf2.predict(predicted2.reshape(-1, 1))
print("Accuracy: {:.5f}".format(accuracy_score(y_data_wind_test2, predicted)))
print("Resampled balanced accuracy: {:.5f}".format(balanced_accuracy_score(y_data_wind_test2, predicted)))

Accuracy: 0.85538
Resampled balanced accuracy: 0.50000


In [27]:
regr1 = MLPRegressor(alpha=1e-6, hidden_layer_sizes=(100, 20), random_state=1)
regr2 = MLPRegressor(alpha=1e-6, hidden_layer_sizes=(100, 20), random_state=1)
regr1.fit(x_data_train, y_data_temperature_train1)
prediced1 = regr1.predict(x_data_train)
regr2.fit(prediced1.reshape(-1, 1), y_data_temperature_train2)

prediced2 = regr1.predict(x_data_test)
err = abs(regr2.predict(prediced2.reshape(-1, 1)) - y_data_temperature_test2)
MAE = round(np.mean(err),2)
RMSE = round(np.sqrt(((err)**2).mean()),2)
print("MAE:", MAE, "RMSE:", RMSE)

MAE: 3.64 RMSE: 5.54


Można jeszcze sprawdzić wyniki jakby uczyć go na podstawie prawdziwych danych a nie predykcji

# Study issue VI

In [2]:
# Dlugo liczy
data_by_seasons = get_train_and_test_data_by_season('./data', amount_of_days=3, wind_border=8, convert_str_variable_flag=True)

In [5]:
data_train0, data_train0 = data_by_seasons[0]
x_data_train0, y_data_wind_train0, y_data_temperature_train0 = data_train0
x_data_test0, y_data_wind_test0, y_data_temperature_test0 = data_train0

data_train1, data_train1 = data_by_seasons[1]
x_data_train1, y_data_wind_train1, y_data_temperature_train1 = data_train1
x_data_test1, y_data_wind_test1, y_data_temperature_test1 = data_train1

data_train2, data_train2 = data_by_seasons[2]
x_data_train2, y_data_wind_train2, y_data_temperature_train2 = data_train2
x_data_test2, y_data_wind_test2, y_data_temperature_test2 = data_train2

data_train3, data_train3 = data_by_seasons[3]
x_data_train3, y_data_wind_train3, y_data_temperature_train3 = data_train3
x_data_test3, y_data_wind_test3, y_data_temperature_test3 = data_train3

### Spring

In [11]:
clf = MLPClassifier(alpha=1e-5, hidden_layer_sizes=(100, 20), random_state=1)
clf.fit(x_data_train0, y_data_wind_train0)
predicted = clf.predict(x_data_test0)
print("Accuracy: {:.5f}".format(accuracy_score(y_data_wind_test0, predicted)))
print("Resampled balanced accuracy: {:.5f}".format(balanced_accuracy_score(y_data_wind_test0, predicted)))

Accuracy: 0.33882
Resampled balanced accuracy: 0.53363


In [12]:
regr = MLPRegressor(alpha=1e-6, hidden_layer_sizes=(100, 20), random_state=1)
regr.fit(x_data_train0, y_data_temperature_train0)
err = abs(regr.predict(x_data_test0) - y_data_temperature_test0)
MAE = round(np.mean(err),2)
RMSE = round(np.sqrt(((err)**2).mean()),2)
print("MAE:", MAE, "RMSE:", RMSE)

MAE: 4.2 RMSE: 6.03


### Summer

In [13]:
clf = MLPClassifier(alpha=1e-5, hidden_layer_sizes=(100, 20), random_state=1)
clf.fit(x_data_train1, y_data_wind_train1)
predicted = clf.predict(x_data_test1)
print("Accuracy: {:.5f}".format(accuracy_score(y_data_wind_test1, predicted)))
print("Resampled balanced accuracy: {:.5f}".format(balanced_accuracy_score(y_data_wind_test1, predicted)))

Accuracy: 0.78888
Resampled balanced accuracy: 0.59835


In [14]:
regr = MLPRegressor(alpha=1e-6, hidden_layer_sizes=(100, 20), random_state=1)
regr.fit(x_data_train1, y_data_temperature_train1)
err = abs(regr.predict(x_data_test1) - y_data_temperature_test1)
MAE = round(np.mean(err),2)
RMSE = round(np.sqrt(((err)**2).mean()),2)
print("MAE:", MAE, "RMSE:", RMSE)

MAE: 5.15 RMSE: 7.05


### Fall

In [16]:
clf = MLPClassifier(alpha=1e-5, hidden_layer_sizes=(100, 20), random_state=1)
clf.fit(x_data_train2, y_data_wind_train2)
predicted = clf.predict(x_data_test2)
print("Accuracy: {:.5f}".format(accuracy_score(y_data_wind_test2, predicted)))
print("Resampled balanced accuracy: {:.5f}".format(balanced_accuracy_score(y_data_wind_test2, predicted)))

Accuracy: 0.77613
Resampled balanced accuracy: 0.50000


In [17]:
regr = MLPRegressor(alpha=1e-6, hidden_layer_sizes=(100, 20), random_state=1)
regr.fit(x_data_train2, y_data_temperature_train2)
err = abs(regr.predict(x_data_test2) - y_data_temperature_test2)
MAE = round(np.mean(err),2)
RMSE = round(np.sqrt(((err)**2).mean()),2)
print("MAE:", MAE, "RMSE:", RMSE)

MAE: 4.24 RMSE: 5.83




### Winter

In [18]:
clf = MLPClassifier(alpha=1e-5, hidden_layer_sizes=(100, 20), random_state=1)
clf.fit(x_data_train3, y_data_wind_train3)
predicted = clf.predict(x_data_test3)
print("Accuracy: {:.5f}".format(accuracy_score(y_data_wind_test3, predicted)))
print("Resampled balanced accuracy: {:.5f}".format(balanced_accuracy_score(y_data_wind_test3, predicted)))

Accuracy: 0.73451
Resampled balanced accuracy: 0.52134


In [19]:
regr = MLPRegressor(alpha=1e-6, hidden_layer_sizes=(100, 20), random_state=1)
regr.fit(x_data_train3, y_data_temperature_train3)
err = abs(regr.predict(x_data_test3) - y_data_temperature_test3)
MAE = round(np.mean(err),2)
RMSE = round(np.sqrt(((err)**2).mean()),2)
print("MAE:", MAE, "RMSE:", RMSE)

MAE: 3.78 RMSE: 4.84


# Study issue VII


In [None]:
data_train, dest_test = get_train_and_test_data('./data', amount_of_days=3, wind_border=8, convert_str_variable_flag=True)
x_data_train, y_data_wind_train, y_data_temperature_train = data_train
x_data_test, y_data_wind_test, y_data_temperature_test = data_train

In [20]:
# mlp for combined regression and classification predictions on the abalone dataset
from numpy import unique
from numpy import argmax
from pandas import read_csv
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import plot_model

In [32]:
# https://machinelearningmastery.com/neural-network-models-for-combined-classification-and-regression/

# x_data_train, y_data_wind_train, y_data_temperature_train = data_train

visible = Input(shape=(x_data_train.shape[1],))
hidden1 = Dense(20, activation='relu', kernel_initializer='he_normal')(visible)
hidden2 = Dense(10, activation='relu', kernel_initializer='he_normal')(hidden1)
# regression output
out_reg = Dense(1, activation='linear')(hidden2)
# classification output
out_clas = Dense(len(unique(y_data_wind_train)), activation='softmax')(hidden2)
# define model
model = Model(inputs=visible, outputs=[out_reg, out_clas])
# compile the keras model
model.compile(loss=['mse','sparse_categorical_crossentropy'], optimizer='adam')

In [33]:
# fit the keras model on the dataset
model.fit(x_data_train, [y_data_temperature_train, y_data_wind_train], epochs=15, batch_size=32, verbose=2)

Epoch 1/15
1376/1376 - 1s - loss: 1295.9841 - dense_6_loss: 1283.7991 - dense_7_loss: 12.1849
Epoch 2/15
1376/1376 - 0s - loss: 64.6328 - dense_6_loss: 64.2048 - dense_7_loss: 0.4280
Epoch 3/15
1376/1376 - 0s - loss: 59.1674 - dense_6_loss: 58.7390 - dense_7_loss: 0.4284
Epoch 4/15
1376/1376 - 0s - loss: 57.6984 - dense_6_loss: 57.2666 - dense_7_loss: 0.4319
Epoch 5/15
1376/1376 - 0s - loss: 56.2536 - dense_6_loss: 55.8244 - dense_7_loss: 0.4292
Epoch 6/15
1376/1376 - 0s - loss: 55.4888 - dense_6_loss: 55.0570 - dense_7_loss: 0.4318
Epoch 7/15
1376/1376 - 0s - loss: 55.8640 - dense_6_loss: 55.4351 - dense_7_loss: 0.4288
Epoch 8/15
1376/1376 - 0s - loss: 54.6507 - dense_6_loss: 54.2221 - dense_7_loss: 0.4287
Epoch 9/15
1376/1376 - 0s - loss: 54.3652 - dense_6_loss: 53.9368 - dense_7_loss: 0.4284
Epoch 10/15
1376/1376 - 0s - loss: 53.4865 - dense_6_loss: 53.0545 - dense_7_loss: 0.4320
Epoch 11/15
1376/1376 - 0s - loss: 53.4612 - dense_6_loss: 53.0262 - dense_7_loss: 0.4349
Epoch 12/15
13

<keras.callbacks.History at 0x24a12c6c3a0>