In [133]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

**Read data**

In [109]:
temp_zuz = pd.read_csv('data/temp_zuz.csv', delimiter=';')
temp_zuz['Czas'] = pd.to_datetime(temp_zuz['Czas'])

In [118]:
df = pd.read_csv('data/data.csv')
df['czas'] = pd.to_datetime(df['czas'])

In [119]:
df.shape

(702780, 58)

In [120]:
temp_zuz.shape

(11384, 2)

In [121]:
df.head()

Unnamed: 0,czas,001fcx00211.pv,001fcx00221.pv,001fcx00231.pv,001fcx00241.pv,001fir01307.daca.pv,001fir01308.daca.pv,001fir01309.daca.pv,001fir01310.daca.pv,001fir01311.daca.pv,...,001uxm0rf01.daca.pv,001uxm0rf02.daca.pv,001uxm0rf03.daca.pv,037tix00254.daca.pv,037tix00264.daca.pv,prazonka_fe,prazonka_s,prob_corg,prob_fe,prob_s
0,2020-10-01 00:00:00+02:00,56.729077,54.724422,11.966905,22.493207,108.702362,112.280388,110.013796,108.761583,110.604901,...,92.090454,92.024078,92.405281,24.665309,24.526161,4.48,8.98,8.6,4.55,9.87
1,2020-10-01 00:01:00+02:00,54.771942,54.734675,12.02641,22.593412,108.846612,112.3346,109.906645,108.879083,110.644046,...,92.090534,92.024206,92.405239,24.663982,24.526161,4.48,8.98,8.6,4.55,9.87
2,2020-10-01 00:02:00+02:00,54.695816,54.774163,11.953671,22.428933,108.82764,112.362508,109.970932,108.844765,110.621665,...,92.090613,92.024334,92.405196,24.662656,24.526161,4.48,8.98,8.6,4.55,9.87
3,2020-10-01 00:03:00+02:00,54.154394,54.985713,12.052065,22.335388,108.623473,112.386061,109.836948,108.717994,110.464245,...,92.090692,92.024462,92.405153,24.661329,24.526161,4.48,8.98,8.6,4.55,9.87
4,2020-10-01 00:04:00+02:00,54.693184,54.490742,12.02731,22.41262,108.746784,112.396792,109.96691,108.978605,110.60366,...,92.090771,92.02459,92.40511,24.660003,24.525475,4.48,8.98,8.6,4.55,9.87


In [122]:
temp_zuz.head()

Unnamed: 0,Czas,temp_zuz
0,2020-10-01 00:00:00,1297
1,2020-10-01 01:00:00,1295
2,2020-10-01 02:00:00,1303
3,2020-10-01 02:30:00,1302
4,2020-10-01 03:00:00,1303


**Simple feature engineering**

In [123]:
# średnia woda powrotna kolektora
TIR = df.columns[df.columns.str.contains('tir')].values
df["avg_woda_powrotna"] = df[TIR].mean(axis=1)
df = df.drop(TIR, axis=1)
df.head()

Unnamed: 0,czas,001fcx00211.pv,001fcx00221.pv,001fcx00231.pv,001fcx00241.pv,001fir01307.daca.pv,001fir01308.daca.pv,001fir01309.daca.pv,001fir01310.daca.pv,001fir01311.daca.pv,...,001uxm0rf02.daca.pv,001uxm0rf03.daca.pv,037tix00254.daca.pv,037tix00264.daca.pv,prazonka_fe,prazonka_s,prob_corg,prob_fe,prob_s,avg_woda_powrotna
0,2020-10-01 00:00:00+02:00,56.729077,54.724422,11.966905,22.493207,108.702362,112.280388,110.013796,108.761583,110.604901,...,92.024078,92.405281,24.665309,24.526161,4.48,8.98,8.6,4.55,9.87,29.08639
1,2020-10-01 00:01:00+02:00,54.771942,54.734675,12.02641,22.593412,108.846612,112.3346,109.906645,108.879083,110.644046,...,92.024206,92.405239,24.663982,24.526161,4.48,8.98,8.6,4.55,9.87,29.087996
2,2020-10-01 00:02:00+02:00,54.695816,54.774163,11.953671,22.428933,108.82764,112.362508,109.970932,108.844765,110.621665,...,92.024334,92.405196,24.662656,24.526161,4.48,8.98,8.6,4.55,9.87,29.094825
3,2020-10-01 00:03:00+02:00,54.154394,54.985713,12.052065,22.335388,108.623473,112.386061,109.836948,108.717994,110.464245,...,92.024462,92.405153,24.661329,24.526161,4.48,8.98,8.6,4.55,9.87,29.098936
4,2020-10-01 00:04:00+02:00,54.693184,54.490742,12.02731,22.41262,108.746784,112.396792,109.96691,108.978605,110.60366,...,92.02459,92.40511,24.660003,24.525475,4.48,8.98,8.6,4.55,9.87,29.096812


In [124]:
# średnia woda chłodząca kolektora
FIR = df.columns[df.columns.str.contains('fir')].values
df["avg_woda_chlodzaca"] = df[FIR].mean(axis=1)
df = df.drop(FIR, axis=1)
df.head()

Unnamed: 0,czas,001fcx00211.pv,001fcx00221.pv,001fcx00231.pv,001fcx00241.pv,001nir0szr0.daca.pv,001tix01063.daca.pv,001tix01064.daca.pv,001tix01065.daca.pv,001tix01066.daca.pv,...,001uxm0rf03.daca.pv,037tix00254.daca.pv,037tix00264.daca.pv,prazonka_fe,prazonka_s,prob_corg,prob_fe,prob_s,avg_woda_powrotna,avg_woda_chlodzaca
0,2020-10-01 00:00:00+02:00,56.729077,54.724422,11.966905,22.493207,14.362428,385.911908,421.166412,455.862854,435.74131,...,92.405281,24.665309,24.526161,4.48,8.98,8.6,4.55,9.87,29.08639,113.360791
1,2020-10-01 00:01:00+02:00,54.771942,54.734675,12.02641,22.593412,14.381825,385.917027,421.166412,455.862427,435.739176,...,92.405239,24.663982,24.526161,4.48,8.98,8.6,4.55,9.87,29.087996,113.354389
2,2020-10-01 00:02:00+02:00,54.695816,54.774163,11.953671,22.428933,14.359413,385.922148,421.166412,455.862,435.737043,...,92.405196,24.662656,24.526161,4.48,8.98,8.6,4.55,9.87,29.094825,113.38307
3,2020-10-01 00:03:00+02:00,54.154394,54.985713,12.052065,22.335388,14.353036,385.927267,421.166412,455.861572,435.734911,...,92.405153,24.661329,24.526161,4.48,8.98,8.6,4.55,9.87,29.098936,113.281268
4,2020-10-01 00:04:00+02:00,54.693184,54.490742,12.02731,22.41262,14.392053,385.932387,421.166412,455.861145,435.732775,...,92.40511,24.660003,24.525475,4.48,8.98,8.6,4.55,9.87,29.096812,113.380821


In [125]:
# średnia temperatura pod warstwą wymurówki
TIX = df.columns[df.columns.str.contains('tix')].values
df["avg_temp_pod"] = df[TIX].mean(axis=1)
df = df.drop(TIX, axis=1)
df.head()

Unnamed: 0,czas,001fcx00211.pv,001fcx00221.pv,001fcx00231.pv,001fcx00241.pv,001nir0szr0.daca.pv,001txi01153.daca.pv,001txi01154.daca.pv,001uxm0rf01.daca.pv,001uxm0rf02.daca.pv,001uxm0rf03.daca.pv,prazonka_fe,prazonka_s,prob_corg,prob_fe,prob_s,avg_woda_powrotna,avg_woda_chlodzaca,avg_temp_pod
0,2020-10-01 00:00:00+02:00,56.729077,54.724422,11.966905,22.493207,14.362428,19.126427,19.929581,92.090454,92.024078,92.405281,4.48,8.98,8.6,4.55,9.87,29.08639,113.360791,388.652871
1,2020-10-01 00:01:00+02:00,54.771942,54.734675,12.02641,22.593412,14.381825,19.125526,19.942439,92.090534,92.024206,92.405239,4.48,8.98,8.6,4.55,9.87,29.087996,113.354389,388.651983
2,2020-10-01 00:02:00+02:00,54.695816,54.774163,11.953671,22.428933,14.359413,19.124626,19.919802,92.090613,92.024334,92.405196,4.48,8.98,8.6,4.55,9.87,29.094825,113.38307,388.651095
3,2020-10-01 00:03:00+02:00,54.154394,54.985713,12.052065,22.335388,14.353036,19.123726,19.861671,92.090692,92.024462,92.405153,4.48,8.98,8.6,4.55,9.87,29.098936,113.281268,388.650208
4,2020-10-01 00:04:00+02:00,54.693184,54.490742,12.02731,22.41262,14.392053,19.122826,19.803541,92.090771,92.02459,92.40511,4.48,8.98,8.6,4.55,9.87,29.096812,113.380821,388.649294


In [126]:
df.shape

(702780, 19)

**Shuffle data and split**

In [127]:
new_df = pd.concat([df, temp_zuz], axis=1)
new_df = new_df.drop(["czas"], axis=1)
new_df = new_df.dropna(axis=0)
new_df = new_df.sample(frac=1)

In [128]:
new_df

Unnamed: 0,001fcx00211.pv,001fcx00221.pv,001fcx00231.pv,001fcx00241.pv,001nir0szr0.daca.pv,001txi01153.daca.pv,001txi01154.daca.pv,001uxm0rf01.daca.pv,001uxm0rf02.daca.pv,001uxm0rf03.daca.pv,prazonka_fe,prazonka_s,prob_corg,prob_fe,prob_s,avg_woda_powrotna,avg_woda_chlodzaca,avg_temp_pod,Czas,temp_zuz
8347,58.615552,58.810913,12.103402,27.002987,15.741121,19.694986,19.975965,92.089029,92.071793,92.392958,4.32,7.30,8.54,4.45,9.81,28.895805,113.580361,390.456081,2021-09-21 19:00:00,1294.0
6070,57.653888,57.862395,10.048995,25.774641,17.772750,14.823839,15.284793,92.095556,92.063856,92.401855,4.48,8.98,8.74,4.56,10.09,28.178979,113.387797,389.672921,2021-06-15 23:00:00,1312.0
5315,56.456103,56.699189,11.045749,25.277758,16.143427,24.010362,24.445235,92.084344,92.071937,92.406851,4.51,8.58,8.78,4.52,9.93,29.194176,113.459179,391.850056,2021-05-14 12:00:00,1310.0
4149,56.980393,56.850258,15.916761,27.010380,13.040084,25.485557,25.996911,92.091836,92.066619,92.395149,4.32,8.80,8.88,4.58,10.02,28.916277,113.188533,391.803952,2021-03-29 00:00:00,1285.0
7264,56.876262,56.848015,12.087601,25.253205,14.858390,17.390539,17.293264,92.085153,92.067921,92.405396,4.39,7.54,8.70,4.50,9.85,28.665529,113.646526,389.731572,2021-08-05 23:00:00,1305.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4412,57.135089,57.014260,9.976559,26.278073,14.657290,23.800030,24.346899,92.086777,92.081754,92.404981,4.44,9.24,8.82,4.59,9.97,29.511999,113.417595,392.107854,2021-04-08 10:00:00,1295.0
10242,58.706441,59.023506,11.047440,25.581027,17.991063,17.841250,17.885232,92.098863,92.073977,92.400777,4.48,8.84,8.84,4.47,9.61,28.182887,113.514270,389.161566,2021-12-14 01:00:00,1304.0
4513,55.057656,55.972320,14.833382,29.368239,16.073462,25.659809,26.237921,92.096145,92.077769,92.405396,4.57,9.46,8.82,4.53,9.74,29.505354,113.429332,391.966774,2021-04-12 10:00:00,1310.0
8368,59.039392,58.904336,11.996203,28.162483,18.491770,19.419823,19.750575,92.088926,92.071793,92.386684,4.32,7.30,8.54,4.45,9.81,28.812671,113.430161,390.398494,2021-09-22 16:00:00,1300.0


In [129]:
new_df.shape

(11384, 20)

In [130]:
train = new_df.iloc[:int(new_df.shape[0] * 0.9)]
test = new_df.iloc[int(new_df.shape[0] * 0.9):]

In [131]:
train_X = train.drop(["temp_zuz"], axis=1)
test_X = test.drop(["temp_zuz"], axis=1)
train_Y = np.array(train["temp_zuz"])
test_Y = np.array(test["temp_zuz"])

In [132]:
test_Y

array([1300., 1308., 1296., ..., 1310., 1300., 1303.])

**Normalization**

In [134]:
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(np.array(train_X))

AttributeError: module 'tensorflow.keras.layers' has no attribute 'Normalization'