In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

import tensorflow as tf

from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

**Read data**

In [None]:
# Load the semicolon-separated target table and parse its 'Czas' column as datetimes.
temp_zuz = pd.read_csv('data/temp_zuz.csv', sep=';').assign(
    Czas=lambda raw: pd.to_datetime(raw['Czas'])
)

In [None]:
# Load the feature table and parse its 'czas' column as datetimes.
df = pd.read_csv('data/data.csv').assign(
    czas=lambda raw: pd.to_datetime(raw['czas'])
)

In [None]:
# Show the (rows, columns) dimensions of the frame loaded from data/data.csv.
df.shape

In [None]:
# Show the (rows, columns) dimensions of the frame loaded from data/temp_zuz.csv.
temp_zuz.shape

In [None]:
# Preview the first rows of the feature frame.
df.head()

In [None]:
# Preview the first rows of the temp_zuz frame.
temp_zuz.head()

**Simple feature engineering**

In [None]:
# Average return water of the collector: collapse every column whose name contains
# 'tir' into a single row-wise mean, then drop the source columns.
TIR = df.columns[df.columns.str.contains('tir')].values
# Guard against re-running this cell: once the 'tir' columns have been dropped the
# selection is empty, and averaging it would overwrite the feature with NaN.
if len(TIR) > 0:
    df["avg_woda_powrotna"] = df[TIR].mean(axis=1)
    df = df.drop(TIR, axis=1)
df.head()

In [None]:
# Average cooling water of the collector: collapse every column whose name contains
# 'fir' into a single row-wise mean, then drop the source columns.
FIR = df.columns[df.columns.str.contains('fir')].values
# Guard against re-running this cell: once the 'fir' columns have been dropped the
# selection is empty, and averaging it would overwrite the feature with NaN.
if len(FIR) > 0:
    df["avg_woda_chlodzaca"] = df[FIR].mean(axis=1)
    df = df.drop(FIR, axis=1)
df.head()

In [None]:
# Average temperature under the lining layer: collapse every column whose name contains
# 'tix' into a single row-wise mean, then drop the source columns.
TIX = df.columns[df.columns.str.contains('tix')].values
# Guard against re-running this cell: once the 'tix' columns have been dropped the
# selection is empty, and averaging it would overwrite the feature with NaN.
if len(TIX) > 0:
    df["avg_temp_pod"] = df[TIX].mean(axis=1)
    df = df.drop(TIX, axis=1)
df.head()

In [None]:
# Dimensions of df after the 'tir', 'fir' and 'tix' column groups were replaced by their row means.
df.shape

**Merge, clean and shuffle data**

In [None]:
# Place the feature frame and the temp_zuz frame side by side, drop the feature-side
# timestamp ('czas'), discard rows with any missing value, then shuffle the rows.
# NOTE(review): concat(axis=1) pairs rows by index label, not by timestamp — confirm
# that both CSVs are already row-aligned.
# A fixed random_state makes the shuffle, and therefore the later positional
# train/test split, reproducible across kernel restarts.
new_df = (
    pd.concat([df, temp_zuz], axis=1)
    .drop(["czas"], axis=1)
    .dropna(axis=0)
    .sample(frac=1, random_state=42)
)

In [None]:
# Display the merged and shuffled frame (rows with missing values already removed).
new_df

In [None]:
# Show the (rows, columns) dimensions of the merged frame.
new_df.shape

### Correlation matrix

In [None]:
# Lookup table read from feature_desc.csv, indexed by its 'name' column; feature_desc() reads its 'desc' column.
feature_desc_df = pd.read_csv('data_processing/feature_desc.csv', index_col='name')
def feature_desc(name):
    """Translate feature name(s) into their description(s) from ``feature_desc_df``.

    Parameters
    ----------
    name : str or list-like of labels
        A single feature name, or several names to look up at once.

    Returns
    -------
    The ``desc`` entry for a single string name, an array of ``desc`` values for a
    list-like of names, or ``name`` itself (unchanged) when the lookup fails.
    """
    try:
        desc = feature_desc_df.loc[name]['desc']
    except (KeyError, TypeError):
        # Unknown or unusable label: keep the best-effort fallback of echoing the input.
        # Only lookup errors are caught, so a missing feature_desc_df (NameError) or a
        # KeyboardInterrupt is no longer silently swallowed.
        return name
    if isinstance(name, str):
        # isinstance also accepts str subclasses such as numpy.str_ column labels.
        return desc
    # Multi-label lookups give a Series; a scalar non-string label gives a plain value
    # with no ``.values``, for which the input is echoed back as before.
    return desc.values if hasattr(desc, 'values') else name

In [None]:
# Replace raw column labels with their descriptions in a single rename. Building the
# whole mapping first avoids copying the frame once per column and prevents a
# description that equals another column's label from being renamed again later.
col_names = list(new_df.columns.values)
new_df = new_df.rename(
    columns={col_name: feature_desc(col_name) for col_name in col_names}
)


In [None]:
import seaborn as sns

# Calculate the correlation matrix over numeric/bool columns only. The frame still
# holds a datetime column at this point, and since pandas 2.0 DataFrame.corr() no
# longer drops non-numeric columns silently (numeric_only defaults to False).
corr = new_df.select_dtypes(include=["number", "bool"]).corr()

# Plot the heatmap, labelling both axes with the column names.
sns.heatmap(corr,
        xticklabels=corr.columns,
        yticklabels=corr.columns)

In [None]:
# Preview the first rows; the call parentheses are required, otherwise the cell shows
# the bound-method repr instead of the head of the frame.
new_df.head()

### Splitting data

In [None]:
# Positional 90/10 split: the first 90% of rows train the models, the rest is held out.
n_train = int(new_df.shape[0] * 0.9)
train, test = new_df.iloc[:n_train], new_df.iloc[n_train:]

In [None]:
# Features: every column except the target ('temp_zuz') and the timestamp ('Czas').
# Both splits are converted to float32 arrays the same way so the models receive the
# same input type at fit time and at predict time (test_X used to stay a DataFrame).
# NOTE(review): these labels must survive the earlier feature_desc renaming — confirm
# that feature_desc.csv has no entries for 'temp_zuz' or 'Czas'.
train_X = np.asarray(train.drop(["temp_zuz", "Czas"], axis=1)).astype(np.float32)
test_X = np.asarray(test.drop(["temp_zuz", "Czas"], axis=1)).astype(np.float32)
# Targets: the 'temp_zuz' column as float32 arrays.
train_Y = np.asarray(train["temp_zuz"]).astype(np.float32)
test_Y = np.asarray(test["temp_zuz"]).astype(np.float32)

In [None]:
# Display the training targets (float32 array built from the 'temp_zuz' column).
train_Y

**A very simple neural network**

In [None]:
# Small fully connected regressor (8 -> 4 -> 2 -> 1 units).
# The hidden layers use ReLU: Dense applies no activation by default, and a stack of
# activation-free Dense layers collapses into a single linear map of the inputs.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(4, activation='relu'),
    tf.keras.layers.Dense(2, activation='relu'),
    tf.keras.layers.Dense(1),  # linear output unit for the regression target
])
model.compile(
    loss=tf.keras.losses.mae,  # mae stands for mean absolute error
    optimizer=tf.keras.optimizers.Adam(),  # Adam with its default settings
    metrics=['mae'],
)
model.fit(train_X, train_Y, epochs=100)

In [None]:
# Score the network on the held-out rows: mean absolute and mean squared error.
pred = model.predict(test_X)
flat_pred = pred.squeeze()  # drop the trailing unit axis so it lines up with test_Y
mae = tf.metrics.mean_absolute_error(test_Y, flat_pred).numpy()
mse = tf.metrics.mean_squared_error(test_Y, flat_pred).numpy()
print(mae, mse)

**Linear Regression**

In [None]:
# Linear regression model from scikit-learn, created with its default settings.
regr = linear_model.LinearRegression()

In [None]:
# Fit the regression on the same training arrays that were used for the neural network.
regr.fit(train_X, train_Y)

In [None]:
# Predict the target for the held-out test features.
y_pred = regr.predict(test_X)

In [None]:
# Score the linear model on the held-out rows before printing the report.
lr_mse = mean_squared_error(test_Y, y_pred)
lr_r2 = r2_score(test_Y, y_pred)  # 1 is perfect prediction

# Fitted weights first, then the two error summaries rounded to two decimals.
print("Coefficients: \n", regr.coef_)
print("Mean squared error: %.2f" % lr_mse)
print("Coefficient of determination: %.2f" % lr_r2)