In [None]:
from glob import glob
import os
import gzip
import shutil
import pandas as pd
import matplotlib.pyplot as plt

from data_processing import *

import pickle

In [None]:
def unpack(dir_path = '../data/'):
    """Decompress every ``*.gz`` archive found under ``dir_path``.

    ``dir_path`` is used as a plain string prefix (``dir_path + '*.gz'``), so it
    should end with a path separator. Each archive ``<name>.gz`` is written
    next to itself as ``<name>00.csv``: the trailing ``.gz`` is dropped and
    ``00.csv`` is appended. Existing output files are overwritten.

    Returns nothing; the decompressed files are the only result.
    """
    # Glob once; the original built an unused list and then globbed again.
    for archive in glob(dir_path + '*.gz'):
        target = archive[:-3] + '00.csv'  # strip '.gz', add the '00.csv' suffix
        with gzip.open(archive, 'rb') as f_in, open(target, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
            
# Decompress the archives in the default data directory ('../data/').
unpack()

In [None]:
def merge(dir_path = '../data/'):
    """Concatenate all ``*00.csv`` files in ``dir_path`` into one DataFrame.

    Files are read in sorted filename order and stacked with ``pd.concat``
    (the per-file row index is kept as is). The ``'czas'`` column is parsed to
    datetimes and must be monotonically increasing across the concatenated
    frame, otherwise an ``AssertionError`` is raised.

    The result is also written to ``dir_path + 'merged.csv'`` without the index.
    ``dir_path`` is used as a plain string prefix, so it should end with a
    path separator.

    Returns the merged DataFrame.
    """
    csv_names = [name for name in os.listdir(dir_path) if name.endswith('00.csv')]
    csv_names.sort()

    frames = []
    for name in csv_names:
        frames.append(pd.read_csv(dir_path + name))
    merged_df = pd.concat(frames)

    # Parse timestamps, then verify that file order matches chronological order.
    timestamps = pd.to_datetime(merged_df['czas'])
    merged_df['czas'] = timestamps
    assert merged_df['czas'].is_monotonic_increasing

    output_path = dir_path + 'merged.csv'
    merged_df.to_csv(output_path, index=False)

    return merged_df
# Merge the CSV files from the default data directory; later cells start from df_og.
df_og = merge()

In [None]:
k = 15 # number of minutes in cluster
# NOTE(review): df_og is replaced by its aggregated version, so re-running this
# cell on its own passes already-aggregated data back into aggregate(); re-run
# the merge cell first. aggregate is not defined in this notebook — presumably
# it comes from the `data_processing` star import; confirm.
df_og = aggregate(df_og, k)

### Choosing and aggregating features

In [None]:
def _columns_containing(frame, fragment):
    """Return the labels of ``frame``'s columns whose name contains ``fragment``."""
    return frame.columns[frame.columns.str.contains(fragment)].values


# Feature frame shares the index of the aggregated source data. Column order
# below is kept exactly as before, since later cells pass df to the model.
df = pd.DataFrame(index=df_og.index)

# mean over the 'WODY POWROTNE KOLEKTORÓW [°C]' (collector return water) columns
TIR = _columns_containing(df_og, 'tir')
df['TIR'] = df_og[TIR].mean(axis='columns')

# mean over the 'TEMP POD 2 WARSTWĄ WYMURÓWKI [°C]' (temperature under the 2nd lining layer) columns
TIX1 = _columns_containing(df_og, '001tix')
df['TIX1'] = df_og[TIX1].mean(axis='columns')

# prob_s and prob_corg are copied through unchanged
PR = ['prob_s', 'prob_corg']
df[PR] = df_og[PR]

# concentrate feed control ('reg nadawy koncentratu')
FCX = _columns_containing(df_og, 'fcx')
df['FCX'] = df_og[FCX].mean(axis='columns')

# total thermal power ('sumaryczna moc cieplna')
NIR = _columns_containing(df_og, 'nir')
# Assigning df_og[NIR] (a DataFrame) to one column only works when exactly one
# column matched; check that explicitly and take the matching Series.
if len(NIR) != 1:
    raise ValueError(
        "expected exactly one column containing 'nir', found {}: {}".format(len(NIR), list(NIR))
    )
df['NIR'] = df_og[NIR[0]]

# 'WENT ODCZ ZAD OBROTÓW' columns (fan rotation setpoint — abbreviation, TODO confirm meaning)
UXM = _columns_containing(df_og, 'uxm')
df['UXM'] = df_og[UXM].mean(axis='columns')

### Loading mean and scale from historical data to standardize features

In [None]:
# Load the stored mean and scale used to standardize the features.
# Context managers close the files; the bare open() calls left them open.
# NOTE(security): pickle.load can execute arbitrary code — only load trusted files.
with open('mean.sav', 'rb') as mean_file:
    mean = pickle.load(mean_file)
with open('scale.sav', 'rb') as scale_file:
    scl = pickle.load(scale_file)

### Standardizing data

In [None]:
# Standardize the features: subtract the loaded mean, then divide by the loaded scale.
# NOTE(review): df is overwritten by its standardized version, so re-running this
# cell alone standardizes twice — rebuild df from the feature cell first.
# Presumably mean/scl line up with df's columns — confirm against the training code.
df = df.sub(mean).div(scl)

### Loading pre-trained model

In [None]:
# Load the pre-trained model from disk.
# A context manager closes the file; the bare open() call left it open.
# NOTE(security): pickle.load can execute arbitrary code — only load trusted files.
filename = 'finalized_model.sav'
with open(filename, 'rb') as model_file:
    model = pickle.load(model_file)

### Predicting: 

In [None]:
# Predict with the loaded model, then relabel the result: rows take df's index
# and the single output column is named 'temp_zuz'.
y_pred = pd.DataFrame(model.predict(df))
y_pred.index, y_pred.columns = df.index, ['temp_zuz']
y_pred

### Plot predicted values

In [None]:
# Scatter of the predicted 'temp_zuz' values against the prediction index.
# Explicit figure/axes plus a title and axis labels so the figure stands alone.
fig, ax = plt.subplots()
ax.scatter(y_pred.index, y_pred['temp_zuz'])
ax.set(
    title='Predicted temp_zuz (scatter)',
    xlabel=y_pred.index.name or 'index',
    ylabel='temp_zuz',
)
plt.show()

In [None]:
# Line plot of the predicted 'temp_zuz' values against the prediction index.
# Explicit figure/axes plus a title and axis labels so the figure stands alone.
fig, ax = plt.subplots()
ax.plot(y_pred.index, y_pred['temp_zuz'])
ax.set(
    title='Predicted temp_zuz (line)',
    xlabel=y_pred.index.name or 'index',
    ylabel='temp_zuz',
)
plt.show()