In [None]:
import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn import linear_model
from sklearn import metrics
from sklearn.pipeline import Pipeline

# A notebook kernel executes cells as `__main__` with no parent package, so a
# relative import (`from ..data_processing... import *`) raises
# "ImportError: attempted relative import with no known parent package".
# Put the project root (the parent of the working directory, the same location
# the data folder is resolved against) on sys.path and import absolutely.
# NOTE(review): assumes the notebook is started from its own folder and that
# `data_processing/` sits next to that folder — confirm against the repo layout.
_PROJECT_ROOT = Path.cwd().parent
if str(_PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(_PROJECT_ROOT))

from data_processing.data_processing import *


In [None]:
# Project root: the parent of the current working directory.
# The input CSV is looked up under <MAIN_PATH>/data/.
MAIN_PATH = Path.cwd().parent

## Load data

In [None]:
# Read <MAIN_PATH>/data/data.csv and pass the raw frame through `load`.
# NOTE(review): `load` is not defined in this notebook — presumably it comes
# from the star import of data_processing; what it does to the frame is not
# visible here.
data_csv = MAIN_PATH / "data" / "data.csv"
org_df = load(pd.read_csv(data_csv))
org_df

In [None]:
# Start an empty feature frame that shares org_df's row index; the feature
# columns are filled in by the following cells.
df = pd.DataFrame(index=org_df.index)

## Simple features based on correlation

In [None]:
# Row-wise mean over the 'TEMP POD 2 WARSTWĄ WYMURÓWKI [°C]' columns
# (temperature under the 2nd lining layer), i.e. every column whose name
# contains '001tix'.
is_tix1 = org_df.columns.str.contains('001tix')
TIX1 = org_df.columns[is_tix1].values
df['TIX1'] = org_df.loc[:, TIX1].mean(axis=1)

In [None]:
# Row-wise mean over the 'WODY POWROTNE KOLEKTORÓW [°C]' columns
# (collector return water temperature), i.e. every column whose name
# contains 'tir'.
is_tir = org_df.columns.str.contains('tir')
TIR = org_df.columns[is_tir].values
df['TIR'] = org_df.loc[:, TIR].mean(axis=1)

In [None]:
# Add the concentrate feed regulation columns (names containing 'fcx')
# to the feature frame unchanged.
is_fcx = org_df.columns.str.contains('fcx')
FCX = org_df.columns[is_fcx].values
df[FCX] = org_df.loc[:, FCX]

In [None]:
# Total thermal power: copy the columns whose names contain 'nir'
# into the feature frame unchanged.
is_nir = org_df.columns.str.contains('nir')
NIR = org_df.columns[is_nir].values
df[NIR] = org_df.loc[:, NIR]

In [None]:
# Add the slag temperature column; "TEMP_ZUZ" is later separated out as the
# regression target when the data is split.
df["TEMP_ZUZ"] = org_df["temp_zuz"]

In [None]:
# Discard every row that has at least one missing value (features or target),
# then show the remaining (rows, columns).
df = df.dropna(axis="index", how="any")
df.shape

In [None]:
# Show the feature frame after rows with missing values were dropped.
df

## Split data

In [None]:
# Partition the feature frame into train / validation / test subsets.
# NOTE(review): `split` is not defined in this notebook — presumably it comes
# from the star import of data_processing; how it partitions the rows
# (ratios, ordering, shuffling) is not visible here — confirm.
train, val, test = split(df)

In [None]:
# Predictors: every column except the target "TEMP_ZUZ", per partition.
X_train = train.drop(columns=["TEMP_ZUZ"])
X_val = val.drop(columns=["TEMP_ZUZ"])
X_test = test.drop(columns=["TEMP_ZUZ"])

# Target: the "TEMP_ZUZ" column of each partition.
y_train = train["TEMP_ZUZ"]
y_val = val["TEMP_ZUZ"]
y_test = test["TEMP_ZUZ"]

## Ridge Regression

In [None]:
# Ridge regression (linear model with an L2 penalty, alpha=0.8) fitted on the
# training partition. The fitted estimator is the cell's displayed value.
model = linear_model.Ridge(alpha=0.8)
model.fit(X=X_train, y=y_train)

In [None]:
# Mean squared error of the Ridge model on the validation partition.
ridge_val_pred = model.predict(X_val)
metrics.mean_squared_error(y_true=y_val, y_pred=ridge_val_pred)

## Polynomial Regression

In [None]:
# Polynomial regression: degree-2 feature expansion followed by ordinary
# least squares. The linear step is created with fit_intercept=False;
# PolynomialFeatures is left at its defaults (scikit-learn's default
# include_bias=True already adds a constant column).
poly_steps = [
    ('poly', preprocessing.PolynomialFeatures(degree=2)),
    ('linear', linear_model.LinearRegression(fit_intercept=False)),
]
poly_model = Pipeline(steps=poly_steps)

In [None]:
# Fit the polynomial pipeline on the training partition; the value returned by
# fit() is bound back to the same name.
poly_model = poly_model.fit(X_train, y_train)

In [None]:
# Mean squared error of the polynomial pipeline on the validation partition.
poly_val_pred = poly_model.predict(X_val)
metrics.mean_squared_error(y_true=y_val, y_pred=poly_val_pred)