In [1]:
import sys
sys.path.insert(1, './imports')
import pandas as pd
import numpy as np
from exploratory_analysis import *
from preprocessing import *
from model_selection import *
from model_end_to_end import *
# Explicit scikit-learn imports stay after the star imports so that the names
# used directly in this notebook do not depend on what those modules re-export.
from sklearn.base import clone
from sklearn.decomposition import PCA
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.svm import SVC
from sklearn.model_selection import KFold
from sklearn.preprocessing import PolynomialFeatures

In [5]:
# Load the data set that every evaluator in this notebook is fitted on.
fires = pd.read_csv("fires.csv")

# The evaluator receives nine positional components, in this order:
# three SVC classifiers, three polynomial expansions, three boosted regressors.
area_classifiers = [
    SVC(kernel='rbf', gamma=0.3727, C=3, class_weight={0: 1.3, 1: 1}),
    SVC(kernel='rbf', gamma=50, C=10),
    # NOTE(review): per the scikit-learn docs gamma only applies to the
    # rbf/poly/sigmoid kernels, so it should have no effect here - confirm.
    SVC(C=10.326154432909957, gamma=100.0, kernel='linear'),
]
area_expansions = [PolynomialFeatures(degree=power) for power in (3, 1, 3)]
area_regressors = [
    GradientBoostingRegressor(learning_rate=rate, n_estimators=20)
    for rate in (0.1875, 0.46, 0.6)
]

msinho_area = MontesinhoCompleteModel_evaluator(
    *area_classifiers,
    *area_expansions,
    *area_regressors,
)
msinho_area.fit(fires)
# Last expression of the cell: the value returned by average_loss() is displayed.
msinho_area.average_loss()

1.4384081903056256

In [6]:
# Second configuration of the nine-component evaluator; it reuses the `fires`
# frame loaded by an earlier cell and differs only in the SVC hyper-parameters.
acc_classifiers = [
    # NOTE(review): per the scikit-learn docs `degree` is only used by the
    # 'poly' kernel, so degree=5 should have no effect on an rbf SVC - confirm.
    SVC(kernel='rbf', gamma=0.002652859881404326, C=100, degree=5),
    # NOTE(review): gamma is likewise documented as unused by the linear kernel.
    SVC(C=8.41850316809234, gamma=0.04349773501643578, kernel='linear'),
    SVC(C=0.26869573356106563, degree=2, gamma=0.9600363819999834, kernel='poly'),
]
acc_expansions = [PolynomialFeatures(degree=power) for power in (3, 1, 3)]
acc_regressors = [
    GradientBoostingRegressor(learning_rate=rate, n_estimators=20)
    for rate in (0.1875, 0.46, 0.6)
]

msinho_acc = MontesinhoCompleteModel_evaluator(
    *acc_classifiers,
    *acc_expansions,
    *acc_regressors,
)
msinho_acc.fit(fires)
# Last expression of the cell: the value returned by average_loss() is displayed.
msinho_acc.average_loss()

1.293909348100353

In [82]:
class MontesinhoCompleteModel_evaluator_simple_gbr():
    """
        Single-model evaluation pipeline for the Montesinho data.

        The data is preprocessed with DataPreprocessorPCA, the non-target columns
        are expanded with PolynomialFeatures, and one regressor is trained or
        cross-validated on the "area" column.

        Parameters
        ----------
        model : estimator exposing fit(X, y) and predict(X). It must also be
            accepted by sklearn.base.clone, because evaluate() scores copies.
        degree : degree passed to PolynomialFeatures.
    """
    def __init__(self, model, degree):
        self.preprocessor = DataPreprocessorPCA()
        self.poly = PolynomialFeatures(degree=degree)
        self.model = model

    def fit(self, data):
        """
            Preprocess `data` and cache the expanded feature matrix.

            Sets `self.data` (output of transform_with_1target()) and
            `self.data_poly` (polynomial expansion of every column but "area").
            Must be called before train() or evaluate().
        """
        self.preprocessor.fit(data)
        self.data = self.preprocessor.transform_with_1target()
        # Build the feature frame once instead of dropping the target twice.
        features = self.data.drop(["area"], axis=1)
        self.poly.fit(features)
        self.data_poly = self.poly.transform(features)

    def train(self):
        """Fit the wrapped model on the complete expanded data set."""
        self.model.fit(self.data_poly, self.data["area"])

    def predict_instance(self, instance):
        """
            Predict the target for a single raw instance (not implemented).

            Raises
            ------
            NotImplementedError : always. The previous stub silently returned
                None, which a caller could mistake for a prediction.
        """
        raise NotImplementedError(
            "predict_instance is not implemented for "
            "MontesinhoCompleteModel_evaluator_simple_gbr"
        )

    def evaluate(self, n_splits=15, random_state=3558):
        """
            Return the mean of average_absolute_loss over shuffled K-fold splits.

            Each fold is scored with a fresh clone of `self.model`, so the
            wrapped model (for example one fitted by train()) is left untouched.

            Parameters
            ----------
            n_splits : number of folds handed to KFold (default 15).
            random_state : seed handed to KFold for the shuffle (default 3558).
        """
        X = self.data_poly
        y = self.data["area"]
        splitter = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
        losses = []
        for train_index, test_index in splitter.split(X, y):
            # Clone rather than refit self.model in place: refitting would
            # overwrite whatever state the caller's model currently holds.
            fold_model = clone(self.model)
            fold_model.fit(X[train_index], y.iloc[train_index])
            y_pred = fold_model.predict(X[test_index])
            losses.append(average_absolute_loss(y_pred, y.iloc[test_index]))
        return sum(losses) / len(losses)
    
class DataPreprocessorPCA(DataPreprocessor):
    """
        Interface for preprocessing Montesinho data. Includes additional PCA transformation of the numeric continuous variables
    """
    def __init__(self):
        # NOTE(review): the base-class initializer is not called here (same as
        # before); self.data and self.processor are presumably set by the
        # inherited fit() - confirm against DataPreprocessor.
        self.transformable_cols = ['FFMC', 'DMC', 'DC', 'ISI', 'temp', 'RH','wind']

    def _pca_component_frame(self):
        """
            Project the transformable columns onto their principal components.

            The columns are first passed through self.processor.transform, then
            a new PCA is fitted on the result and stored as self.pca_processor.
            Returns a DataFrame with columns 'pc1', 'pc2', ... (one per component).
        """
        numerical_df = pd.DataFrame(
            self.processor.transform(self.data[self.transformable_cols]),
            columns=self.transformable_cols,
        )
        self.pca_processor = PCA()
        self.pca_processor.fit(numerical_df)
        pca_mat = self.pca_processor.transform(numerical_df)
        # Name the columns after the actual component count, not a fixed 7.
        component_names = ['pc' + str(i) for i in range(1, pca_mat.shape[1] + 1)]
        return pd.DataFrame(pca_mat, columns=component_names)

    def _build_feature_frame(self, target_columns=None):
        """
            Concatenate the encoded features, the PCA components and the target.

            Parameters
            ----------
            target_columns : optional list of column names to keep from the
                frame returned by area_split_transform(); None keeps them all.
        """
        # The inherited encoders run in the same order as before, ahead of the PCA.
        month_df = self.month_encoding()
        xy_df = self.xy_encoding()
        day_df = self.day_encoding()
        rain_series = self.rain_transform()
        dc_df = self.DC_encoding()
        target_df = self.area_split_transform()
        if target_columns is not None:
            target_df = target_df[target_columns]
        pca_df = self._pca_component_frame()
        return pd.concat([month_df, xy_df, day_df, rain_series, dc_df, pca_df, target_df], axis=1)

    def transform_with_2target(self):
        """Return the feature frame with every column of area_split_transform() appended."""
        return self._build_feature_frame()

    def transform_with_1target(self):
        """Return the feature frame with only the "area" target column appended."""
        return self._build_feature_frame(target_columns=["area"])

    def transform_single_instance(self, instance):
        """
            Transform one raw instance (not implemented).

            Raises
            ------
            NotImplementedError : always. The previous body returned an
                undefined name and therefore failed with NameError on any call.
        """
        # TODO: reuse the fitted self.pca_processor here rather than fitting a
        # new PCA on a single row.
        raise NotImplementedError(
            "transform_single_instance is not implemented for DataPreprocessorPCA"
        )
        

In [71]:
# Cross-validate one gradient-boosting regressor on degree-1 polynomial features.
gbr_candidate = GradientBoostingRegressor(n_estimators=100, learning_rate=0.05)
montesinho_gbr = MontesinhoCompleteModel_evaluator_simple_gbr(model=gbr_candidate, degree=1)
montesinho_gbr.fit(fires)
# Last expression of the cell: the value returned by evaluate() is displayed.
montesinho_gbr.evaluate()

1.1688254625573125

In [85]:
# Cross-validate a random forest with the same single-model evaluator.
# criterion="mse" is no longer passed: that spelling was deprecated in
# scikit-learn 1.0 and removed in 1.2, and squared error is the default
# criterion anyway, so omitting it works on both old and new releases.
montesinho_rfr = MontesinhoCompleteModel_evaluator_simple_gbr(RandomForestRegressor(n_estimators= 100), 1)
montesinho_rfr.fit(fires)
# Last expression of the cell: the value returned by evaluate() is displayed.
montesinho_rfr.evaluate()

1.185898899196142