### Training and prediction functions

In [1]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd


class RF:
    """Random-forest regressor whose hyper-parameters are chosen by grid search.

    An instance owns a ``MinMaxScaler`` (fitted on the training features) and
    a ``GridSearchCV`` wrapped around a ``RandomForestRegressor``.  The input
    table is expected to hold the feature columns first, followed by the
    target column at position ``num_features``.
    """

    # Alternative hyper-parameter grids.  Only the one bound to ``parameters``
    # below is handed to the grid search.
    # NOTE(review): the integer ``max_features`` candidates presumably must not
    # exceed the number of feature columns -- confirm against the data used.
    parameters1 = {'max_depth': [10, 20, 30],
                   'n_estimators': [100, 500, 1000],
                   'max_features': [15, 20, 25, 30],
                   'min_samples_leaf': [1, 2, 4],
                   'min_samples_split': [2, 5, 10],
                   'bootstrap': [True, False],
                   }

    parameters2 = {'max_depth': [5, 10, 20, 30],
                   'n_estimators': [10, 50, 100, 1000],
                   'max_features': [20, 40, 70],
                   'min_samples_leaf': [1, 2, 4],
                   'min_samples_split': [2, 5, 10],
                   'bootstrap': [True, False],
                   }

    parameters3 = {'max_depth': [3, 5, 10, 15],
                   'n_estimators': [50, 100, 500, 1000],
                   'max_features': [10, 15, 20],
                   'min_samples_leaf': [1, 2, 4],
                   'min_samples_split': [2, 5, 10],
                   'bootstrap': [True, False],
                   }

    # Grid used by ``__init__``; rebind (or override in a subclass) to switch.
    parameters = parameters1

    def __init__(self, num_features: int):
        """Create the scaler and the (not yet fitted) grid search.

        Args:
            num_features: Number of leading columns of the training table
                that are features; the column right after them is the target.
        """
        self.history = None
        self.num_features = num_features
        self.scaler = MinMaxScaler()
        # 5-fold cross-validation scored by negative MSE, using all CPU cores.
        self.gsc = GridSearchCV(estimator=RandomForestRegressor(),
                                param_grid=self.parameters,
                                cv=5, scoring='neg_mean_squared_error',
                                verbose=0, n_jobs=-1)

    def generate_feature_and_target(self, data: pd.core.frame.DataFrame):
        """Split ``data`` into scaled features and the raw target vector.

        Args:
            data: 2-D table (``DataFrame`` or array-like) whose first
                ``num_features`` columns are features and whose next column
                is the target.

        Returns:
            Tuple ``(feature_train, target_train)``: the features after
            ``self.scaler.fit_transform`` and the target column as an array.
        """
        # A DataFrame does not support ``data[:, :n]`` positional slicing, so
        # work on its array form; plain ndarrays pass through unchanged.
        values = np.asarray(data)
        feature_train = self.scaler.fit_transform(values[:, :self.num_features])
        target_train = np.array(values[:, self.num_features])

        return feature_train, target_train

    def best_rf(self, data: pd.core.frame.DataFrame):
        """Run the grid search on ``data`` and return the winning model.

        Args:
            data: Training table, see ``generate_feature_and_target``.

        Returns:
            Tuple ``(search, best_estimator)``: the object returned by
            ``self.gsc.fit`` and its ``best_estimator_`` attribute.
        """
        feature_train, target_train = self.generate_feature_and_target(data)
        fitted_search = self.gsc.fit(feature_train, target_train)

        return fitted_search, fitted_search.best_estimator_

In [1]:
def forecast(model, scaler, test_data,
             sample_path='C:/data/sample_submission.csv',
             output_path='submission.csv'):
    """Predict ``time_to_failure`` for ``test_data`` and write a submission CSV.

    The sample submission is loaded (indexed by ``seg_id``), its first
    ``len(test_data)`` rows of ``time_to_failure`` are overwritten with the
    model's predictions in row order, and the result is saved.

    Args:
        model: Fitted estimator exposing ``predict(features)``.
        scaler: Fitted transformer exposing ``transform(features)``.
        test_data: 2-D table of test features, one row per submission row.
        sample_path: CSV template containing ``seg_id`` and
            ``time_to_failure`` columns.
        output_path: Destination of the filled-in submission CSV.

    Raises:
        ValueError: If ``test_data`` has more rows than the template, or the
            model does not return exactly one value per row.
    """
    submission = pd.read_csv(sample_path, index_col='seg_id',
                             dtype={"time_to_failure": np.float64})
    feature_test = np.asarray(scaler.transform(np.array(test_data)))
    n_rows = feature_test.shape[0]

    if n_rows > len(submission):
        raise ValueError(
            f"test_data has {n_rows} rows but the sample submission "
            f"only has {len(submission)}"
        )

    if n_rows:
        # One batched call instead of one ``predict`` per row; ``reshape``
        # also verifies that exactly one prediction per row came back.
        predictions = np.asarray(model.predict(feature_test),
                                 dtype=np.float64).reshape(n_rows)
        # Positional write through ``iloc``: the index holds ``seg_id``
        # labels, and chained ``frame.col[i] = ...`` assignment is not
        # guaranteed to modify the frame.
        target_col = submission.columns.get_loc('time_to_failure')
        submission.iloc[:n_rows, target_col] = predictions

    submission.to_csv(output_path)