In [None]:
import re
import sys
import glob
import math
import random
import numpy as np
import pandas as pd
import seaborn as sns
from time import ctime
from random import randrange
from sklearn.model_selection import KFold
from joblib import Parallel, delayed
import multiprocessing
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
from sklearn import preprocessing
# Register pandas' converters with matplotlib and select seaborn's
# 'darkgrid' style for any figures drawn later in the notebook.
register_matplotlib_converters()
sns.set_style('darkgrid')

In [None]:
from sklearn.linear_model._bayes import ARDRegression
from sklearn.ensemble._weight_boosting import AdaBoostRegressor
from sklearn.linear_model._bayes import BayesianRidge
from sklearn.tree._classes import DecisionTreeRegressor 
from sklearn.linear_model._coordinate_descent import ElasticNetCV
from sklearn.ensemble._forest import ExtraTreesRegressor
from sklearn.gaussian_process._gpr import GaussianProcessRegressor
from sklearn.linear_model._glm.glm import GeneralizedLinearRegressor
from sklearn.ensemble._gb import GradientBoostingRegressor
from sklearn.ensemble._hist_gradient_boosting.gradient_boosting import HistGradientBoostingRegressor
from sklearn.linear_model._huber import HuberRegressor
from sklearn.isotonic import IsotonicRegression 
from sklearn.neighbors._regression import KNeighborsRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model._least_angle import LarsCV
from sklearn.linear_model._coordinate_descent import LassoCV
from sklearn.linear_model._least_angle import LassoLarsCV
from sklearn.linear_model._least_angle import LassoLarsIC
from sklearn.linear_model._base import LinearRegression
from sklearn.svm._classes import LinearSVR
from sklearn.neural_network._multilayer_perceptron import MLPRegressor
from sklearn.svm._classes import NuSVR
from sklearn.linear_model._omp import OrthogonalMatchingPursuitCV
from sklearn.linear_model._passive_aggressive import PassiveAggressiveRegressor
from sklearn.neighbors._regression import RadiusNeighborsRegressor
from sklearn.ensemble._forest import RandomForestRegressor
from sklearn.linear_model._ridge import RidgeCV
from sklearn.linear_model._stochastic_gradient import SGDRegressor
from sklearn.svm._classes import SVR
from sklearn.linear_model._glm.glm import TweedieRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.model_selection import train_test_split
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
# Suppress scikit-learn ConvergenceWarning messages process-wide; several of
# the iterative estimators below are given large max_iter values.
simplefilter("ignore", category=ConvergenceWarning)

# Pool of candidate base regressors as (short name, unfitted estimator) pairs.
# The short name is also the prefix of the '<name>_pca_30.csv' file that the
# search loop reads for that estimator.
estimators = list({
    'ard': ARDRegression(),
    'ada': AdaBoostRegressor(),
    'brr': BayesianRidge(),
    'dtr': DecisionTreeRegressor(),
    'enc': ElasticNetCV(),
    'etr': ExtraTreesRegressor(),
    'gpr': GaussianProcessRegressor(),
    'glr': GeneralizedLinearRegressor(),
    'gbr': GradientBoostingRegressor(),
    'hgb': HistGradientBoostingRegressor(),
    'hur': HuberRegressor(),
    'knr': KNeighborsRegressor(),
    'ker': KernelRidge(),
    'lar': LarsCV(),
    'las': LassoCV(),
    'llc': LassoLarsCV(),
    'lli': LassoLarsIC(),
    'lir': LinearRegression(),
    'lsv': LinearSVR(max_iter=100000),
    'mlp': MLPRegressor(max_iter=10000),
    'nsv': NuSVR(max_iter=100000),
    'par': PassiveAggressiveRegressor(max_iter=10000),
    'omp': OrthogonalMatchingPursuitCV(),
    'rfr': RandomForestRegressor(),
    'sgd': SGDRegressor(max_iter=10000),
    'svr': SVR(max_iter=100000),
    'twr': TweedieRegressor(max_iter=10000),
}.items())

# Smaller pool from which the final (meta) estimator of a stack is drawn.
# These are separate instances from the ones in `estimators`.
festimators = list({
    'etr': ExtraTreesRegressor(),
    'gpr': GaussianProcessRegressor(),
    'gbr': GradientBoostingRegressor(),
    'hgb': HistGradientBoostingRegressor(),
    'ker': KernelRidge(),
    'lir': LinearRegression(),
    'mlp': MLPRegressor(max_iter=10000),
    'nsv': NuSVR(max_iter=100000),
    'rfr': RandomForestRegressor(),
    'svr': SVR(max_iter=100000),
}.items())

# --- Random search over (base-estimator subset, meta-estimator) pairs --------
# Target used to score every sampled candidate.
Y = pd.read_csv("Y_pca_30.csv")

nmodels = 10000

# Each '<name>_pca_30.csv' feature block is parsed once and reused; the files
# do not change between iterations.
# NOTE(review): presumably these files hold each base model's predictions on
# the rows of Y_pca_30.csv -- confirm against the notebook that wrote them.
feature_cache = {}

# One record per candidate; the result frame is built once after the loop.
# Writing through `stacked.loc[j]['col'] = value` is chained assignment: it
# targets a temporary row object and is not guaranteed to update `stacked`
# (it never does under pandas Copy-on-Write).
# NOTE(review): `random` is not seeded, so the sampled candidates differ
# between runs.
records = []
for k in range(nmodels):
    meta_name, meta_model = random.choice(festimators)

    # 'est0' plus 3-6 further draws (with replacement) from the base pool.
    base_name, base_model = random.choice(estimators)
    base_models = [('est0', base_model)]
    chosen_names = [base_name]
    nest = randrange(3, 7)
    for i in range(nest):
        base_name, base_model = random.choice(estimators)
        base_models.append(('est' + str(i + 1), base_model))
        chosen_names.append(base_name)

    # Column-wise concatenation of the chosen feature blocks, in draw order.
    feature_blocks = []
    for name in chosen_names:
        if name not in feature_cache:
            feature_cache[name] = pd.read_csv(name + '_pca_30.csv')
        feature_blocks.append(feature_cache[name])
    X = pd.concat(feature_blocks, axis=1)

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

    # meta_model is the shared instance held in `festimators`, so it is refit
    # in place every time it is drawn.
    model = meta_model
    model.fit(X_train, Y_train.to_numpy().reshape(-1))
    score = model.score(X_test, Y_test.to_numpy().reshape(-1))

    records.append({'base': base_models, 'meta': meta_model, 'score': score})

# Rows are indexed 0..nmodels-1 in sampling order.
stacked = pd.DataFrame(records, columns=['base', 'meta', 'score'])

In [None]:
# Rank candidates by hold-out score, ascending, so the best stacks sit at the
# highest positions (later cells pick them with printdf.iloc[i]).
# na_position='first' keeps NaN scores out of that top end; the default
# ('last') would place them after the best real scores.
printdf = stacked.sort_values(by='score', na_position='first')

In [None]:
# Persist the ranked search results; the index column is not written.
printdf.to_csv(path_or_buf='metadf.csv', index=False)

In [None]:
import os  # used only in this cell to parse fragment file names

train_frags = glob.glob("./train_fft/*")
test_frags = glob.glob("./test_fft/*")
train = pd.read_csv("train.csv")


def _segment_id(path):
    """Return the integer segment id encoded in a fragment file name ('<id>.csv').

    Uses os.path so the result does not depend on which path separator glob
    returned.
    """
    return int(os.path.splitext(os.path.basename(path))[0])


# Targets in the same order as train_frags. A single indexed lookup replaces
# the per-file boolean scan and the int() call on a one-element array.
# Raises KeyError if a fragment has no row in train.csv.
# NOTE(review): a later cell slices pca_30.csv positionally, so its rows
# presumably follow this same glob order -- confirm.
t2e_by_segment = train.set_index('segment_id')['time_to_eruption']
train_ids = [_segment_id(path) for path in train_frags]
Y = pd.Series(
    t2e_by_segment.loc[train_ids].to_numpy(),
    index=np.arange(len(train_frags)),
    dtype='int64',
)

# Segment ids of the test fragments, in glob order, stored as integers rather
# than strings written into an int64 Series.
test_id = pd.Series(
    [_segment_id(path) for path in test_frags],
    index=np.arange(len(test_frags)),
    dtype='int64',
)

In [None]:
from sklearn import preprocessing
# Standardise the target. `timescaler` is kept so predictions can be mapped
# back to the original scale later.
# NOTE: this cell overwrites Y with its scaled version, so running it a second
# time refits the scaler on already-scaled values.
timescaler = preprocessing.StandardScaler()
timescaler.fit(Y.to_numpy().reshape(-1, 1))
Y = pd.Series(timescaler.transform(Y.to_numpy().reshape(-1, 1)).reshape(-1))

In [None]:
# pca_30.csv is split positionally: the first len(train_frags) rows become the
# training features and the last len(test_frags) rows the test features.
# NOTE(review): assumes the file stacks train rows first, then test rows, in
# the same order as the fragment lists -- confirm.
all_pca = pd.read_csv("pca_30.csv")
X, X_test = all_pca.iloc[:len(train_frags)], all_pca.iloc[-len(test_frags):]

In [None]:
from sklearn.linear_model._bayes import ARDRegression
from sklearn.ensemble._weight_boosting import AdaBoostRegressor
from sklearn.linear_model._bayes import BayesianRidge
from sklearn.tree._classes import DecisionTreeRegressor 
from sklearn.linear_model._coordinate_descent import ElasticNetCV
from sklearn.ensemble._forest import ExtraTreesRegressor
from sklearn.gaussian_process._gpr import GaussianProcessRegressor
from sklearn.linear_model._glm.glm import GeneralizedLinearRegressor
from sklearn.ensemble._gb import GradientBoostingRegressor
from sklearn.ensemble._hist_gradient_boosting.gradient_boosting import HistGradientBoostingRegressor
from sklearn.linear_model._huber import HuberRegressor
from sklearn.isotonic import IsotonicRegression 
from sklearn.neighbors._regression import KNeighborsRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model._least_angle import LarsCV
from sklearn.linear_model._coordinate_descent import LassoCV
from sklearn.linear_model._least_angle import LassoLarsCV
from sklearn.linear_model._least_angle import LassoLarsIC
from sklearn.linear_model._base import LinearRegression
from sklearn.svm._classes import LinearSVR
from sklearn.neural_network._multilayer_perceptron import MLPRegressor
from sklearn.svm._classes import NuSVR
from sklearn.linear_model._omp import OrthogonalMatchingPursuitCV
from sklearn.linear_model._passive_aggressive import PassiveAggressiveRegressor
from sklearn.neighbors._regression import RadiusNeighborsRegressor
from sklearn.ensemble._forest import RandomForestRegressor
from sklearn.linear_model._ridge import RidgeCV
from sklearn.linear_model._stochastic_gradient import SGDRegressor
from sklearn.svm._classes import SVR
from sklearn.linear_model._glm.glm import TweedieRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.model_selection import KFold

n_fold = 5
cv = KFold(n_splits=n_fold, shuffle=True, random_state=420)

# printdf is sorted by ascending score, so the highest positions hold the
# best-scoring stacks; this cell refits positions 9999 down to 9801 and writes
# one submission file per stack.
for i in range(9999, 9800, -1):
    r2 = 0
    predicted_times = np.zeros(len(X_test))
    estimators = printdf.iloc[i]['base']
    festimator = printdf.iloc[i]['meta']
    model = StackingRegressor(estimators=estimators, final_estimator=festimator)
    print('\n', i, 'started at', ctime())

    # Refit on each training fold; accumulate validation R^2 and the
    # test-set predictions so both can be averaged over folds.
    for train_index, valid_index in cv.split(X):
        X_train = X.iloc[train_index, :]
        X_valid = X.iloc[valid_index, :]

        Y_train = Y.iloc[train_index]
        Y_valid = Y.iloc[valid_index]

        model.fit(X_train, Y_train)
        r2 += model.score(X_valid, Y_valid)
        predicted_times += model.predict(X_test)

    r2 /= n_fold
    predicted_times /= n_fold
    # The averaged score was previously computed and then discarded.
    print(i, 'mean validation R^2:', r2)

    # timescaler was fit on a single-column 2-D array, so predictions are
    # reshaped to a column before undoing the scaling and flattened again
    # for the submission frame.
    predicted_times = timescaler.inverse_transform(
        predicted_times.reshape(-1, 1)
    ).reshape(-1)
    submission = pd.DataFrame({
        "segment_id": test_id,
        "time_to_eruption": predicted_times
    })
    file = 'submission_' + str(i) + '.csv'
    submission.to_csv(file, index=False)

In [None]:
n_fold = 5
cv = KFold(n_splits=n_fold, shuffle=True, random_state=420)

# Continues down the ranking in printdf (sorted by ascending score): refits
# positions 9800 down to 9001 and writes one submission file per stack.
for i in range(9800, 9000, -1):
    r2 = 0
    predicted_times = np.zeros(len(X_test))
    estimators = printdf.iloc[i]['base']
    festimator = printdf.iloc[i]['meta']
    model = StackingRegressor(estimators=estimators, final_estimator=festimator)
    print('\n', i, 'started at', ctime())

    # Refit on each training fold; accumulate validation R^2 and the
    # test-set predictions so both can be averaged over folds.
    for train_index, valid_index in cv.split(X):
        X_train = X.iloc[train_index, :]
        X_valid = X.iloc[valid_index, :]

        Y_train = Y.iloc[train_index]
        Y_valid = Y.iloc[valid_index]

        model.fit(X_train, Y_train)
        r2 += model.score(X_valid, Y_valid)
        predicted_times += model.predict(X_test)

    r2 /= n_fold
    predicted_times /= n_fold
    # The averaged score was previously computed and then discarded.
    print(i, 'mean validation R^2:', r2)

    # timescaler was fit on a single-column 2-D array, so predictions are
    # reshaped to a column before undoing the scaling and flattened again
    # for the submission frame.
    predicted_times = timescaler.inverse_transform(
        predicted_times.reshape(-1, 1)
    ).reshape(-1)
    submission = pd.DataFrame({
        "segment_id": test_id,
        "time_to_eruption": predicted_times
    })
    file = 'submission_' + str(i) + '.csv'
    submission.to_csv(file, index=False)