In [118]:
import warnings
warnings.filterwarnings('ignore')

import ast
from catboost import CatBoostRegressor
from collections import defaultdict, Counter
from concurrent.futures import ThreadPoolExecutor
from lightgbm import LGBMRegressor
import numpy as np
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from openfe import OpenFE, transform, tree_to_formula, formula_to_tree, get_candidate_features, TwoStageFeatureSelector, ForwardFeatureSelector
import optuna
import os

import pandas as pd
from pprint import pprint

import random

from sklearn.ensemble import ExtraTreesRegressor, HistGradientBoostingRegressor, RandomForestRegressor
from sklearn.feature_selection import mutual_info_regression, RFECV
from sklearn.inspection import permutation_importance
from sklearn.kernel_approximation import Nystroem
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import cross_validate, KFold, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler

from scipy import stats

import time
from tqdm.notebook import tqdm

from xgboost import XGBRegressor

pd.set_option('display.max_columns', None)

experiment_name = 'openfe'

In [2]:
# Load the train and test datasets
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

df_train.shape, df_test.shape

((15289, 18), (10194, 17))

In [3]:
binary_cols = []

cat_cols = ['clonesize', 'honeybee', 'bumbles', 'andrena', 'MaxOfUpperTRange', 'MinOfUpperTRange', 'AverageOfUpperTRange', 'MaxOfLowerTRange', 'MinOfLowerTRange', 'AverageOfLowerTRange', 'RainingDays', 'AverageRainingDays']

num_cols = ['fruitset', 'fruitmass', 'seeds']

ordinal_cols = []

In [4]:
TARGET = 'yield'

In [5]:
# Separate features and target for train and validation data

X_train = df_train.drop(['id', TARGET], axis=1)
y_train = df_train[TARGET]

X_test = df_test.drop(['id'], axis=1)

k10 = KFold(n_splits=3, random_state=5, shuffle=True)

In [6]:
X_train.head()

Unnamed: 0,clonesize,honeybee,bumbles,andrena,osmia,MaxOfUpperTRange,MinOfUpperTRange,AverageOfUpperTRange,MaxOfLowerTRange,MinOfLowerTRange,AverageOfLowerTRange,RainingDays,AverageRainingDays,fruitset,fruitmass,seeds
0,25.0,0.5,0.25,0.75,0.5,69.7,42.1,58.2,50.2,24.3,41.2,24.0,0.39,0.425011,0.417545,32.460887
1,25.0,0.5,0.25,0.5,0.5,69.7,42.1,58.2,50.2,24.3,41.2,24.0,0.39,0.444908,0.422051,33.858317
2,12.5,0.25,0.25,0.63,0.63,86.0,52.0,71.9,62.0,30.0,50.8,24.0,0.39,0.552927,0.470853,38.341781
3,12.5,0.25,0.25,0.63,0.5,77.4,46.8,64.7,55.8,27.0,45.8,24.0,0.39,0.565976,0.478137,39.467561
4,25.0,0.5,0.25,0.63,0.63,77.4,46.8,64.7,55.8,27.0,45.8,24.0,0.39,0.579677,0.494165,40.484512


In [7]:
X_test.head()

Unnamed: 0,clonesize,honeybee,bumbles,andrena,osmia,MaxOfUpperTRange,MinOfUpperTRange,AverageOfUpperTRange,MaxOfLowerTRange,MinOfLowerTRange,AverageOfLowerTRange,RainingDays,AverageRainingDays,fruitset,fruitmass,seeds
0,25.0,0.25,0.25,0.25,0.25,86.0,52.0,71.9,62.0,30.0,50.8,24.0,0.39,0.399367,0.408088,31.394569
1,12.5,0.25,0.25,0.75,0.63,94.6,57.2,79.0,68.2,33.0,55.9,1.0,0.1,0.488048,0.442866,36.846956
2,12.5,0.25,0.25,0.63,0.63,86.0,52.0,71.9,62.0,30.0,50.8,16.0,0.26,0.583379,0.487057,40.037644
3,25.0,0.5,0.38,0.38,0.63,86.0,52.0,71.9,62.0,30.0,50.8,16.0,0.26,0.433014,0.422847,33.116091
4,37.5,0.75,0.25,0.25,0.25,94.6,57.2,79.0,68.2,33.0,55.9,24.0,0.39,0.360996,0.38886,29.558019


In [8]:
# Define pipelines
knn_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsRegressor(n_neighbors=50))
])

ridge_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('nystroem', Nystroem(n_components=500, random_state=5)),
    ('ridge', Ridge())
])

linear_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('ridge', LinearRegression()),
])

# Manually set pipeline names
knn_pipeline.name = 'KNN'
ridge_pipeline.name = 'Nystroem Ridge'
linear_pipeline.name = 'LR Pipeline'

In [9]:
models = [
    CatBoostRegressor(random_state=5, verbose=False, early_stopping_rounds=100),
    ExtraTreesRegressor(random_state=5),
    # GaussianNB(),
    HistGradientBoostingRegressor(random_state=5),
    knn_pipeline,
    linear_pipeline,
    LGBMRegressor(n_jobs=-1, random_state=5),
    LinearRegression(),
    RandomForestRegressor(random_state=5),
    XGBRegressor(random_state=5),
]

In [10]:
def evaluate_models(models, X, y, important_features, cv_split, experiment_name):
    Model_compare = pd.DataFrame(columns=['Model Name', 
                                        'Model Parameters', 
                                        'Model Train Accuracy', 
                                        'Model Test Accuracy', 
                                        'Model Test Accuracy Std', 
                                        'Model Time'])
    
    def evaluate_model(alg, idx):
        if hasattr(alg, 'name'):
            model_name = alg.name
        else:
            model_name = alg.__class__.__name__
        features = important_features.get(model_name, [])

        # Check if the list of important features is empty
        if len(features) == 0:
            # If empty, return results with zero values
            print(f'Skipping {model_name} due to no important features.')
            return {
                'Model Name': model_name,
                'Model Parameters': str(alg.get_params()),
                'Model Train Accuracy': 0,
                'Model Test Accuracy': 0,
                'Model Test Accuracy Std': 0,
                'Model Time': "0 min 0.00 sec",
            }
        
        cv_results = cross_validate(alg, 
                                    X[features], 
                                    y, cv=cv_split, 
                                    scoring='neg_mean_absolute_error', 
                                    return_train_score=True, 
                                    n_jobs=-1)

        # Time formatting
        mean_fit_time = cv_results['fit_time'].mean()
        minutes, seconds = divmod(mean_fit_time, 60)

        # Results population
        result = {
            'Model Name': model_name,
            'Model Parameters': str(alg.get_params()),
            'Model Train Accuracy': -cv_results['train_score'].mean(),
            'Model Test Accuracy': -cv_results['test_score'].mean(),
            'Model Test Accuracy Std': cv_results['test_score'].std(),
            'Model Time': f"{int(minutes)} min {seconds:.2f} sec",
        }

        print(f'Done with {model_name}.')
        return result

    results_list = []

    with ThreadPoolExecutor(max_workers=50) as executor:
        futures = [executor.submit(evaluate_model, alg, idx) for idx, alg in enumerate(tqdm(models, desc='Models'))]
        for future in tqdm(futures, total=len(futures), desc='Progress'):
            result = future.result()
            results_list.append(result)

    model_compare = pd.DataFrame(results_list)

    model_compare.sort_values(by=['Model Test Accuracy'], ascending=True, inplace=True)
    model_compare.to_csv(f'{experiment_name}_results.csv', index=False)

    return model_compare

In [11]:
baseline_features = {}

for model in models:
    if hasattr(model, 'name'):
        model_name = model.name
    else:
        model_name = model.__class__.__name__

    baseline_features[model_name] = list(X_train.columns)

In [12]:
%%time

baseline_models = evaluate_models(models, X_train, y_train, baseline_features, k10, f'{experiment_name}')
baseline_models

Models:   0%|          | 0/9 [00:00<?, ?it/s]

Progress:   0%|          | 0/9 [00:00<?, ?it/s]

Done with HistGradientBoostingRegressor.
Done with LGBMRegressor.
Done with XGBRegressor.
Done with KNN.
Done with LinearRegression.
Done with LR Pipeline.
Done with ExtraTreesRegressor.
Done with CatBoostRegressor.
Done with RandomForestRegressor.
CPU times: total: 312 ms
Wall time: 27 s


Unnamed: 0,Model Name,Model Parameters,Model Train Accuracy,Model Test Accuracy,Model Test Accuracy Std,Model Time
2,HistGradientBoostingRegressor,"{'categorical_features': None, 'early_stopping...",328.666081,355.479528,1.452854,0 min 0.94 sec
5,LGBMRegressor,"{'boosting_type': 'gbdt', 'class_weight': None...",314.487103,356.119295,0.761421,0 min 0.50 sec
0,CatBoostRegressor,"{'loss_function': 'RMSE', 'verbose': False, 'r...",300.162681,358.121005,0.588185,0 min 11.08 sec
7,RandomForestRegressor,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri...",137.575689,367.968505,0.94583,0 min 10.00 sec
4,LR Pipeline,"{'memory': None, 'steps': [('scaler', Standard...",370.792226,371.855852,3.770318,0 min 0.05 sec
6,LinearRegression,"{'copy_X': True, 'fit_intercept': True, 'n_job...",370.792226,371.855852,3.770318,0 min 0.02 sec
8,XGBRegressor,"{'objective': 'reg:squarederror', 'base_score'...",240.757336,372.808122,2.311827,0 min 3.48 sec
1,ExtraTreesRegressor,"{'bootstrap': False, 'ccp_alpha': 0.0, 'criter...",0.088081,381.150267,1.702694,0 min 10.78 sec
3,KNN,"{'memory': None, 'steps': [('scaler', Standard...",442.486157,452.155949,4.004162,0 min 0.03 sec


In [20]:
with open('baseline_features.txt', mode='w') as f:
    pprint(baseline_features, stream=f)

# OPENFE

In [21]:
candidate_features_list = get_candidate_features(numerical_features=num_cols,
                                                     categorical_features=cat_cols,
                                                     ordinal_features=[])

In [27]:
%%time

# stage2_metric in ['gain_importance', 'permutation']
# stage1_metric in ['predictive', 'corr', 'mi']
# metric in ['binary_logloss', 'multi_logloss', 'auc', 'rmse']

ofe = OpenFE()

features = ofe.fit(
    data=X_train,
    label=y_train,
    candidate_features_list=candidate_features_list,
    categorical_features=cat_cols,
    n_data_blocks=2,
    # feature_boosting=True,
    task='regression',
    # stage2_metric='permutation',
    metric='rmse', 
    n_jobs=4,
    seed=5,
)

# 1 minute

The number of candidate features is 534
Start stage I selection.


100%|██████████| 16/16 [00:20<00:00,  1.30s/it]


225 same features have been deleted.
Meet early-stopping in successive feature-wise halving.


100%|██████████| 16/16 [00:21<00:00,  1.36s/it]


The number of remaining candidate features is 304
Start stage II selection.


100%|██████████| 16/16 [00:19<00:00,  1.23s/it]


Finish data processing.
CPU times: total: 3.5 s
Wall time: 1min 6s


In [28]:
# Get the list of OpenFE features

new_features_list = [feature for feature in features]
len(new_features_list)

304

In [29]:
# Check the best 25 features with their real feature names

topk=25
print(F'The top {topk} generated features are:')
for feature in new_features_list[:topk]:
    print(tree_to_formula(feature))

The top 25 generated features are:
(fruitset*seeds)
residual(fruitset)
(fruitset+seeds)
GroupByThenRank(seeds,MaxOfUpperTRange)
round(seeds)
(fruitmass+seeds)
max(fruitmass,seeds)
GroupByThenRank(fruitset,AverageOfLowerTRange)
GroupByThenRank(fruitset,MinOfUpperTRange)
GroupByThenRank(fruitset,MaxOfUpperTRange)
(fruitset*fruitmass)
(fruitset-fruitmass)
GroupByThenRank(seeds,AverageOfLowerTRange)
GroupByThenRank(seeds,MaxOfLowerTRange)
(fruitmass/seeds)
GroupByThenRank(fruitset,bumbles)
GroupByThenRank(fruitset,honeybee)
residual(seeds)
freq(seeds)
(fruitmass*seeds)
GroupByThenRank(fruitmass,MinOfLowerTRange)
GroupByThenRank(seeds,bumbles)
(fruitset/fruitmass)
(fruitset/seeds)
freq(fruitset)


In [57]:
# Transform the train and validation data to have the engineered features

train_ofe, test_ofe = transform(X_train, X_test, new_features_list, n_jobs=4)

# 1 min

In [58]:
train_ofe = train_ofe.reset_index(drop=True)
test_ofe = test_ofe.reset_index(drop=True)
train_ofe.shape, test_ofe.shape

((15289, 320), (10194, 320))

In [59]:
# See if there are any missing rows

train_ofe[train_ofe.isna().any(axis=1)]

Unnamed: 0,clonesize,honeybee,bumbles,andrena,osmia,MaxOfUpperTRange,MinOfUpperTRange,AverageOfUpperTRange,MaxOfLowerTRange,MinOfLowerTRange,AverageOfLowerTRange,RainingDays,AverageRainingDays,fruitset,fruitmass,seeds,autoFE_f_0,autoFE_f_1,autoFE_f_2,autoFE_f_3,autoFE_f_4,autoFE_f_5,autoFE_f_6,autoFE_f_7,autoFE_f_8,autoFE_f_9,autoFE_f_10,autoFE_f_11,autoFE_f_12,autoFE_f_13,autoFE_f_14,autoFE_f_15,autoFE_f_16,autoFE_f_17,autoFE_f_18,autoFE_f_19,autoFE_f_20,autoFE_f_21,autoFE_f_22,autoFE_f_23,autoFE_f_24,autoFE_f_25,autoFE_f_26,autoFE_f_27,autoFE_f_28,autoFE_f_29,autoFE_f_30,autoFE_f_31,autoFE_f_32,autoFE_f_33,autoFE_f_34,autoFE_f_35,autoFE_f_36,autoFE_f_37,autoFE_f_38,autoFE_f_39,autoFE_f_40,autoFE_f_41,autoFE_f_42,autoFE_f_43,autoFE_f_44,autoFE_f_45,autoFE_f_46,autoFE_f_47,autoFE_f_48,autoFE_f_49,autoFE_f_50,autoFE_f_51,autoFE_f_52,autoFE_f_53,autoFE_f_54,autoFE_f_55,autoFE_f_56,autoFE_f_57,autoFE_f_58,autoFE_f_59,autoFE_f_60,autoFE_f_61,autoFE_f_62,autoFE_f_63,autoFE_f_64,autoFE_f_65,autoFE_f_66,autoFE_f_67,autoFE_f_68,autoFE_f_69,autoFE_f_70,autoFE_f_71,autoFE_f_72,autoFE_f_73,autoFE_f_74,autoFE_f_75,autoFE_f_76,autoFE_f_77,autoFE_f_78,autoFE_f_79,autoFE_f_80,autoFE_f_81,autoFE_f_82,autoFE_f_83,autoFE_f_84,autoFE_f_85,autoFE_f_86,autoFE_f_87,autoFE_f_88,autoFE_f_89,autoFE_f_90,autoFE_f_91,autoFE_f_92,autoFE_f_93,autoFE_f_94,autoFE_f_95,autoFE_f_96,autoFE_f_97,autoFE_f_98,autoFE_f_99,autoFE_f_100,autoFE_f_101,autoFE_f_102,autoFE_f_103,autoFE_f_104,autoFE_f_105,autoFE_f_106,autoFE_f_107,autoFE_f_108,autoFE_f_109,autoFE_f_110,autoFE_f_111,autoFE_f_112,autoFE_f_113,autoFE_f_114,autoFE_f_115,autoFE_f_116,autoFE_f_117,autoFE_f_118,autoFE_f_119,autoFE_f_120,autoFE_f_121,autoFE_f_122,autoFE_f_123,autoFE_f_124,autoFE_f_125,autoFE_f_126,autoFE_f_127,autoFE_f_128,autoFE_f_129,autoFE_f_130,autoFE_f_131,autoFE_f_132,autoFE_f_133,autoFE_f_134,autoFE_f_135,autoFE_f_136,autoFE_f_137,autoFE_f_138,autoFE_f_139,autoFE_f_140,autoFE_f_141,autoFE_f_142,autoFE_f_143,autoFE_f_144,autoFE_f_145,autoFE_f_146,autoFE_f_147,autoFE_f_148,autoFE_f_149,autoFE_f_150,autoFE_f_151,autoFE_f_152,autoFE_f_153,autoFE_f_154,autoFE_f_155,autoFE_f_156,autoFE_f_157,autoFE_f_158,autoFE_f_159,autoFE_f_160,autoFE_f_161,autoFE_f_162,autoFE_f_163,autoFE_f_164,autoFE_f_165,autoFE_f_166,autoFE_f_167,autoFE_f_168,autoFE_f_169,autoFE_f_170,autoFE_f_171,autoFE_f_172,autoFE_f_173,autoFE_f_174,autoFE_f_175,autoFE_f_176,autoFE_f_177,autoFE_f_178,autoFE_f_179,autoFE_f_180,autoFE_f_181,autoFE_f_182,autoFE_f_183,autoFE_f_184,autoFE_f_185,autoFE_f_186,autoFE_f_187,autoFE_f_188,autoFE_f_189,autoFE_f_190,autoFE_f_191,autoFE_f_192,autoFE_f_193,autoFE_f_194,autoFE_f_195,autoFE_f_196,autoFE_f_197,autoFE_f_198,autoFE_f_199,autoFE_f_200,autoFE_f_201,autoFE_f_202,autoFE_f_203,autoFE_f_204,autoFE_f_205,autoFE_f_206,autoFE_f_207,autoFE_f_208,autoFE_f_209,autoFE_f_210,autoFE_f_211,autoFE_f_212,autoFE_f_213,autoFE_f_214,autoFE_f_215,autoFE_f_216,autoFE_f_217,autoFE_f_218,autoFE_f_219,autoFE_f_220,autoFE_f_221,autoFE_f_222,autoFE_f_223,autoFE_f_224,autoFE_f_225,autoFE_f_226,autoFE_f_227,autoFE_f_228,autoFE_f_229,autoFE_f_230,autoFE_f_231,autoFE_f_232,autoFE_f_233,autoFE_f_234,autoFE_f_235,autoFE_f_236,autoFE_f_237,autoFE_f_238,autoFE_f_239,autoFE_f_240,autoFE_f_241,autoFE_f_242,autoFE_f_243,autoFE_f_244,autoFE_f_245,autoFE_f_246,autoFE_f_247,autoFE_f_248,autoFE_f_249,autoFE_f_250,autoFE_f_251,autoFE_f_252,autoFE_f_253,autoFE_f_254,autoFE_f_255,autoFE_f_256,autoFE_f_257,autoFE_f_258,autoFE_f_259,autoFE_f_260,autoFE_f_261,autoFE_f_262,autoFE_f_263,autoFE_f_264,autoFE_f_265,autoFE_f_266,autoFE_f_267,autoFE_f_268,autoFE_f_269,autoFE_f_270,autoFE_f_271,autoFE_f_272,autoFE_f_273,autoFE_f_274,autoFE_f_275,autoFE_f_276,autoFE_f_277,autoFE_f_278,autoFE_f_279,autoFE_f_280,autoFE_f_281,autoFE_f_282,autoFE_f_283,autoFE_f_284,autoFE_f_285,autoFE_f_286,autoFE_f_287,autoFE_f_288,autoFE_f_289,autoFE_f_290,autoFE_f_291,autoFE_f_292,autoFE_f_293,autoFE_f_294,autoFE_f_295,autoFE_f_296,autoFE_f_297,autoFE_f_298,autoFE_f_299,autoFE_f_300,autoFE_f_301,autoFE_f_302,autoFE_f_303
487,25.0,0.5,0.25,0.5,0.5,77.4,46.8,64.7,55.8,27.0,45.8,34.0,0.25,0.403217,0.403129,32.690487,13.181371,0.403217,33.093705,0.125823,32.0,33.093616,32.690487,0.080787,0.080943,0.080931,0.162549,8.8e-05,0.125686,0.125842,0.012332,0.136492,0.144873,0.690487,48.0,13.17848,0.069357,0.239019,1.000219,0.012334,48.0,-32.287358,0.475136,0.331744,58.0,-32.28727,0.403217,4477.0,0.188685,7.0,1.0,0.104038,0.167228,0.129042,3610.0,0.158028,0.199382,0.286444,0.278305,0.403129,0.080943,3.0,0.174734,0.262484,1.0,0.806346,0.069336,17.0,8.0,2.0,0.634924,3.0,1.0,9.0,0.652144,0.069346,1.0,9101.0,1.0,12.0,0.192732,3378.0,1288.0,0.125842,1.0,2.0,1.0,1.0,0.0,1685.0,0.070664,3232.0,1.0,4318.0,0.483347,2.0,6.0,3904.0,0.652144,12.0,12.0,8.0,4255.0,46.369344,0.0,1.0,1.0,0.069357,1684.0,0.073136,8.0,5.0,0.446576,0.080956,1287.0,4408.0,1.0,3.999294,0.652144,0.0,8.0,8.0,3231.0,0.069346,13056.0,3.0,0.320727,0.42214,0.311921,0.497503,1.0,2.0,5.0,0.036618,23.412776,18.0,,6381.0,5872.0,22.0,0.652144,5.0,3.528332,46.585105,2.0,1.0,5.0,7.0,5.0,37.712513,8.0,0.53566,6379.0,7.0,1288.0,1.0,2.0,2.0,17.0,12.0,0.53566,5.0,3.959885,1.0,5.0,5.0,24.320627,3.0,,0.233554,5.0,5.0,2.0,1567.0,5.0,3.0,6380.0,2.0,9.0,37.402177,0.033306,0.074688,0.436183,0.320727,3233.0,8.0,0.460807,46.585105,4.0,6379.0,2.0,6381.0,23.412776,0.488876,5.0,0.320727,10.0,1685.0,3233.0,0.403129,1.0,2.0,0.652144,13814.0,4408.0,0.652144,1.0,1685.0,1.0,6380.0,7.0,2.0,0.403129,2.0,23.412776,46.585105,46.585105,5.0,5.0,0.032394,5.0,1.0,1.0,3.0,1.0,0.311921,0.507215,3378.0,12.0,35.739824,0.533821,36.392487,3377.0,36.166044,7.0,1.0,7.0,2.0,0.071655,3.817181,3.0,4409.0,3.0,1.0,1288.0,1287.0,4407.0,5.0,5.0,35.104165,4409.0,1.0,0.233554,6381.0,0.311921,5.0,2.0,5.0,4.0,6380.0,3378.0,1.0,0.53566,23.412776,3.0,1.0,23.412776,5.0,6381.0,6379.0,0.463759,3.0,3233.0,3331.0,2.0,18122.0,17.0,2.0,17.0,0.068009,6.0,2.0,0.233554,6.0,5.0,10.0,2.0,0.443,0.192732,23.412776,3376.0,0.233554,8428.0,14.0,1.0,3.469912,3.0,6.0,5.0,37.712513,6.0,0.652144,2.0,0.645641,0.437473,0.448406,0.125862,12.0,8.0,4.0,1.0,0.506261,0.403129,2.0,7.0,1.0,2.0,1.0,1685.0,6380.0
841,25.0,0.5,0.56,0.63,0.5,86.0,52.0,71.9,62.0,30.0,50.8,34.0,0.56,0.468833,0.432743,34.971315,16.395697,0.468833,35.440148,0.357194,34.0,35.404059,34.971315,0.287737,0.287737,0.287634,0.202884,0.036089,0.357143,0.357194,0.012374,1.0,0.38388,0.971315,43.0,15.133608,0.294135,1.0,1.083396,0.013406,1.0,-34.538572,0.715599,0.638283,1.0,-34.502482,0.468833,1.0,0.44368,7.0,0.638222,0.289611,1.0,0.337746,3610.0,0.401983,0.387886,0.608822,0.501955,0.432743,0.287778,5.0,0.42678,0.484935,0.608755,0.901576,0.294135,8.0,2.0,2.0,0.657832,5.0,12.0,3.0,0.652144,0.294092,3.0,1.0,1150.0,2.0,0.233554,3836.0,1581.0,0.357143,13.0,11.0,3.0,3330.0,7.0,1434.0,0.076395,3603.0,5.0,3268.0,0.483347,1.0,6.0,1.0,0.652144,8.0,8.0,2.0,3245.0,46.369344,7.0,2.0,0.715551,0.294092,1435.0,0.069575,2.0,5.0,0.44657,0.287737,1581.0,1.0,1580.0,3.753311,0.645641,0.0,5.0,2.0,3603.0,0.294135,13056.0,5.0,0.335339,0.42214,0.311921,0.468833,6.0,1.0,5.0,,23.412776,8.0,0.033308,6921.0,5872.0,3.0,0.652144,5.0,3.528332,46.585105,1.0,2.0,5.0,7.0,6.0,36.661684,5.0,0.53566,6923.0,7.0,1581.0,2.0,1.0,1.0,8.0,8.0,0.53566,10.0,,4.0,5.0,5.0,23.412776,5.0,0.071659,0.233554,5.0,5.0,1.0,1150.0,5.0,5.0,6922.0,12.0,9.0,36.508523,0.036841,,0.436183,0.320727,3603.0,8.0,0.452137,46.585105,1.0,6923.0,11.0,6922.0,23.412776,0.488876,5.0,0.320727,14.0,1435.0,3603.0,0.311921,1580.0,1.0,0.652144,13814.0,1.0,0.652144,5870.0,1434.0,1580.0,6922.0,7.0,2.0,0.532223,11.0,22.079199,46.585105,46.585105,5.0,5.0,0.032394,6.0,8.0,9.0,5.0,2.0,0.311921,0.504761,3836.0,8.0,34.971315,0.533821,36.085144,3836.0,35.923314,7.0,6.0,7.0,11.0,0.071655,4.104278,3.0,1.0,5.0,1.0,1581.0,1581.0,1.0,6.0,6.0,35.104165,1.0,1580.0,0.233554,6922.0,0.311921,1.0,1.0,10.0,5.0,6923.0,3836.0,1580.0,0.53566,22.079199,5.0,6.0,22.079199,5.0,6922.0,6922.0,0.45312,5.0,3603.0,3331.0,1.0,1.0,8.0,1.0,8.0,0.068009,1.0,1.0,0.233554,6.0,6.0,14.0,2.0,0.432743,0.192732,22.079199,3836.0,0.233554,1.0,2.0,5.0,3.469912,5.0,6.0,10.0,36.661684,6.0,0.652144,1.0,0.645641,0.437473,0.446562,0.357143,8.0,2.0,6.0,3609.0,0.502599,0.417915,1.0,7.0,5871.0,1.0,1.0,1435.0,6922.0
897,25.0,0.5,0.065,0.707,0.63,86.0,52.0,71.9,62.0,30.0,50.8,3.77,0.06,0.402052,0.401091,30.373012,12.211542,0.402052,30.775064,0.069633,30.0,30.774103,30.373012,0.114762,0.114762,0.114779,0.16126,0.000961,0.069623,0.069633,0.013206,0.333333,0.135429,0.373012,1.0,12.182354,0.096865,0.333333,1.002396,0.013237,74.0,-29.97192,0.287356,0.321839,44.0,-29.970959,0.402052,3.0,0.161611,7.0,0.329545,1.0,0.333333,1.0,26.0,0.14869,1.0,0.298851,0.097075,0.401091,0.114779,5.0,0.14927,0.087941,0.306818,0.803144,0.096865,18.0,23.0,10.0,0.633318,5.0,13.0,25.0,0.652144,0.096851,13.0,2.0,1.0,18.0,0.402052,3836.0,47.0,0.069623,14.0,12.0,14.0,20.0,8.0,1.0,0.076395,3603.0,5.0,1.0,0.483347,1.0,6.0,1.0,0.652144,17.0,17.0,22.0,1.0,30.373012,8.0,2.0,0.295455,0.096851,1.0,,22.0,5.0,0.401091,0.114762,47.0,3.0,48.0,,0.402052,0.0,5.0,22.0,3603.0,0.096865,13056.0,5.0,0.401091,0.423853,0.311921,0.468921,6.0,1.0,5.0,0.037935,23.412776,19.0,0.045132,6921.0,87.0,26.0,0.652144,5.0,4.861856,46.585105,1.0,2.0,5.0,7.0,6.0,36.661684,5.0,0.53566,6923.0,7.0,47.0,2.0,1.0,1.0,18.0,17.0,0.53566,10.0,4.72025,5.0,5.0,5.0,30.373012,5.0,0.087592,0.233554,5.0,1.0,1.0,1.0,1.0,5.0,6922.0,13.0,13.0,36.508523,0.036841,0.082477,0.436183,0.320727,3603.0,9.0,0.452137,46.585105,2.0,6923.0,12.0,6922.0,23.412776,0.488876,5.0,0.320727,14.0,1.0,3603.0,0.311921,48.0,1.0,0.652144,13814.0,3.0,0.652144,87.0,1.0,48.0,6922.0,7.0,2.0,0.529619,12.0,22.079199,46.585105,46.585105,5.0,5.0,0.032394,6.0,9.0,13.0,5.0,2.0,0.311921,0.402052,3836.0,18.0,33.999526,0.533821,30.373012,3836.0,30.373012,7.0,5.0,7.0,12.0,0.087632,4.104278,1.0,3.0,5.0,3.0,47.0,47.0,3.0,6.0,6.0,35.104165,3.0,48.0,0.233554,6922.0,0.311921,4.0,1.0,10.0,1.0,6923.0,3836.0,48.0,0.53566,22.079199,5.0,6.0,22.079199,5.0,6922.0,6922.0,0.45312,5.0,3603.0,20.0,1.0,3.0,18.0,1.0,18.0,0.068009,1.0,1.0,0.233554,6.0,6.0,15.0,17.0,0.43836,0.192732,22.079199,3836.0,0.233554,1.0,10.0,5.0,3.469912,5.0,6.0,10.0,36.661684,6.0,0.652144,1.0,0.628099,0.437473,0.401091,0.069623,17.0,22.0,1.0,26.0,0.402052,0.425801,1.0,7.0,88.0,1.0,1.0,1.0,6922.0
3982,12.5,0.25,0.38,0.24,0.5,94.6,57.2,79.0,68.2,33.0,55.9,16.0,0.26,0.506743,0.449546,35.134955,17.804382,0.506743,35.641697,0.446537,35.0,35.5845,35.134955,0.556052,0.556122,0.556017,0.227804,0.057197,0.44634,0.446269,0.012795,0.38687,0.363351,0.134955,17.0,15.794769,0.543534,0.294935,1.127233,0.014423,20.0,-34.685409,0.246037,0.402878,59.0,-34.628212,0.506743,2172.0,0.370114,5.0,0.402878,1.0,0.424718,1.0,3418.0,0.315437,1.0,0.362486,0.238925,0.449546,0.555963,7.0,0.413619,0.287825,0.362486,0.956288,0.543489,5.0,27.0,6.0,0.670482,7.0,17.0,30.0,0.652144,0.543607,20.0,2544.0,1.0,6.0,0.506743,2721.0,1757.0,0.446429,4.0,5.0,21.0,3639.0,2.0,1.0,0.080261,2971.0,5.0,1.0,0.524854,4.0,4.0,1.0,0.652144,5.0,5.0,26.0,1.0,35.134955,2.0,4.0,0.246037,0.543707,1.0,,26.0,6.0,0.449546,0.556229,1756.0,1749.0,1756.0,,0.506743,1.0,5.0,26.0,2969.0,0.543607,11101.0,7.0,0.449546,0.456046,0.320727,0.529071,4.0,3.0,5.0,0.036363,23.412776,5.0,0.032673,6266.0,7192.0,31.0,0.652144,6.0,3.482353,46.585105,3.0,1.0,5.0,5.0,6.0,35.756223,5.0,0.53566,6268.0,5.0,1756.0,4.0,2.0,3.0,5.0,5.0,0.53566,7.0,3.969682,5.0,6.0,6.0,35.134955,7.0,0.062789,0.192732,5.0,1.0,3.0,1.0,1.0,7.0,6271.0,5.0,8.0,35.864247,0.038244,0.070607,0.46148,0.320727,2969.0,3.0,0.445705,46.369344,5.0,6269.0,5.0,6266.0,23.412776,0.5369,6.0,0.320727,12.0,1.0,2971.0,0.320727,1756.0,1.0,0.652144,11129.0,1750.0,0.652144,7192.0,1.0,1756.0,6264.0,5.0,1.0,0.53566,5.0,23.412776,46.369344,46.585105,4.0,5.0,0.038487,6.0,3.0,8.0,7.0,3.0,0.320727,0.506743,2722.0,5.0,37.22826,0.53566,35.134955,2721.0,35.134955,5.0,5.0,5.0,5.0,0.062789,4.268885,1.0,1749.0,7.0,2172.0,1756.0,1756.0,1748.0,6.0,6.0,37.388526,1747.0,1757.0,0.192732,6265.0,0.320727,3.0,3.0,7.0,1.0,6270.0,2719.0,1756.0,0.53566,23.412776,7.0,4.0,23.412776,6.0,6272.0,6269.0,0.446451,7.0,2972.0,3639.0,3.0,7266.0,5.0,3.0,5.0,0.067236,4.0,2.0,0.192732,5.0,6.0,4.0,6.0,0.455488,0.233554,23.412776,2719.0,0.192732,2606.0,6.0,5.0,4.188904,7.0,4.0,7.0,35.756223,6.0,0.652144,2.0,0.652144,0.457101,0.449546,0.446483,5.0,26.0,1.0,3418.0,0.506743,0.457846,3.0,5.0,7192.0,3.0,1.0,1.0,6270.0
4632,20.0,0.0,0.042,0.101,0.021,86.0,52.0,71.9,62.0,30.0,50.8,3.77,0.06,0.331518,0.320727,28.296663,9.38085,0.331518,28.628181,0.020081,28.0,28.617391,28.296663,0.014733,0.014733,0.014736,0.106327,0.010791,0.020078,0.020081,0.011334,0.142857,0.086957,0.296663,2.0,9.075513,0.000578,0.285714,1.033644,0.011716,1.0,-27.975936,0.172414,0.028736,12.0,-27.965145,0.331518,7.0,0.029412,7.0,0.028409,1.0,0.285714,1.0,44.0,0.070588,1.0,0.057471,0.2,0.320727,0.014736,12.0,0.086957,0.217391,0.056818,0.652245,0.000578,18.0,30.0,14.0,0.566328,11.0,20.0,33.0,0.652144,0.000578,22.0,5.0,1.0,15.0,0.331518,47.0,47.0,0.020078,19.0,17.0,24.0,21.0,16.0,1.0,0.076395,16.0,5.0,1.0,0.450941,1.0,6.0,1.0,0.652144,17.0,17.0,29.0,1.0,28.296663,16.0,2.0,0.170455,0.000578,1.0,,29.0,5.0,0.320727,0.014733,47.0,5.0,48.0,,0.331518,6.0,5.0,29.0,16.0,0.000578,19.0,11.0,0.320727,0.423853,0.311921,0.462997,6.0,1.0,5.0,0.064438,22.079199,19.0,0.045132,6921.0,87.0,34.0,0.608931,5.0,4.861856,45.274419,1.0,2.0,5.0,3.0,6.0,36.661684,5.0,0.53566,6923.0,6.0,47.0,2.0,1.0,1.0,18.0,17.0,0.53566,10.0,5.870828,5.0,5.0,5.0,28.296663,13.0,0.087592,0.233554,5.0,1.0,1.0,1.0,1.0,12.0,6922.0,17.0,13.0,36.508523,0.036841,0.093123,0.416028,0.311921,16.0,9.0,0.452137,46.585105,2.0,6923.0,17.0,6922.0,22.079199,0.462997,5.0,0.311921,14.0,1.0,16.0,0.311921,48.0,1.0,0.652144,85.0,5.0,0.652144,87.0,1.0,48.0,6922.0,7.0,2.0,0.529619,17.0,22.079199,46.585105,42.167705,4.0,6.0,0.046676,6.0,9.0,13.0,12.0,2.0,0.311921,0.331518,47.0,18.0,31.297182,0.520004,28.296663,47.0,28.296663,7.0,5.0,7.0,17.0,0.087632,4.104278,1.0,5.0,11.0,7.0,47.0,47.0,5.0,6.0,6.0,32.006728,5.0,48.0,0.233554,6922.0,0.311921,4.0,1.0,10.0,1.0,6923.0,47.0,48.0,0.53566,22.079199,11.0,6.0,22.079199,3.0,6922.0,6922.0,0.45312,11.0,16.0,20.0,1.0,7.0,18.0,1.0,18.0,0.085158,2.0,1.0,0.233554,9.0,6.0,20.0,14.0,0.393792,0.284443,22.079199,47.0,0.233554,3.0,15.0,5.0,4.704133,12.0,6.0,10.0,36.661684,3.0,0.652144,1.0,0.628099,0.412765,0.320727,0.020078,17.0,29.0,1.0,45.0,0.331518,0.425801,1.0,7.0,88.0,1.0,1.0,1.0,6922.0
5940,20.0,18.43,0.293,0.234,0.058,77.4,46.8,64.7,55.8,27.0,45.3,34.0,0.56,0.372184,0.372179,29.702301,11.054712,0.372184,30.074484,0.029615,29.0,30.07448,29.702301,0.6,0.031813,0.031808,0.138519,5e-06,0.6,0.029619,0.01253,0.5,0.333333,0.702301,3.0,11.054575,0.006426,0.5,1.000012,0.01253,1.0,-29.330122,0.124659,0.030313,30.0,-29.330117,0.372184,1.0,0.141176,7.0,0.030319,1.0,0.5,1.0,11.0,0.188235,1.0,0.11904,0.247059,0.372179,0.031813,20.0,0.444444,0.444444,0.11906,0.744363,0.006424,22.0,32.0,12.0,0.610065,13.0,22.0,35.0,0.652144,0.006425,25.0,2.0,1.0,22.0,0.372184,2.0,1288.0,0.029619,20.0,18.0,27.0,2.0,18.0,1.0,0.096248,2.0,5.0,1.0,0.461022,2.0,6.0,1.0,0.652144,12.0,12.0,31.0,1.0,29.702301,18.0,1.0,0.124681,0.006426,1.0,,32.0,5.0,0.372179,0.031818,1287.0,1.0,1287.0,,0.372184,11.0,8.0,31.0,2.0,0.006425,9.0,13.0,0.372179,0.42214,0.311921,0.434391,6.0,2.0,5.0,0.053018,22.079199,12.0,0.033308,6381.0,5872.0,36.0,0.582483,5.0,3.528332,45.274419,2.0,2.0,5.0,2.0,5.0,37.712513,8.0,0.53566,6379.0,6.0,1288.0,1.0,2.0,5.0,12.0,12.0,0.53566,5.0,3.660428,4.0,5.0,5.0,29.702301,20.0,0.071659,0.309669,5.0,1.0,6.0,1.0,1.0,19.0,1.0,18.0,9.0,37.402177,0.033306,0.087974,0.416028,0.311921,2.0,8.0,0.460807,46.585105,1.0,1.0,18.0,1.0,26.487322,0.488048,5.0,0.355876,10.0,1.0,1.0,0.311921,1287.0,2.0,0.652144,85.0,1.0,0.652144,5870.0,1.0,1286.0,6380.0,7.0,3.0,0.532223,19.0,23.412776,46.585105,41.176201,1.0,6.0,0.048226,5.0,8.0,9.0,19.0,1.0,0.311921,0.372184,16.0,12.0,32.290614,0.520004,29.702301,16.0,29.702301,4.0,6.0,7.0,18.0,0.071655,3.817181,3.0,1.0,13.0,1.0,1288.0,2.0,1.0,5.0,5.0,31.683054,1.0,1287.0,0.233554,6381.0,0.311921,1.0,2.0,5.0,1.0,1.0,16.0,2.0,0.53566,27.345454,19.0,6.0,23.412776,2.0,6381.0,6379.0,0.463759,14.0,2.0,2.0,2.0,2.0,12.0,2.0,12.0,0.085158,2.0,2.0,0.233554,5.0,5.0,21.0,21.0,0.409668,0.284443,23.412776,16.0,0.233554,1.0,12.0,5.0,5.218015,19.0,6.0,5.0,37.712513,4.0,0.652144,2.0,0.645641,0.402691,0.372179,0.029624,12.0,31.0,1.0,10.0,0.372184,0.417915,2.0,7.0,5871.0,2.0,1.0,1.0,6380.0
9103,20.0,0.0,0.293,0.235,0.058,94.6,57.2,79.0,68.2,33.0,55.9,3.77,0.06,0.496598,0.447157,34.878927,17.320813,0.496598,35.375526,0.418289,34.0,35.326085,34.878927,0.506618,0.506696,0.506543,0.222058,0.049441,0.418115,0.418048,0.01282,1.0,0.695652,0.878927,104.0,15.59637,0.519455,1.0,1.110567,0.014238,25.0,-34.43177,0.649425,0.689655,17.0,-34.382329,0.496598,1.0,0.705882,5.0,0.693182,1.0,1.0,1.0,44.0,0.658824,1.0,0.597701,0.7,0.447157,0.506537,23.0,0.73913,0.782609,0.602273,0.943756,0.51939,16.0,35.0,14.0,0.668698,18.0,26.0,38.0,0.652144,0.519531,31.0,2.0,1.0,15.0,0.496598,14.0,19.0,0.418128,23.0,22.0,31.0,21.0,21.0,1.0,0.080261,4.0,5.0,1.0,0.450941,4.0,6.0,1.0,0.652144,16.0,16.0,35.0,1.0,34.878927,18.0,4.0,0.653409,0.51962,1.0,,35.0,5.0,0.447157,0.506779,19.0,1.0,19.0,,0.496598,6.0,5.0,34.0,4.0,0.519531,19.0,17.0,0.447157,0.423853,0.320727,0.434391,6.0,3.0,5.0,0.053018,22.079199,16.0,0.045132,6266.0,87.0,39.0,0.608931,5.0,4.861856,45.274419,3.0,2.0,5.0,3.0,6.0,35.756223,5.0,0.53566,6268.0,6.0,19.0,4.0,2.0,3.0,16.0,16.0,0.53566,7.0,3.660428,5.0,6.0,6.0,34.878927,23.0,0.087592,0.192732,5.0,1.0,3.0,1.0,1.0,22.0,6271.0,21.0,13.0,35.864247,0.038244,0.087974,0.416028,0.311921,4.0,9.0,0.445705,46.369344,1.0,6269.0,21.0,6266.0,22.079199,0.462997,6.0,0.311921,12.0,1.0,4.0,0.311921,19.0,1.0,0.652144,85.0,1.0,0.652144,87.0,1.0,19.0,6264.0,5.0,1.0,0.529619,22.0,23.412776,46.369344,42.167705,4.0,6.0,0.046676,6.0,9.0,13.0,22.0,3.0,0.320727,0.496598,14.0,16.0,32.290614,0.520004,34.878927,14.0,34.878927,5.0,5.0,5.0,21.0,0.087632,4.268885,1.0,1.0,17.0,1.0,19.0,19.0,1.0,6.0,6.0,32.006728,1.0,19.0,0.192732,6265.0,0.311921,4.0,3.0,7.0,1.0,6270.0,14.0,19.0,0.53566,23.412776,17.0,6.0,23.412776,3.0,6272.0,6269.0,0.446451,18.0,4.0,20.0,3.0,2.0,16.0,3.0,16.0,0.085158,2.0,2.0,0.192732,9.0,6.0,24.0,14.0,0.409668,0.284443,23.412776,14.0,0.192732,1.0,15.0,5.0,4.704133,23.0,6.0,7.0,35.756223,3.0,0.652144,2.0,0.628099,0.412765,0.447157,0.418249,16.0,34.0,1.0,45.0,0.496598,0.425801,3.0,5.0,88.0,3.0,1.0,1.0,6270.0
10781,12.5,0.25,0.26,0.63,0.63,86.0,52.0,71.9,62.0,30.0,50.8,1.0,0.1,0.586603,0.488219,37.747509,22.142785,0.586603,38.334111,0.616368,37.0,38.235728,37.747509,0.84472,0.84472,0.844698,0.28639,0.098384,0.616424,0.616513,0.012934,1.0,0.776682,0.747509,49.0,18.429049,0.825773,1.0,1.201515,0.01554,34.0,-37.25929,0.381692,0.704145,1.0,-37.160906,0.586603,1.0,0.742744,7.0,0.704095,0.889491,1.0,0.882671,3293.0,0.757975,0.670277,0.720759,0.464822,0.488219,0.844698,1.0,0.762691,0.505159,0.720712,1.074821,0.825773,4.0,2.0,3.0,0.698727,1.0,29.0,17.0,0.652144,0.825798,2.0,1.0,1125.0,3.0,0.233554,2908.0,1643.0,0.616424,25.0,24.0,2.0,3418.0,23.0,1434.0,0.076395,3207.0,5.0,1713.0,0.524854,1.0,4.0,1.0,0.652144,4.0,4.0,2.0,1736.0,46.369344,23.0,2.0,0.381588,0.825798,1435.0,0.069575,2.0,6.0,0.44657,0.84472,1644.0,1.0,1644.0,3.753311,0.645641,1.0,5.0,2.0,3208.0,0.825917,11101.0,1.0,0.335339,0.468665,0.311921,0.586603,6.0,1.0,5.0,,23.412776,4.0,0.032801,6921.0,5959.0,17.0,0.652144,6.0,3.498768,46.585105,1.0,1.0,5.0,5.0,6.0,36.661684,5.0,0.53566,6923.0,5.0,1644.0,2.0,1.0,1.0,4.0,4.0,0.53566,10.0,,6.0,5.0,5.0,23.412776,1.0,0.059077,0.233554,6.0,5.0,1.0,1126.0,5.0,1.0,6922.0,23.0,9.0,36.508523,0.036841,,0.46148,0.320727,3207.0,5.0,0.452137,46.585105,1.0,6923.0,23.0,6922.0,23.412776,0.5369,5.0,0.320727,14.0,1435.0,3208.0,0.349354,1644.0,1.0,0.652144,11129.0,1.0,0.652144,5958.0,1434.0,1644.0,6922.0,7.0,2.0,0.53566,24.0,22.079199,46.585105,46.585105,4.0,5.0,0.038487,6.0,5.0,9.0,1.0,2.0,0.311921,0.504761,2908.0,4.0,37.747509,0.53566,36.085144,2907.0,35.923314,7.0,5.0,7.0,23.0,0.059072,4.104278,2.0,1.0,1.0,1.0,1644.0,1644.0,1.0,6.0,6.0,37.388526,1.0,1643.0,0.233554,6922.0,0.349354,2.0,1.0,10.0,5.0,6923.0,2907.0,1644.0,0.53566,22.079199,1.0,4.0,22.079199,6.0,6922.0,6922.0,0.45312,1.0,3208.0,3418.0,1.0,1.0,4.0,1.0,4.0,0.067236,1.0,1.0,0.233554,5.0,6.0,26.0,3.0,0.488219,0.233554,22.079199,2908.0,0.233554,1.0,3.0,6.0,4.188904,1.0,6.0,10.0,36.661684,6.0,0.652144,1.0,0.652144,0.457101,0.446562,0.616424,4.0,2.0,6.0,3293.0,0.502599,0.471161,1.0,7.0,5958.0,1.0,1.0,1435.0,6922.0
13493,20.0,18.43,0.0,0.0,0.0,77.4,46.8,64.7,55.8,27.0,45.8,24.0,0.07,0.284443,0.355876,26.487322,7.534123,0.284443,26.771765,0.002742,26.0,26.843198,26.487322,0.003683,0.003683,0.003682,0.101226,-0.071433,0.002743,0.002743,0.013436,0.1,0.111111,0.487322,14.0,9.42619,0.002978,0.1,0.799276,0.010739,21.0,-26.131447,0.003139,0.005023,13.0,-26.20288,0.355876,1.0,0.058824,7.0,1.0,0.083333,0.2,0.166667,14.0,0.011765,0.083333,0.004552,0.023529,0.284443,0.003683,20.0,0.111111,0.111111,1.0,0.640318,0.002977,24.0,25.0,15.0,0.596553,13.0,14.0,43.0,0.652144,0.002978,15.0,10.0,1.0,27.0,0.284443,15.0,1535.0,0.002743,27.0,25.0,19.0,1.0,13.0,2.0,0.070664,2.0,1.0,11.0,0.461022,2.0,4.0,8.0,0.652144,2.0,2.0,24.0,5.0,41.176201,10.0,1.0,1.0,0.002978,2.0,0.106595,24.0,5.0,0.384171,0.003683,1535.0,1.0,1.0,5.105613,0.582483,11.0,8.0,24.0,2.0,0.002978,9.0,13.0,0.33624,0.437646,0.311921,0.525939,1.0,2.0,5.0,0.046182,22.079199,23.0,,6381.0,6371.0,44.0,0.582483,4.0,3.420945,45.274419,2.0,1.0,5.0,2.0,5.0,37.712513,8.0,0.53566,6379.0,6.0,1535.0,1.0,2.0,2.0,24.0,2.0,0.53566,5.0,4.597316,1.0,5.0,5.0,26.487322,20.0,,0.233554,6.0,5.0,2.0,1.0,3.0,19.0,6380.0,24.0,9.0,37.402177,0.033306,0.106192,0.416028,0.311921,2.0,5.0,0.460807,46.585105,1.0,6379.0,24.0,6381.0,26.487322,0.488048,5.0,0.355876,10.0,2.0,1.0,0.355876,1.0,2.0,0.652144,85.0,1.0,0.652144,1.0,2.0,1.0,6380.0,7.0,2.0,0.355876,25.0,23.412776,46.585105,41.176201,1.0,6.0,0.048226,5.0,1.0,1.0,25.0,1.0,0.311921,0.513049,16.0,2.0,30.206941,0.520004,30.783929,16.0,28.585608,7.0,1.0,7.0,24.0,0.066068,3.817181,2.0,1.0,13.0,1.0,1535.0,1535.0,1.0,5.0,5.0,31.683054,1.0,1.0,0.233554,6381.0,0.320727,7.0,2.0,5.0,2.0,6380.0,16.0,1.0,0.53566,23.412776,13.0,1.0,23.412776,2.0,6381.0,6379.0,0.463759,14.0,2.0,1.0,2.0,10.0,23.0,2.0,24.0,0.085158,2.0,2.0,0.233554,5.0,5.0,28.0,26.0,0.390879,0.284443,23.412776,16.0,0.233554,5.0,24.0,1.0,5.218015,19.0,6.0,5.0,37.712513,4.0,0.652144,2.0,0.645641,0.402691,0.396528,0.002743,2.0,24.0,3.0,1.0,0.462662,0.355876,2.0,7.0,1.0,2.0,1.0,2.0,6380.0
13528,12.5,0.25,0.25,0.63,0.63,86.0,52.0,71.9,52.0,30.0,50.8,1.0,0.1,0.581744,0.489992,40.641638,23.643042,0.581744,41.223382,0.831841,40.0,41.13163,40.641638,0.810848,0.810848,0.810821,0.28505,0.091752,0.831865,1.0,0.012056,0.857908,0.72856,0.641638,26.0,19.914094,0.836102,0.880697,1.187252,0.014314,47.0,-40.151645,0.700453,0.721933,26.0,-40.059894,0.581744,3955.0,0.759233,1.0,0.721887,0.852988,0.888506,0.892499,3293.0,0.705769,0.88568,0.672932,0.741935,0.489992,1.0,1.0,0.77792,0.762196,0.672877,1.071737,0.836102,4.0,2.0,3.0,0.699995,1.0,2.0,17.0,0.652144,0.836126,2.0,8581.0,1125.0,3.0,0.233554,2908.0,1.0,0.831865,3.0,1.0,2.0,3418.0,1.0,1434.0,0.076395,1.0,5.0,1713.0,0.524854,1.0,4.0,4375.0,0.581744,4.0,4.0,2.0,1736.0,46.369344,1.0,2.0,0.700403,0.836126,1435.0,0.069575,2.0,6.0,0.44657,0.810848,1644.0,4939.0,1644.0,3.753311,0.645641,1.0,5.0,40.0,3208.0,1.0,11101.0,23.0,0.335339,0.468665,0.311921,0.497503,6.0,7.0,5.0,0.036618,23.412776,4.0,0.032801,1.0,5959.0,17.0,0.652144,6.0,3.498768,46.585105,1.0,1.0,5.0,5.0,6.0,36.661684,5.0,0.53566,6923.0,5.0,1644.0,1.0,1.0,1.0,4.0,4.0,0.53566,10.0,3.959885,6.0,5.0,5.0,23.412776,1.0,0.059077,0.233554,6.0,5.0,1.0,1126.0,5.0,25.0,1.0,1.0,9.0,36.508523,0.036841,0.074688,0.46148,0.320727,3207.0,5.0,0.452137,46.585105,4.0,6923.0,1.0,6922.0,23.412776,0.5369,5.0,0.320727,14.0,1435.0,3208.0,0.349354,1644.0,1.0,0.652144,11129.0,1.0,0.652144,5958.0,1.0,1644.0,6922.0,7.0,2.0,0.53566,1.0,22.079199,46.585105,46.585105,4.0,5.0,0.038487,6.0,5.0,9.0,1.0,2.0,0.489992,0.504761,2908.0,4.0,35.739824,0.53566,36.085144,1.0,35.923314,7.0,5.0,7.0,25.0,0.059072,,2.0,4939.0,1.0,3954.0,1644.0,1644.0,4939.0,1.0,6.0,37.388526,4938.0,1.0,0.233554,6922.0,0.349354,2.0,7.0,1.0,5.0,6923.0,2907.0,1644.0,0.489992,22.079199,1.0,4.0,40.641638,6.0,1.0,1.0,0.45312,1.0,3208.0,3418.0,1.0,18122.0,24.0,1.0,4.0,0.067236,6.0,1.0,0.581744,5.0,6.0,3.0,3.0,0.443,0.233554,22.079199,2908.0,0.233554,9506.0,3.0,6.0,4.188904,1.0,6.0,10.0,36.661684,6.0,0.652144,1.0,0.652144,0.457101,0.446562,0.831865,22.0,2.0,6.0,3293.0,0.502599,0.471161,8.0,7.0,5958.0,7.0,1.0,1435.0,1.0


In [77]:
train_ofe['autoFE_f_45'].sort_values().unique()

[0.0, 1.0, 2.0, 3.0, 4.0, ..., 21.0, 22.0, 23.0, 24.0, 25.0]
Length: 26
Categories (26, float64): [0.0, 1.0, 2.0, 3.0, ..., 22.0, 23.0, 24.0, 25.0]

In [60]:
# Check what columns have missing NaN
# if the previous step is zero then nothing will show here

nan_cols = train_ofe.columns[train_ofe.isna().any()].tolist()
nan_cols

['autoFE_f_93',
 'autoFE_f_101',
 'autoFE_f_117',
 'autoFE_f_120',
 'autoFE_f_126',
 'autoFE_f_146',
 'autoFE_f_152',
 'autoFE_f_165',
 'autoFE_f_222',
 'autoFE_f_223']

In [61]:
# Get the list of OpenFE features but with their real names and add the original features
# I just prefer the DataFrame to have real feature names than a bunch of AutoFE_d_1

ofe_cols = [tree_to_formula(feature) for feature in new_features_list]
ofe_cols

['(fruitset*seeds)',
 'residual(fruitset)',
 '(fruitset+seeds)',
 'GroupByThenRank(seeds,MaxOfUpperTRange)',
 'round(seeds)',
 '(fruitmass+seeds)',
 'max(fruitmass,seeds)',
 'GroupByThenRank(fruitset,AverageOfLowerTRange)',
 'GroupByThenRank(fruitset,MinOfUpperTRange)',
 'GroupByThenRank(fruitset,MaxOfUpperTRange)',
 '(fruitset*fruitmass)',
 '(fruitset-fruitmass)',
 'GroupByThenRank(seeds,AverageOfLowerTRange)',
 'GroupByThenRank(seeds,MaxOfLowerTRange)',
 '(fruitmass/seeds)',
 'GroupByThenRank(fruitset,bumbles)',
 'GroupByThenRank(fruitset,honeybee)',
 'residual(seeds)',
 'freq(seeds)',
 '(fruitmass*seeds)',
 'GroupByThenRank(fruitmass,MinOfLowerTRange)',
 'GroupByThenRank(seeds,bumbles)',
 '(fruitset/fruitmass)',
 '(fruitset/seeds)',
 'freq(fruitset)',
 '(fruitmass-seeds)',
 'GroupByThenRank(seeds,RainingDays)',
 'GroupByThenRank(fruitmass,RainingDays)',
 'freq(fruitmass)',
 '(fruitset-seeds)',
 'max(fruitset,fruitmass)',
 'CombineThenFreq(bumbles,RainingDays)',
 'GroupByThenRank(fru

In [62]:
# Add the OpenFE features to the original features

original_feats = list(X_train.columns)
original_feats.extend(ofe_cols)
original_feats

['clonesize',
 'honeybee',
 'bumbles',
 'andrena',
 'osmia',
 'MaxOfUpperTRange',
 'MinOfUpperTRange',
 'AverageOfUpperTRange',
 'MaxOfLowerTRange',
 'MinOfLowerTRange',
 'AverageOfLowerTRange',
 'RainingDays',
 'AverageRainingDays',
 'fruitset',
 'fruitmass',
 'seeds',
 '(fruitset*seeds)',
 'residual(fruitset)',
 '(fruitset+seeds)',
 'GroupByThenRank(seeds,MaxOfUpperTRange)',
 'round(seeds)',
 '(fruitmass+seeds)',
 'max(fruitmass,seeds)',
 'GroupByThenRank(fruitset,AverageOfLowerTRange)',
 'GroupByThenRank(fruitset,MinOfUpperTRange)',
 'GroupByThenRank(fruitset,MaxOfUpperTRange)',
 '(fruitset*fruitmass)',
 '(fruitset-fruitmass)',
 'GroupByThenRank(seeds,AverageOfLowerTRange)',
 'GroupByThenRank(seeds,MaxOfLowerTRange)',
 '(fruitmass/seeds)',
 'GroupByThenRank(fruitset,bumbles)',
 'GroupByThenRank(fruitset,honeybee)',
 'residual(seeds)',
 'freq(seeds)',
 '(fruitmass*seeds)',
 'GroupByThenRank(fruitmass,MinOfLowerTRange)',
 'GroupByThenRank(seeds,bumbles)',
 '(fruitset/fruitmass)',
 '(f

In [63]:
# Do the feature name change to understandable names
# Check the names of the columns with NaN

known_names_df = train_ofe.copy()

if len(original_feats) == len(train_ofe.columns):
    known_names_df.columns = original_feats
    print('OFE column names have changed to understandable column names')

known_col_names_nan_cols = known_names_df.columns[known_names_df.isna().any()].tolist()
known_col_names_nan_cols

OFE column names have changed to understandable column names


['GroupByThenStd(fruitset,andrena)',
 'GroupByThenStd(seeds,andrena)',
 'GroupByThenStd(fruitmass,bumbles)',
 'GroupByThenStd(fruitmass,AverageRainingDays)',
 'GroupByThenStd(seeds,RainingDays)',
 'GroupByThenStd(seeds,bumbles)',
 'GroupByThenStd(fruitset,AverageRainingDays)',
 'GroupByThenStd(fruitset,bumbles)',
 'GroupByThenStd(fruitset,RainingDays)',
 'GroupByThenStd(seeds,MaxOfLowerTRange)']

In [64]:
std_by_group = X_train.groupby('fruitset')['andrena'].std()
# std_by_group = X_train.groupby('fruitset')['andrena'].count()
# std_by_group = X_train.groupby('fruitset')['andrena'].mean()
std_by_group

fruitset
0.192732         NaN
0.226568         NaN
0.233554    0.110567
0.235916         NaN
0.237874         NaN
              ...   
0.642882    0.137155
0.644329    0.104003
0.645475    0.112229
0.645641    0.095185
0.652144    0.151052
Name: andrena, Length: 1526, dtype: float64

*All the columns with NaN are GroupByThenStd because those group contains less than two data points thus the standard deviation cannot be calculated so I think we can drop them*

In [65]:
# Drop all the columns with NaN
train_ofe_clean = train_ofe.drop(nan_cols, axis=1)
test_ofe_clean = test_ofe.drop(nan_cols, axis=1)
train_ofe.shape, test_ofe.shape, train_ofe_clean.shape, test_ofe_clean.shape

((15289, 320), (10194, 320), (15289, 310), (10194, 310))

In [90]:
# Convert numerical_column to numeric
for column in list(train_ofe_clean.columns):
    train_ofe_clean[column] = pd.to_numeric(train_ofe_clean[column])
    test_ofe_clean[column] = pd.to_numeric(test_ofe_clean[column])

In [95]:
models = [
    CatBoostRegressor(random_state=5, verbose=False, early_stopping_rounds=100),
    ExtraTreesRegressor(random_state=5),
    # GaussianNB(),
    HistGradientBoostingRegressor(random_state=5),
    knn_pipeline,
    linear_pipeline,
    LGBMRegressor(n_jobs=-1, random_state=5),
    LinearRegression(),
    RandomForestRegressor(random_state=5),
    XGBRegressor(random_state=5),
]

In [96]:
openfe_features = {}

for model in models:
    if hasattr(model, 'name'):
        model_name = model.name
    else:
        model_name = model.__class__.__name__

    openfe_features[model_name] = list(train_ofe_clean.columns)

In [97]:
with open('openfe_features_ofe.txt', mode='w') as f:
    pprint(openfe_features, stream=f)

In [98]:
%%time

openfe_models = evaluate_models(models, train_ofe_clean, y_train, openfe_features, k10, f'{experiment_name}')
openfe_models

Models:   0%|          | 0/9 [00:00<?, ?it/s]

Progress:   0%|          | 0/9 [00:00<?, ?it/s]

Done with KNN.
Done with LR Pipeline.
Done with HistGradientBoostingRegressor.
Done with XGBRegressor.
Done with LGBMRegressor.
Done with LinearRegression.
Done with CatBoostRegressor.
Done with ExtraTreesRegressor.
Done with RandomForestRegressor.
CPU times: total: 8.3 s
Wall time: 5min 21s


Unnamed: 0,Model Name,Model Parameters,Model Train Accuracy,Model Test Accuracy,Model Test Accuracy Std,Model Time
2,HistGradientBoostingRegressor,"{'categorical_features': None, 'early_stopping...",306.72342,356.6724,0.6843644,0 min 10.85 sec
5,LGBMRegressor,"{'boosting_type': 'gbdt', 'class_weight': None...",284.582041,357.8284,1.218173,0 min 8.60 sec
0,CatBoostRegressor,"{'loss_function': 'RMSE', 'verbose': False, 'r...",264.860066,360.3538,0.5975176,2 min 3.34 sec
7,RandomForestRegressor,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri...",135.521288,363.822,0.5022381,3 min 38.52 sec
1,ExtraTreesRegressor,"{'bootstrap': False, 'ccp_alpha': 0.0, 'criter...",0.088081,375.6141,1.90699,3 min 23.77 sec
8,XGBRegressor,"{'objective': 'reg:squarederror', 'base_score'...",189.621524,382.564,2.250403,0 min 45.15 sec
3,KNN,"{'memory': None, 'steps': [('scaler', Standard...",480.787673,491.8325,1.305807,0 min 0.38 sec
6,LinearRegression,"{'copy_X': True, 'fit_intercept': True, 'n_job...",355.915066,360432500.0,430274000.0,0 min 1.35 sec
4,LR Pipeline,"{'memory': None, 'steps': [('scaler', Standard...",356.058696,5028766000000.0,1150295000000.0,0 min 0.93 sec


# FEATURE SELECTION

### Mutual Information

In [99]:
# To ensure the same randomness everytime
np.random.seed(5)

X_mi = train_ofe_clean.copy()

# Add random feature
X_mi['random_feature_continous'] = np.round(np.random.uniform(-2, 2, X_train.shape[0]), 6)
X_mi['random_feature_categorical'] = np.random.randint(1, 8, X_train.shape[0])
X_mi.head()

Unnamed: 0,clonesize,honeybee,bumbles,andrena,osmia,MaxOfUpperTRange,MinOfUpperTRange,AverageOfUpperTRange,MaxOfLowerTRange,MinOfLowerTRange,AverageOfLowerTRange,RainingDays,AverageRainingDays,fruitset,fruitmass,seeds,autoFE_f_0,autoFE_f_1,autoFE_f_2,autoFE_f_3,autoFE_f_4,autoFE_f_5,autoFE_f_6,autoFE_f_7,autoFE_f_8,autoFE_f_9,autoFE_f_10,autoFE_f_11,autoFE_f_12,autoFE_f_13,autoFE_f_14,autoFE_f_15,autoFE_f_16,autoFE_f_17,autoFE_f_18,autoFE_f_19,autoFE_f_20,autoFE_f_21,autoFE_f_22,autoFE_f_23,autoFE_f_24,autoFE_f_25,autoFE_f_26,autoFE_f_27,autoFE_f_28,autoFE_f_29,autoFE_f_30,autoFE_f_31,autoFE_f_32,autoFE_f_33,autoFE_f_34,autoFE_f_35,autoFE_f_36,autoFE_f_37,autoFE_f_38,autoFE_f_39,autoFE_f_40,autoFE_f_41,autoFE_f_42,autoFE_f_43,autoFE_f_44,autoFE_f_45,autoFE_f_46,autoFE_f_47,autoFE_f_48,autoFE_f_49,autoFE_f_50,autoFE_f_51,autoFE_f_52,autoFE_f_53,autoFE_f_54,autoFE_f_55,autoFE_f_56,autoFE_f_57,autoFE_f_58,autoFE_f_59,autoFE_f_60,autoFE_f_61,autoFE_f_62,autoFE_f_63,autoFE_f_64,autoFE_f_65,autoFE_f_66,autoFE_f_67,autoFE_f_68,autoFE_f_69,autoFE_f_70,autoFE_f_71,autoFE_f_72,autoFE_f_73,autoFE_f_74,autoFE_f_75,autoFE_f_76,autoFE_f_77,autoFE_f_78,autoFE_f_79,autoFE_f_80,autoFE_f_81,autoFE_f_82,autoFE_f_83,autoFE_f_84,autoFE_f_85,autoFE_f_86,autoFE_f_87,autoFE_f_88,autoFE_f_89,autoFE_f_90,autoFE_f_91,autoFE_f_92,autoFE_f_94,autoFE_f_95,autoFE_f_96,autoFE_f_97,autoFE_f_98,autoFE_f_99,autoFE_f_100,autoFE_f_102,autoFE_f_103,autoFE_f_104,autoFE_f_105,autoFE_f_106,autoFE_f_107,autoFE_f_108,autoFE_f_109,autoFE_f_110,autoFE_f_111,autoFE_f_112,autoFE_f_113,autoFE_f_114,autoFE_f_115,autoFE_f_116,autoFE_f_118,autoFE_f_119,autoFE_f_121,autoFE_f_122,autoFE_f_123,autoFE_f_124,autoFE_f_125,autoFE_f_127,autoFE_f_128,autoFE_f_129,autoFE_f_130,autoFE_f_131,autoFE_f_132,autoFE_f_133,autoFE_f_134,autoFE_f_135,autoFE_f_136,autoFE_f_137,autoFE_f_138,autoFE_f_139,autoFE_f_140,autoFE_f_141,autoFE_f_142,autoFE_f_143,autoFE_f_144,autoFE_f_145,autoFE_f_147,autoFE_f_148,autoFE_f_149,autoFE_f_150,autoFE_f_151,autoFE_f_153,autoFE_f_154,autoFE_f_155,autoFE_f_156,autoFE_f_157,autoFE_f_158,autoFE_f_159,autoFE_f_160,autoFE_f_161,autoFE_f_162,autoFE_f_163,autoFE_f_164,autoFE_f_166,autoFE_f_167,autoFE_f_168,autoFE_f_169,autoFE_f_170,autoFE_f_171,autoFE_f_172,autoFE_f_173,autoFE_f_174,autoFE_f_175,autoFE_f_176,autoFE_f_177,autoFE_f_178,autoFE_f_179,autoFE_f_180,autoFE_f_181,autoFE_f_182,autoFE_f_183,autoFE_f_184,autoFE_f_185,autoFE_f_186,autoFE_f_187,autoFE_f_188,autoFE_f_189,autoFE_f_190,autoFE_f_191,autoFE_f_192,autoFE_f_193,autoFE_f_194,autoFE_f_195,autoFE_f_196,autoFE_f_197,autoFE_f_198,autoFE_f_199,autoFE_f_200,autoFE_f_201,autoFE_f_202,autoFE_f_203,autoFE_f_204,autoFE_f_205,autoFE_f_206,autoFE_f_207,autoFE_f_208,autoFE_f_209,autoFE_f_210,autoFE_f_211,autoFE_f_212,autoFE_f_213,autoFE_f_214,autoFE_f_215,autoFE_f_216,autoFE_f_217,autoFE_f_218,autoFE_f_219,autoFE_f_220,autoFE_f_221,autoFE_f_224,autoFE_f_225,autoFE_f_226,autoFE_f_227,autoFE_f_228,autoFE_f_229,autoFE_f_230,autoFE_f_231,autoFE_f_232,autoFE_f_233,autoFE_f_234,autoFE_f_235,autoFE_f_236,autoFE_f_237,autoFE_f_238,autoFE_f_239,autoFE_f_240,autoFE_f_241,autoFE_f_242,autoFE_f_243,autoFE_f_244,autoFE_f_245,autoFE_f_246,autoFE_f_247,autoFE_f_248,autoFE_f_249,autoFE_f_250,autoFE_f_251,autoFE_f_252,autoFE_f_253,autoFE_f_254,autoFE_f_255,autoFE_f_256,autoFE_f_257,autoFE_f_258,autoFE_f_259,autoFE_f_260,autoFE_f_261,autoFE_f_262,autoFE_f_263,autoFE_f_264,autoFE_f_265,autoFE_f_266,autoFE_f_267,autoFE_f_268,autoFE_f_269,autoFE_f_270,autoFE_f_271,autoFE_f_272,autoFE_f_273,autoFE_f_274,autoFE_f_275,autoFE_f_276,autoFE_f_277,autoFE_f_278,autoFE_f_279,autoFE_f_280,autoFE_f_281,autoFE_f_282,autoFE_f_283,autoFE_f_284,autoFE_f_285,autoFE_f_286,autoFE_f_287,autoFE_f_288,autoFE_f_289,autoFE_f_290,autoFE_f_291,autoFE_f_292,autoFE_f_293,autoFE_f_294,autoFE_f_295,autoFE_f_296,autoFE_f_297,autoFE_f_298,autoFE_f_299,autoFE_f_300,autoFE_f_301,autoFE_f_302,autoFE_f_303,random_feature_continous,random_feature_categorical
0,25.0,0.5,0.25,0.75,0.5,69.7,42.1,58.2,50.2,24.3,41.2,24.0,0.39,0.425011,0.417545,32.460887,13.796231,0.425011,32.885898,0.266774,32.0,32.878433,32.460887,0.165424,0.16548,0.165452,0.177461,0.007465,0.266729,0.266729,0.012863,0.177078,0.194769,0.460887,31.0,13.553895,0.40742,0.223651,1.017879,0.013093,38.0,-32.043342,0.227123,0.266677,37.0,-32.035876,0.425011,4651.0,0.296909,4.0,0.26652,0.130591,0.259215,0.206987,3891.0,0.209715,0.166014,0.175718,0.259302,0.417545,0.165424,0.0,0.281257,0.24329,0.175718,0.842556,0.407489,0.0,0.0,0.0,0.646178,0.0,0.0,0.0,0.645641,0.407389,0.0,9101.0,794.0,0.0,0.233554,3189.0,1536.0,0.266819,0.0,0.0,0.0,3693.0,0.0,740.0,0.063043,3015.0,5.0,1675.0,0.483347,1.0,4.0,3049.0,0.645641,0.0,0.0,0.0,1659.0,46.369344,0.0,2.0,0.226966,0.40742,740.0,0.0,5.0,0.448256,0.165424,1536.0,4265.0,1536.0,0.652144,0.0,5.0,0.0,3016.0,0.407589,13056.0,0.0,0.320727,0.437646,0.320727,0.497503,5.0,0.0,5.0,23.412776,0.0,5901.0,6371.0,0.0,0.652144,4.0,46.585105,0.0,2.0,5.0,7.0,4.0,34.849953,5.0,0.53566,5901.0,7.0,1536.0,2.0,2.0,0.0,0.0,0.0,0.53566,4.0,4.0,6.0,6.0,23.412776,0.0,0.226568,6.0,5.0,0.0,794.0,5.0,0.0,5902.0,0.0,9.0,34.695867,0.029353,0.436183,0.320727,3016.0,5.0,0.425154,46.369344,4.0,5903.0,0.0,5902.0,23.412776,0.488876,6.0,0.320727,7.0,740.0,3016.0,0.320727,1536.0,1.0,0.645641,13814.0,4267.0,0.645641,6370.0,740.0,1536.0,5902.0,4.0,2.0,0.53566,0.0,24.320627,46.369344,46.585105,5.0,5.0,0.032394,4.0,4.0,8.0,0.0,2.0,0.320727,0.509934,3188.0,0.0,35.739824,0.533821,36.360591,3189.0,36.311593,4.0,4.0,4.0,0.0,2.0,4266.0,0.0,4651.0,1536.0,1536.0,4266.0,4.0,4.0,35.104165,4265.0,1536.0,0.226568,5900.0,0.320727,0.0,0.0,4.0,3.0,5901.0,3189.0,1536.0,0.53566,24.320627,0.0,4.0,24.320627,5.0,5903.0,5902.0,0.424657,0.0,3015.0,3692.0,0.0,18122.0,0.0,0.0,0.0,0.068009,6.0,2.0,0.226568,6.0,4.0,0.0,0.0,0.443,0.192732,24.320627,3189.0,0.226568,8428.0,0.0,6.0,3.469912,0.0,6.0,4.0,34.849953,6.0,0.645641,2.0,0.645641,0.437473,0.448383,0.266729,0.0,0.0,2.0,3892.0,0.507141,0.437266,0.0,4.0,6371.0,0.0,1.0,740.0,5900.0,-1.112027,2
1,25.0,0.5,0.25,0.5,0.5,69.7,42.1,58.2,50.2,24.3,41.2,24.0,0.39,0.444908,0.422051,33.858317,15.063846,0.444908,34.303225,0.420366,33.0,34.280369,33.858317,0.246231,0.246229,0.246272,0.187774,0.022857,0.420295,0.420295,0.012465,0.232894,0.257246,0.858317,37.0,14.28995,0.464001,0.335118,1.054157,0.01314,57.0,-33.436266,0.369094,0.314943,30.0,-33.413409,0.444908,4651.0,0.337375,4.0,0.314943,0.196436,0.292324,0.254706,3891.0,0.274794,0.287254,0.244703,0.387361,0.422051,0.246231,0.0,0.321939,0.369542,0.244703,0.86696,0.46408,0.0,1.0,0.0,0.649655,0.0,1.0,1.0,0.645641,0.463989,1.0,9101.0,1713.0,0.0,0.192732,3189.0,1536.0,0.420268,0.0,0.0,1.0,3693.0,0.0,1694.0,0.063043,3015.0,5.0,4318.0,0.483347,1.0,4.0,3904.0,0.645641,0.0,0.0,1.0,4255.0,46.369344,0.0,2.0,0.369094,0.464001,1694.0,1.0,5.0,0.446576,0.246231,1536.0,4265.0,1536.0,0.652144,0.0,5.0,1.0,3016.0,0.464171,13056.0,0.0,0.320727,0.437646,0.320727,0.497503,5.0,0.0,5.0,23.412776,0.0,5901.0,6371.0,1.0,0.652144,4.0,46.585105,0.0,2.0,5.0,7.0,4.0,34.849953,5.0,0.53566,5901.0,7.0,1536.0,2.0,2.0,0.0,0.0,0.0,0.53566,4.0,4.0,6.0,6.0,24.320627,0.0,0.226568,6.0,5.0,0.0,1713.0,5.0,0.0,5902.0,0.0,9.0,34.695867,0.029353,0.436183,0.320727,3016.0,5.0,0.425154,46.369344,4.0,5903.0,0.0,5902.0,23.412776,0.488876,6.0,0.320727,7.0,1694.0,3016.0,0.320727,1536.0,1.0,0.645641,13814.0,4267.0,0.645641,6370.0,1693.0,1536.0,5902.0,4.0,2.0,0.53566,0.0,24.320627,46.369344,46.585105,5.0,5.0,0.032394,4.0,4.0,8.0,0.0,2.0,0.320727,0.507215,3188.0,0.0,35.739824,0.533821,36.392487,3189.0,36.166044,4.0,4.0,4.0,0.0,2.0,4266.0,0.0,4651.0,1536.0,1536.0,4266.0,4.0,4.0,35.104165,4265.0,1536.0,0.226568,5900.0,0.320727,0.0,0.0,4.0,4.0,5901.0,3189.0,1536.0,0.53566,24.320627,0.0,4.0,24.320627,5.0,5903.0,5902.0,0.424657,0.0,3015.0,3692.0,0.0,18122.0,0.0,0.0,0.0,0.068009,6.0,2.0,0.226568,6.0,4.0,0.0,0.0,0.443,0.192732,24.320627,3189.0,0.226568,8428.0,0.0,6.0,3.469912,0.0,6.0,4.0,34.849953,6.0,0.645641,2.0,0.645641,0.437473,0.448406,0.420295,0.0,1.0,4.0,3892.0,0.506261,0.437266,0.0,4.0,6371.0,0.0,1.0,1694.0,5900.0,1.482929,5
2,12.5,0.25,0.25,0.63,0.63,86.0,52.0,71.9,62.0,30.0,50.8,24.0,0.39,0.552927,0.470853,38.341781,21.200199,0.552927,38.894708,0.661442,38.0,38.812634,38.341781,0.692763,0.692763,0.692719,0.260347,0.082074,0.661491,0.661586,0.01228,0.757201,0.579199,0.341781,31.0,18.053338,0.667004,0.738053,1.174309,0.014421,24.0,-37.870928,0.830168,0.842175,29.0,-37.788854,0.552927,4651.0,0.549465,7.0,0.842175,0.745086,0.753614,0.733855,2356.0,0.544433,0.716005,0.838565,0.51559,0.470853,0.692863,1.0,0.583698,0.552538,0.838565,1.02378,0.666859,1.0,2.0,1.0,0.686187,1.0,2.0,2.0,0.652144,0.666907,2.0,8581.0,1266.0,1.0,0.233554,2908.0,1717.0,0.661491,0.0,1.0,2.0,2616.0,1.0,1434.0,0.076395,3207.0,5.0,1713.0,0.524854,1.0,4.0,4375.0,0.652144,1.0,1.0,2.0,1736.0,46.369344,1.0,2.0,0.830168,0.666907,1435.0,2.0,6.0,0.44657,0.692763,1717.0,4939.0,1716.0,0.645641,1.0,5.0,2.0,3208.0,0.667004,11101.0,1.0,0.335339,0.437646,0.311921,0.497503,5.0,1.0,5.0,23.412776,1.0,6921.0,6371.0,2.0,0.652144,4.0,46.585105,1.0,2.0,5.0,5.0,6.0,36.661684,5.0,0.53566,6923.0,5.0,1716.0,2.0,1.0,1.0,1.0,1.0,0.53566,10.0,4.0,5.0,5.0,23.412776,1.0,0.233554,6.0,5.0,1.0,1266.0,5.0,1.0,6922.0,1.0,9.0,36.508523,0.036841,0.46148,0.320727,3207.0,5.0,0.452137,46.585105,4.0,6923.0,1.0,6922.0,23.412776,0.5369,5.0,0.320727,14.0,1435.0,3208.0,0.320727,1717.0,1.0,0.652144,11129.0,4938.0,0.652144,6370.0,1434.0,1717.0,6922.0,7.0,2.0,0.53566,1.0,22.079199,46.585105,46.585105,4.0,5.0,0.038487,6.0,4.0,8.0,1.0,2.0,0.311921,0.504761,2908.0,1.0,35.739824,0.53566,36.085144,2907.0,35.923314,7.0,4.0,7.0,1.0,2.0,4939.0,1.0,4651.0,1717.0,1717.0,4939.0,6.0,6.0,37.388526,4938.0,1717.0,0.233554,6922.0,0.320727,0.0,1.0,10.0,5.0,6923.0,2907.0,1717.0,0.53566,22.079199,1.0,4.0,22.079199,6.0,6922.0,6922.0,0.45312,1.0,3208.0,2616.0,1.0,18122.0,1.0,1.0,1.0,0.067236,6.0,1.0,0.233554,5.0,6.0,0.0,1.0,0.443,0.233554,22.079199,2908.0,0.233554,9506.0,1.0,6.0,4.188904,1.0,6.0,10.0,36.661684,6.0,0.652144,1.0,0.645641,0.457101,0.446562,0.661491,1.0,2.0,6.0,2356.0,0.502599,0.437266,1.0,7.0,6371.0,1.0,1.0,1435.0,6922.0,-1.173123,4
3,12.5,0.25,0.25,0.63,0.5,77.4,46.8,64.7,55.8,27.0,45.8,24.0,0.39,0.565976,0.478137,39.467561,22.337712,0.565976,40.033538,0.67565,39.0,39.945698,39.467561,0.667764,0.667764,0.667816,0.270614,0.08784,0.675599,0.675599,0.012115,0.797732,0.634626,0.467561,28.0,18.870884,0.669906,0.802367,1.183713,0.01434,27.0,-38.989425,0.883456,0.887459,1.0,-38.901585,0.565976,4651.0,0.619732,7.0,0.887459,0.78911,0.805761,0.796029,2356.0,0.603963,0.791416,0.876628,0.609129,0.478137,0.667764,2.0,0.649113,0.639455,0.876628,1.044113,0.669853,2.0,3.0,1.0,0.691474,2.0,2.0,2.0,0.652144,0.669801,2.0,8581.0,1266.0,1.0,0.233554,2880.0,1535.0,0.675599,0.0,2.0,2.0,2616.0,1.0,1238.0,0.070664,3095.0,5.0,1713.0,0.524854,2.0,4.0,4375.0,0.652144,2.0,2.0,3.0,1736.0,46.369344,1.0,1.0,0.883456,0.669749,1238.0,3.0,6.0,0.44657,0.667712,1535.0,4408.0,1534.0,0.645641,1.0,8.0,3.0,3095.0,0.669801,11101.0,2.0,0.335339,0.437646,0.311921,0.497503,5.0,2.0,5.0,23.412776,2.0,6381.0,6371.0,2.0,0.652144,4.0,46.585105,2.0,2.0,5.0,5.0,5.0,37.712513,8.0,0.53566,6379.0,5.0,1535.0,1.0,2.0,2.0,2.0,2.0,0.53566,5.0,4.0,5.0,5.0,23.412776,2.0,0.233554,6.0,5.0,2.0,1266.0,5.0,2.0,6380.0,2.0,9.0,37.402177,0.033306,0.46148,0.320727,3095.0,5.0,0.460807,46.585105,4.0,6379.0,2.0,6381.0,23.412776,0.5369,5.0,0.320727,10.0,1239.0,3095.0,0.320727,1534.0,2.0,0.652144,11129.0,4408.0,0.652144,6370.0,1238.0,1534.0,6380.0,7.0,2.0,0.53566,2.0,23.412776,46.585105,46.585105,4.0,5.0,0.038487,5.0,4.0,8.0,2.0,1.0,0.311921,0.504761,2879.0,2.0,35.739824,0.53566,36.085144,2880.0,35.923314,7.0,4.0,7.0,2.0,2.0,4409.0,2.0,4651.0,1535.0,1535.0,4407.0,5.0,5.0,37.388526,4409.0,1534.0,0.233554,6381.0,0.320727,0.0,2.0,5.0,5.0,6380.0,2880.0,1534.0,0.53566,23.412776,2.0,4.0,23.412776,6.0,6381.0,6379.0,0.463759,2.0,3094.0,2616.0,2.0,18122.0,2.0,2.0,2.0,0.067236,6.0,2.0,0.233554,5.0,5.0,0.0,1.0,0.443,0.233554,23.412776,2880.0,0.233554,9506.0,1.0,6.0,4.188904,2.0,6.0,5.0,37.712513,6.0,0.652144,2.0,0.645641,0.457101,0.446562,0.675549,2.0,3.0,6.0,2356.0,0.502599,0.437266,2.0,7.0,6371.0,2.0,1.0,1239.0,6380.0,1.674444,6
4,25.0,0.5,0.25,0.63,0.63,77.4,46.8,64.7,55.8,27.0,45.8,24.0,0.39,0.579677,0.494165,40.484512,23.467926,0.579677,41.064188,0.786431,40.0,40.978677,40.484512,0.763047,0.763047,0.763084,0.286456,0.085512,0.786397,0.786397,0.012206,0.854983,0.93565,0.484512,1.0,20.006019,0.851176,0.868392,1.173043,0.014318,38.0,-39.990347,0.931879,0.950714,20.0,-39.904835,0.579677,4651.0,0.960873,7.0,0.950714,0.849679,0.908619,0.913357,3891.0,0.937781,0.87505,0.918537,0.941147,0.494165,0.763047,3.0,0.959509,0.939053,0.918537,1.073841,0.851065,2.0,3.0,0.0,0.702969,3.0,2.0,2.0,0.652144,0.851042,3.0,9101.0,1266.0,0.0,0.233554,3378.0,1535.0,0.786397,0.0,2.0,3.0,3693.0,0.0,1238.0,0.070664,3232.0,5.0,3268.0,0.483347,2.0,4.0,4375.0,0.652144,2.0,2.0,3.0,3245.0,46.369344,0.0,1.0,0.931879,0.851019,1238.0,3.0,5.0,0.44657,0.763009,1535.0,4408.0,1534.0,0.645641,0.0,8.0,3.0,3231.0,0.851042,13056.0,3.0,0.335339,0.437646,0.311921,0.497503,5.0,2.0,5.0,23.412776,2.0,6381.0,6371.0,2.0,0.652144,4.0,46.585105,2.0,2.0,5.0,7.0,5.0,37.712513,8.0,0.53566,6379.0,7.0,1535.0,1.0,2.0,2.0,2.0,2.0,0.53566,5.0,4.0,5.0,5.0,23.412776,3.0,0.233554,6.0,5.0,2.0,1266.0,5.0,3.0,6380.0,2.0,9.0,37.402177,0.033306,0.436183,0.320727,3233.0,5.0,0.460807,46.585105,4.0,6379.0,2.0,6381.0,23.412776,0.488876,5.0,0.320727,10.0,1239.0,3233.0,0.320727,1534.0,2.0,0.652144,13814.0,4408.0,0.652144,6370.0,1238.0,1534.0,6380.0,7.0,2.0,0.53566,2.0,23.412776,46.585105,46.585105,5.0,5.0,0.032394,5.0,4.0,8.0,3.0,1.0,0.311921,0.504761,3378.0,2.0,35.739824,0.533821,36.085144,3377.0,35.923314,7.0,4.0,7.0,2.0,2.0,4409.0,3.0,4651.0,1535.0,1535.0,4407.0,5.0,5.0,35.104165,4409.0,1534.0,0.233554,6381.0,0.320727,0.0,2.0,5.0,5.0,6380.0,3378.0,1534.0,0.53566,23.412776,3.0,4.0,23.412776,5.0,6381.0,6379.0,0.463759,3.0,3233.0,3692.0,2.0,18122.0,2.0,2.0,2.0,0.068009,6.0,2.0,0.233554,6.0,5.0,0.0,0.0,0.443,0.192732,23.412776,3376.0,0.233554,8428.0,0.0,6.0,3.469912,3.0,6.0,5.0,37.712513,6.0,0.652144,2.0,0.645641,0.437473,0.446562,0.786364,2.0,3.0,6.0,3892.0,0.502599,0.437266,2.0,7.0,6371.0,2.0,1.0,1239.0,6380.0,-0.046355,2


In [100]:
# Combine X and y for mutual information

X_and_y = pd.concat([X_mi, y_train], axis=1)

In [101]:
# Initialize parameters
random_states = [5, 42, 100, 500]
n_neighbors_list = [3, 5, 7, 10, 20]
results = defaultdict(list)

In [102]:
# Calculate MI for each combination of random_state and n_neighbors
for random_state in random_states:
    for n_neighbors in n_neighbors_list:        
        # Calculate MI
        mi = mutual_info_regression(X_and_y, y_train, n_neighbors=n_neighbors, random_state=random_state)
        
        # Store results if the target has the highest MI score
        mi_dict = dict(zip(X_and_y.columns, mi))
        if mi_dict[TARGET] == max(mi_dict.values()):
            for feature, score in mi_dict.items():
                results[feature].append(score)

        print(f'Done with Random State - {random_state} and N Neighbors - {n_neighbors}')

# 16 minutes

Done with Random State - 5 and N Neighbors - 3
Done with Random State - 5 and N Neighbors - 5
Done with Random State - 5 and N Neighbors - 7
Done with Random State - 5 and N Neighbors - 10
Done with Random State - 5 and N Neighbors - 20
Done with Random State - 42 and N Neighbors - 3
Done with Random State - 42 and N Neighbors - 5
Done with Random State - 42 and N Neighbors - 7
Done with Random State - 42 and N Neighbors - 10
Done with Random State - 42 and N Neighbors - 20
Done with Random State - 100 and N Neighbors - 3
Done with Random State - 100 and N Neighbors - 5
Done with Random State - 100 and N Neighbors - 7
Done with Random State - 100 and N Neighbors - 10
Done with Random State - 100 and N Neighbors - 20
Done with Random State - 500 and N Neighbors - 3
Done with Random State - 500 and N Neighbors - 5
Done with Random State - 500 and N Neighbors - 7
Done with Random State - 500 and N Neighbors - 10
Done with Random State - 500 and N Neighbors - 20


In [103]:
# Average MI scores across valid combinations
average_mi = {feature: np.mean(scores) for feature, scores in results.items() if scores}
average_mi

{'clonesize': 0.09688619996819144,
 'honeybee': 0.06836308197807399,
 'bumbles': 0.02262557407668959,
 'andrena': 0.026954120386921753,
 'osmia': 0.03508373077921008,
 'MaxOfUpperTRange': 0.01804195616734954,
 'MinOfUpperTRange': 0.016968723538207574,
 'AverageOfUpperTRange': 0.017749899770761867,
 'MaxOfLowerTRange': 0.01799892497265825,
 'MinOfLowerTRange': 0.015239468362257601,
 'AverageOfLowerTRange': 0.01726033077238196,
 'RainingDays': 0.16272430034888377,
 'AverageRainingDays': 0.16330437873593276,
 'fruitset': 1.250255131825028,
 'fruitmass': 0.9069587932775496,
 'seeds': 1.0754376689271432,
 'autoFE_f_0': 1.1619198863622875,
 'autoFE_f_1': 1.2503169122808784,
 'autoFE_f_2': 1.0882098213032207,
 'autoFE_f_3': 0.900203945723483,
 'autoFE_f_4': 1.0302142696544496,
 'autoFE_f_5': 1.080281991501603,
 'autoFE_f_6': 1.0754587191842613,
 'autoFE_f_7': 0.9595427641320979,
 'autoFE_f_8': 0.9592901093945503,
 'autoFE_f_9': 0.9595640190377228,
 'autoFE_f_10': 1.1017991184942004,
 'autoFE_

In [104]:
# Display results
sorted_mi = sorted(average_mi.items(), key=lambda x: x[1], reverse=True)
print("Average MI scores:")
for item in sorted_mi:
    print(item)

Average MI scores:
('yield', 6.590818904098439)
('autoFE_f_1', 1.2503169122808784)
('fruitset', 1.250255131825028)
('autoFE_f_30', 1.1927776667202483)
('autoFE_f_0', 1.1619198863622875)
('autoFE_f_49', 1.1061045145802701)
('autoFE_f_10', 1.1017991184942004)
('autoFE_f_2', 1.0882098213032207)
('autoFE_f_5', 1.080281991501603)
('autoFE_f_29', 1.0786695749067392)
('autoFE_f_25', 1.07582081746041)
('autoFE_f_6', 1.0754587191842613)
('seeds', 1.0754376689271432)
('autoFE_f_4', 1.0302142696544496)
('autoFE_f_15', 1.0285572346958207)
('autoFE_f_43', 0.9958241259952103)
('autoFE_f_35', 0.9792922804176782)
('autoFE_f_19', 0.9742922133803926)
('autoFE_f_9', 0.9595640190377228)
('autoFE_f_7', 0.9595427641320979)
('autoFE_f_44', 0.9594685241199589)
('autoFE_f_8', 0.9592901093945503)
('autoFE_f_97', 0.9590318049745765)
('autoFE_f_21', 0.9206289920978028)
('fruitmass', 0.9069587932775496)
('autoFE_f_54', 0.9066776805792711)
('autoFE_f_3', 0.900203945723483)
('autoFE_f_12', 0.8998850956256014)
('auto

In [105]:
# Determine higher MI between 0 and random_feature
higher_threshold = max(0, average_mi.get('random_feature_categorical', 0), average_mi.get('random_feature_continous', 0))
higher_threshold

0.0024292609350075045

In [106]:
# List features with MI higher than the threshold, excluding the target
mi_features_list = [feature for feature, score in sorted_mi if feature != TARGET and score > higher_threshold]
mi_features_list

['autoFE_f_1',
 'fruitset',
 'autoFE_f_30',
 'autoFE_f_0',
 'autoFE_f_49',
 'autoFE_f_10',
 'autoFE_f_2',
 'autoFE_f_5',
 'autoFE_f_29',
 'autoFE_f_25',
 'autoFE_f_6',
 'seeds',
 'autoFE_f_4',
 'autoFE_f_15',
 'autoFE_f_43',
 'autoFE_f_35',
 'autoFE_f_19',
 'autoFE_f_9',
 'autoFE_f_7',
 'autoFE_f_44',
 'autoFE_f_8',
 'autoFE_f_97',
 'autoFE_f_21',
 'fruitmass',
 'autoFE_f_54',
 'autoFE_f_3',
 'autoFE_f_12',
 'autoFE_f_290',
 'autoFE_f_13',
 'autoFE_f_67',
 'autoFE_f_16',
 'autoFE_f_11',
 'autoFE_f_40',
 'autoFE_f_22',
 'autoFE_f_20',
 'autoFE_f_91',
 'autoFE_f_59',
 'autoFE_f_107',
 'autoFE_f_50',
 'autoFE_f_39',
 'autoFE_f_47',
 'autoFE_f_36',
 'autoFE_f_37',
 'autoFE_f_42',
 'autoFE_f_46',
 'autoFE_f_32',
 'autoFE_f_41',
 'autoFE_f_48',
 'autoFE_f_14',
 'autoFE_f_23',
 'autoFE_f_26',
 'autoFE_f_90',
 'autoFE_f_27',
 'autoFE_f_34',
 'autoFE_f_24',
 'autoFE_f_17',
 'autoFE_f_38',
 'autoFE_f_294',
 'autoFE_f_277',
 'autoFE_f_53',
 'autoFE_f_18',
 'autoFE_f_257',
 'autoFE_f_71',
 'autoFE

In [107]:
mi_features = {}

for model in models:
    if hasattr(model, 'name'):
        model_name = model.name
    else:
        model_name = model.__class__.__name__

    mi_features[model_name] = mi_features_list

In [108]:
with open('mi_features_ofe.txt', mode='w') as f:
    pprint(mi_features, stream=f)

In [109]:
%%time

openfe_models = evaluate_models(models, train_ofe_clean, y_train, mi_features, k10, f'{experiment_name}')
openfe_models

Models:   0%|          | 0/9 [00:00<?, ?it/s]

Progress:   0%|          | 0/9 [00:00<?, ?it/s]

Done with HistGradientBoostingRegressor.
Done with LGBMRegressor.
Done with LinearRegression.
Done with LR Pipeline.
Done with KNN.
Done with CatBoostRegressor.
Done with XGBRegressor.
Done with ExtraTreesRegressor.
Done with RandomForestRegressor.
CPU times: total: 6.44 s
Wall time: 4min 42s


Unnamed: 0,Model Name,Model Parameters,Model Train Accuracy,Model Test Accuracy,Model Test Accuracy Std,Model Time
2,HistGradientBoostingRegressor,"{'categorical_features': None, 'early_stopping...",312.520238,355.8268,1.391191,0 min 9.11 sec
5,LGBMRegressor,"{'boosting_type': 'gbdt', 'class_weight': None...",284.871166,357.3111,1.465843,0 min 6.94 sec
0,CatBoostRegressor,"{'loss_function': 'RMSE', 'verbose': False, 'r...",266.005446,358.7521,1.414994,1 min 40.13 sec
7,RandomForestRegressor,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri...",135.53238,364.2212,0.6090605,3 min 22.67 sec
1,ExtraTreesRegressor,"{'bootstrap': False, 'ccp_alpha': 0.0, 'criter...",0.088081,375.6752,1.751338,3 min 5.06 sec
8,XGBRegressor,"{'objective': 'reg:squarederror', 'base_score'...",189.621524,381.2772,2.003249,0 min 41.27 sec
3,KNN,"{'memory': None, 'steps': [('scaler', Standard...",480.239929,491.0762,1.31575,0 min 0.39 sec
6,LinearRegression,"{'copy_X': True, 'fit_intercept': True, 'n_job...",355.916883,4370.177,2302.077,0 min 0.72 sec
4,LR Pipeline,"{'memory': None, 'steps': [('scaler', Standard...",356.233067,2273845000000.0,1450825000000.0,0 min 0.77 sec


### Permutation Importance

In [133]:
%%time

perm_cv = KFold(n_splits=3, shuffle=True, random_state=5)

perm_importances = {model.name if hasattr(model, 'name') else model.__class__.__name__: [] for model in models}

for i, (train_idx, test_idx) in enumerate(perm_cv.split(X_mi, y_train)):
    X_train_pi, X_test_pi = X_mi.iloc[train_idx], X_mi.iloc[test_idx]
    y_train_pi, y_test_pi = y_train.iloc[train_idx], y_train.iloc[test_idx]

    for model in models:
        if hasattr(model, 'name'):
            model_name = model.name
        else:
            model_name = model.__class__.__name__

        model.fit(X_train_pi, y_train_pi)

        # Calculate permutation importance
        result = permutation_importance(model, X_test_pi, y_test_pi, n_repeats=5, random_state=5, scoring='neg_mean_absolute_error')

        perm_importances[model_name].append(result.importances_mean)
        
        print(f'Done with {model_name}.')
    
    print(f'Done with Fold {i+1}', end='\n\n')

Done with CatBoostRegressor.
Done with ExtraTreesRegressor.
Done with HistGradientBoostingRegressor.
Done with KNN.
Done with LR Pipeline.
Done with LGBMRegressor.
Done with LinearRegression.
Done with RandomForestRegressor.
Done with XGBRegressor.
Done with Fold 1

Done with CatBoostRegressor.
Done with ExtraTreesRegressor.
Done with HistGradientBoostingRegressor.
Done with KNN.
Done with LR Pipeline.
Done with LGBMRegressor.
Done with LinearRegression.
Done with RandomForestRegressor.
Done with XGBRegressor.
Done with Fold 2

Done with CatBoostRegressor.
Done with ExtraTreesRegressor.
Done with HistGradientBoostingRegressor.
Done with KNN.
Done with LR Pipeline.
Done with LGBMRegressor.
Done with LinearRegression.
Done with RandomForestRegressor.
Done with XGBRegressor.
Done with Fold 3

CPU times: total: 3h 51min 3s
Wall time: 3h 19min 21s


In [134]:
%%time

# Average importances across folds and export to CSV
for model_name, importances in perm_importances.items():
    avg_importance = np.mean(importances, axis=0)

    importance_df = pd.DataFrame({'Feature': X_mi.columns, 'Importance': avg_importance})

    importance_df.sort_values(by='Importance', ascending=False, inplace=True)
    
    # Export to CSV
    importance_df.to_csv(f'.\permutation_importances\{model_name}_permutation_importance.csv', index=False)

print('Done with Permuation Importances', end='\n\n')

Done with Permuation Importances

CPU times: total: 15.6 ms
Wall time: 51.7 ms


In [135]:
directory = 'permutation_importances'

# Initialize a dictionary for the features
perm_important_features = {}

for model in models:
    if hasattr(model, 'name'):
        model_name = model.name
    else:
        model_name = model.__class__.__name__
    print(f'Model: {model_name}')

    csv_path = os.path.join(directory, f'{model_name}_permutation_importance.csv')
    if os.path.exists(csv_path):
        df = pd.read_csv(csv_path)

        # Initialize importance variables
        random_feature_importance_cont = 0
        random_feature_importance_cat = 0
        
        # Check for 'random_feature_continous' and its importance
        if 'random_feature_continous' in df['Feature'].values:
            random_feature_importance_cont = df.loc[df['Feature'] == 'random_feature_continous', 'Importance'].iloc[0]
            print(random_feature_importance_cont)
        if 'random_feature_categorical' in df['Feature'].values:
            random_feature_importance_cat = df.loc[df['Feature'] == 'random_feature_categorical', 'Importance'].iloc[0]
            print(random_feature_importance_cat)
        else:
            random_feature_importance = 0

        # Determine the threshold
        threshold = max(0, random_feature_importance_cont, random_feature_importance_cat)
        print(f'Threshold: {threshold}')

        # Filter features where importance is greater than 0
        important_feats_filtered = df[df['Importance'] > threshold]['Feature'].tolist()

        # # Reorder important_feats based on the predefined features_list
        # important_feats_ordered = [feat for feat in features_list if feat in important_feats_filtered]

        # Add to importance dictionary
        perm_important_features[model_name] = important_feats_filtered

    else:
        print(f'CSV file for {model_name} not found.')

print('Done getting important features dictionary')

Model: CatBoostRegressor
0.6305742846787629
-0.1101050176316259
Threshold: 0.6305742846787629
Model: ExtraTreesRegressor
-0.1687513604265859
0.2108935119996772
Threshold: 0.2108935119996772
Model: HistGradientBoostingRegressor
-0.0386719684141401
-0.0067297981883181
Threshold: 0
Model: KNN
0.1100206248867418
0.26540902566461
Threshold: 0.26540902566461
Model: LR Pipeline
-0.0399902343749999
-0.0050455729166666
Threshold: 0
Model: LGBMRegressor
-0.2628713371120587
0.2500800615004095
Threshold: 0.2500800615004095
Model: LinearRegression
-0.03503130103151
-0.0046430629988511
Threshold: 0
Model: RandomForestRegressor
-0.2115225162797931
0.0184743937541914
Threshold: 0.0184743937541914
Model: XGBRegressor
-0.8572510388749529
0.1742902170630752
Threshold: 0.1742902170630752
Done getting important features dictionary


In [136]:
with open('perm_important_features_ofe.txt', mode='w') as f:
    pprint(perm_important_features, stream=f)

In [137]:
%%time

perm_imp_models = evaluate_models(models, train_ofe_clean, y_train, perm_important_features, k10, f'{experiment_name}')
perm_imp_models

Models:   0%|          | 0/9 [00:00<?, ?it/s]

Progress:   0%|          | 0/9 [00:00<?, ?it/s]

Done with LGBMRegressor.
Done with KNN.
Done with LinearRegression.
Done with HistGradientBoostingRegressor.
Done with ExtraTreesRegressor.
Done with XGBRegressor.
Done with LR Pipeline.
Done with CatBoostRegressor.
Done with RandomForestRegressor.
CPU times: total: 2.91 s
Wall time: 2min 1s


Unnamed: 0,Model Name,Model Parameters,Model Train Accuracy,Model Test Accuracy,Model Test Accuracy Std,Model Time
2,HistGradientBoostingRegressor,"{'categorical_features': None, 'early_stopping...",308.360755,355.8238,1.007208,0 min 4.08 sec
5,LGBMRegressor,"{'boosting_type': 'gbdt', 'class_weight': None...",289.619349,356.7973,1.512244,0 min 1.95 sec
0,CatBoostRegressor,"{'loss_function': 'RMSE', 'verbose': False, 'r...",271.048756,358.4085,1.674231,1 min 0.92 sec
3,KNN,"{'memory': None, 'steps': [('scaler', Standard...",355.597089,362.3269,3.230285,0 min 0.08 sec
7,RandomForestRegressor,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri...",135.340409,364.1293,0.6057182,1 min 20.91 sec
1,ExtraTreesRegressor,"{'bootstrap': False, 'ccp_alpha': 0.0, 'criter...",0.088081,371.5439,1.258526,0 min 35.83 sec
8,XGBRegressor,"{'objective': 'reg:squarederror', 'base_score'...",190.061984,381.1891,0.8297489,0 min 23.65 sec
6,LinearRegression,"{'copy_X': True, 'fit_intercept': True, 'n_job...",355.911991,228899900.0,232023200.0,0 min 0.81 sec
4,LR Pipeline,"{'memory': None, 'steps': [('scaler', Standard...",356.490587,12033620000000.0,8213508000000.0,0 min 0.80 sec


### SFS

In [None]:
%%time

# Initialize empty dictionary for SFS features
sfs_features = {}

for model in models:
    if hasattr(model, 'name'):
        model_name = model.name
    else:
        model_name = model.__class__.__name__

    try:
        # features = feats_so_far[model_name]
        features = baseline_features[model_name]

        # incase there is no feature that had importance, go to the next model
        if len(features) == 0:
            continue
        
        X_sfs = X_train[features]

        print(f'Running backward feature selection with {model_name}')

        sfs = SFS(model,
            k_features='best',
            forward=False,
            floating=False,
            scoring='neg_mean_absolute_error',
            verbose=2,
            n_jobs=-1,
            cv=k10)
        
        sfs = sfs.fit(X_sfs, y_train)

        # Get the selected features index
        selected_sfs_idx = list(sfs.k_feature_idx_)

        # Get the feature names
        selected_sfs_feats = X_sfs.columns[selected_sfs_idx]

        selected_features = list(selected_sfs_feats)

        # # Reorder selected_features based on the predefined features_list
        # selected_features_ordered = [feat for feat in features_list if feat in selected_features]

        sfs_features[model_name] = selected_features

        print(f'Done with {model_name}', end='\n\n')

    except KeyError:
        print(f'{model_name} not in the dictionary.')

# Took 138 minutes

In [None]:
%%time

sfs_models = evaluate_models(models, X_train, y_train, sfs_features, k10, f'{experiment_name}_sfs')
sfs_models

In [None]:
with open('sfs_features.txt', mode='w') as f:
    pprint(sfs_features, stream=f)

In [None]:
sfs_features = {'CatBoostRegressor': ['honeybee',
                       'MaxOfUpperTRange',
                       'MinOfUpperTRange',
                       'AverageOfUpperTRange',
                       'MinOfLowerTRange',
                       'AverageOfLowerTRange',
                       'AverageRainingDays',
                       'fruitset',
                       'fruitmass',
                       'seeds'],
 'ExtraTreesRegressor': ['clonesize',
                         'andrena',
                         'osmia',
                         'AverageOfLowerTRange',
                         'RainingDays',
                         'fruitset',
                         'fruitmass',
                         'seeds'],
 'HistGradientBoostingRegressor': ['clonesize',
                                   'honeybee',
                                   'MinOfUpperTRange',
                                   'RainingDays',
                                   'fruitset',
                                   'fruitmass',
                                   'seeds'],
 'KNN': ['MaxOfLowerTRange', 'fruitset', 'seeds'],
 'LGBMRegressor': ['clonesize',
                   'MaxOfLowerTRange',
                   'RainingDays',
                   'AverageRainingDays',
                   'fruitset',
                   'fruitmass',
                   'seeds'],
 'LR Pipeline': ['bumbles',
                 'andrena',
                 'osmia',
                 'AverageOfUpperTRange',
                 'MinOfLowerTRange',
                 'AverageRainingDays',
                 'fruitset',
                 'fruitmass',
                 'seeds'],
 'LinearRegression': ['bumbles',
                      'andrena',
                      'osmia',
                      'AverageOfUpperTRange',
                      'MinOfLowerTRange',
                      'AverageRainingDays',
                      'fruitset',
                      'fruitmass',
                      'seeds'],
 'RandomForestRegressor': ['clonesize',
                           'bumbles',
                           'andrena',
                           'osmia',
                           'MaxOfUpperTRange',
                           'MinOfUpperTRange',
                           'AverageOfUpperTRange',
                           'MinOfLowerTRange',
                           'AverageOfLowerTRange',
                           'RainingDays',
                           'AverageRainingDays',
                           'fruitset',
                           'fruitmass',
                           'seeds'],
 'XGBRegressor': ['AverageOfLowerTRange',
                  'AverageRainingDays',
                  'fruitset',
                  'seeds']}


## Best Single Model

In [110]:
model = HistGradientBoostingRegressor(random_state=5)

model.fit(train_ofe_clean[mi_features_list], y_train)

In [111]:
pred = model.predict(test_ofe_clean[mi_features_list])

pred_df = pd.DataFrame(pred, columns=['yield'])
pred_df.head()

Unnamed: 0,yield
0,4251.973443
1,6036.019978
2,7239.748855
3,4729.879767
4,4211.327659


In [121]:
test_pred = pd.concat([test_ofe_clean[mi_features_list], pred_df], axis=1)
test_pred.head()

Unnamed: 0,autoFE_f_1,fruitset,autoFE_f_30,autoFE_f_0,autoFE_f_49,autoFE_f_10,autoFE_f_2,autoFE_f_5,autoFE_f_29,autoFE_f_25,autoFE_f_6,seeds,autoFE_f_4,autoFE_f_15,autoFE_f_43,autoFE_f_35,autoFE_f_19,autoFE_f_9,autoFE_f_7,autoFE_f_44,autoFE_f_8,autoFE_f_97,autoFE_f_21,fruitmass,autoFE_f_54,autoFE_f_3,autoFE_f_12,autoFE_f_290,autoFE_f_13,autoFE_f_67,autoFE_f_16,autoFE_f_11,autoFE_f_40,autoFE_f_22,autoFE_f_20,autoFE_f_91,autoFE_f_59,autoFE_f_107,autoFE_f_50,autoFE_f_39,autoFE_f_47,autoFE_f_36,autoFE_f_37,autoFE_f_42,autoFE_f_46,autoFE_f_32,autoFE_f_41,autoFE_f_48,autoFE_f_14,autoFE_f_23,autoFE_f_26,autoFE_f_90,autoFE_f_27,autoFE_f_34,autoFE_f_24,autoFE_f_17,autoFE_f_38,autoFE_f_294,autoFE_f_277,autoFE_f_53,autoFE_f_18,autoFE_f_257,autoFE_f_71,autoFE_f_270,autoFE_f_63,autoFE_f_28,autoFE_f_62,autoFE_f_157,autoFE_f_57,autoFE_f_123,autoFE_f_31,autoFE_f_98,autoFE_f_68,autoFE_f_269,autoFE_f_184,autoFE_f_227,autoFE_f_192,autoFE_f_66,autoFE_f_235,autoFE_f_119,autoFE_f_229,autoFE_f_245,autoFE_f_84,autoFE_f_143,autoFE_f_260,autoFE_f_51,autoFE_f_291,autoFE_f_262,autoFE_f_212,autoFE_f_142,autoFE_f_83,autoFE_f_228,autoFE_f_100,autoFE_f_138,autoFE_f_111,autoFE_f_122,autoFE_f_205,autoFE_f_299,autoFE_f_239,autoFE_f_190,autoFE_f_296,AverageRainingDays,RainingDays,autoFE_f_125,autoFE_f_219,autoFE_f_147,autoFE_f_61,autoFE_f_88,autoFE_f_238,autoFE_f_183,autoFE_f_287,autoFE_f_77,autoFE_f_60,autoFE_f_129,autoFE_f_211,autoFE_f_244,autoFE_f_216,autoFE_f_65,autoFE_f_274,autoFE_f_226,autoFE_f_255,autoFE_f_248,autoFE_f_55,autoFE_f_109,autoFE_f_108,autoFE_f_86,autoFE_f_70,autoFE_f_103,autoFE_f_72,autoFE_f_276,autoFE_f_166,autoFE_f_169,autoFE_f_263,autoFE_f_214,autoFE_f_224,autoFE_f_187,clonesize,autoFE_f_95,autoFE_f_196,autoFE_f_80,autoFE_f_106,autoFE_f_256,autoFE_f_168,autoFE_f_182,autoFE_f_75,autoFE_f_249,autoFE_f_45,autoFE_f_207,autoFE_f_280,autoFE_f_159,autoFE_f_151,autoFE_f_272,autoFE_f_137,autoFE_f_78,autoFE_f_279,autoFE_f_251,autoFE_f_177,honeybee,autoFE_f_131,autoFE_f_288,autoFE_f_267,autoFE_f_203,autoFE_f_233,autoFE_f_201,autoFE_f_114,autoFE_f_56,autoFE_f_81,autoFE_f_188,autoFE_f_99,autoFE_f_230,autoFE_f_225,autoFE_f_94,autoFE_f_234,autoFE_f_105,autoFE_f_85,autoFE_f_292,autoFE_f_52,autoFE_f_174,autoFE_f_221,autoFE_f_197,autoFE_f_69,autoFE_f_161,autoFE_f_281,osmia,autoFE_f_92,autoFE_f_73,autoFE_f_162,autoFE_f_181,autoFE_f_191,autoFE_f_302,autoFE_f_210,autoFE_f_289,autoFE_f_295,andrena,autoFE_f_217,autoFE_f_96,autoFE_f_215,autoFE_f_259,autoFE_f_213,bumbles,autoFE_f_113,autoFE_f_64,autoFE_f_206,autoFE_f_271,autoFE_f_172,autoFE_f_264,autoFE_f_175,autoFE_f_303,autoFE_f_278,autoFE_f_127,autoFE_f_252,autoFE_f_133,autoFE_f_283,autoFE_f_116,autoFE_f_110,autoFE_f_156,autoFE_f_282,MaxOfUpperTRange,autoFE_f_253,MaxOfLowerTRange,autoFE_f_160,AverageOfUpperTRange,autoFE_f_163,autoFE_f_240,autoFE_f_237,autoFE_f_136,autoFE_f_193,autoFE_f_121,autoFE_f_173,autoFE_f_202,autoFE_f_115,autoFE_f_128,autoFE_f_254,autoFE_f_297,autoFE_f_180,autoFE_f_241,autoFE_f_261,AverageOfLowerTRange,autoFE_f_141,autoFE_f_243,autoFE_f_154,autoFE_f_300,autoFE_f_258,autoFE_f_74,autoFE_f_158,MinOfUpperTRange,autoFE_f_164,autoFE_f_145,autoFE_f_167,autoFE_f_194,autoFE_f_231,autoFE_f_170,MinOfLowerTRange,autoFE_f_275,autoFE_f_218,autoFE_f_298,autoFE_f_266,autoFE_f_33,autoFE_f_220,autoFE_f_153,autoFE_f_268,autoFE_f_236,autoFE_f_171,autoFE_f_232,autoFE_f_139,autoFE_f_79,autoFE_f_132,autoFE_f_209,autoFE_f_89,autoFE_f_208,autoFE_f_204,autoFE_f_178,autoFE_f_199,autoFE_f_242,autoFE_f_250,autoFE_f_247,autoFE_f_149,autoFE_f_134,autoFE_f_273,autoFE_f_148,autoFE_f_102,autoFE_f_112,autoFE_f_185,autoFE_f_198,autoFE_f_285,autoFE_f_104,autoFE_f_58,autoFE_f_186,autoFE_f_293,autoFE_f_150,autoFE_f_195,autoFE_f_82,autoFE_f_87,autoFE_f_200,autoFE_f_155,autoFE_f_124,autoFE_f_189,autoFE_f_246,autoFE_f_284,autoFE_f_179,autoFE_f_140,yield
0,0.399367,0.399367,0.408088,12.537955,0.807455,0.162977,31.793936,31.802657,-30.995202,-30.98648,31.394569,31.394569,31.0,0.123441,0.399367,0.251378,12.811749,0.109506,0.10949,0.109506,0.10949,0.10949,0.150673,0.408088,0.638818,0.125253,0.125235,0.125235,0.125253,0.125235,0.07858,-0.008721,0.285924,0.978629,0.139194,0.139174,0.139174,0.139194,0.139194,0.141415,0.097648,0.198902,0.33609,0.171348,0.130087,0.225822,0.113326,0.113169,0.012999,0.012721,0.141265,0.141108,0.196672,0.196515,65.0,0.394569,3891.0,3892.0,0.0,0.0,29.0,2616.0,2616.0,1.0,1.0,63.0,684.0,684.0,18.0,18.0,4651.0,1717.0,0.0,0.0,1717.0,4651.0,1717.0,1717.0,1717.0,1.0,1717.0,1717.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1717.0,1716.0,1716.0,0.437646,6371.0,4.0,6371.0,0.0,6370.0,0.437266,0.39,24.0,4.0,4.0,4.0,9101.0,0.0,0.320727,0.320727,0.645641,557.0,9.0,2.0,3836.0,3836.0,3836.0,3836.0,3836.0,5.0,5.0,5.0,5.0,5.0,719.0,2520.0,7.0,2.0,1.0,9506.0,0.436183,5.0,0.068009,0.533821,2.0,13814.0,25.0,5.0,0.53566,4.0,3208.0,3208.0,3207.0,3208.0,3207.0,4.0,1.0,1.0,1.0,1.0,1.0,0.192732,7.0,0.524854,4.188904,6.0,0.5369,0.25,5.0,0.457101,5.0,0.038487,37.388526,4.0,5.0,5.0,2712.0,4938.0,4939.0,4939.0,4939.0,9.0,4938.0,9.0,9.0,9.0,9.0,1.0,1.0,1.0,1.0,1.0,6.0,0.25,738.0,738.0,9.0,738.0,738.0,737.0,0.473459,0.431184,0.468053,0.25,34.137585,0.429122,34.43203,18122.0,35.739824,0.25,0.497503,0.226568,8.0,0.443,4.0,6.0,6922.0,6922.0,6.0,46.585105,6922.0,36.661684,36.661684,5.0,0.311921,1.0,10.0,86.0,6922.0,62.0,6922.0,71.9,36.508523,1.0,6922.0,6923.0,6922.0,6921.0,6923.0,5.0,1.0,1.0,0.45312,1.0,14.0,10.0,1.0,50.8,1.0,6923.0,6.0,1.0,1.0,0.076395,4.0,52.0,0.036841,10.0,0.320727,7.0,6.0,0.452137,30.0,0.233554,7.0,7.0,0.233554,7.0,7.0,0.233554,6.0,0.233554,46.585105,6.0,2.0,1.0,6.0,0.311921,2.0,2.0,6.0,5.0,46.585105,4.0,22.079199,22.079199,5.0,5.0,22.079199,5.0,0.645641,0.311921,1.0,22.079199,0.652144,5.0,0.652144,0.652144,2.0,23.412776,2.0,0.652144,46.585105,46.585105,6.0,0.652144,0.652144,0.53566,6.0,0.320727,1.0,4251.973443
1,0.488048,0.488048,0.488048,17.98308,0.930914,0.21614,37.335004,37.289822,-36.358909,-36.404091,36.846956,36.846956,36.0,0.434444,0.442866,0.365981,16.31825,0.447175,0.446978,0.446907,0.447066,0.447121,0.614971,0.442866,0.665481,0.593281,0.593287,0.593476,0.593192,0.593351,0.280066,0.045182,0.562684,1.102023,0.463562,0.46371,0.463648,0.463648,0.463932,0.231153,0.431366,0.497296,0.443193,0.386647,0.351094,0.306586,0.138446,0.138469,0.012019,0.013245,0.27664,0.276687,0.210522,0.210557,42.0,0.846956,3293.0,3293.0,3.0,3.0,8.0,3418.0,3418.0,3.0,3.0,38.0,769.0,769.0,4.0,4.0,3955.0,1374.0,3.0,3.0,1373.0,3954.0,1374.0,1374.0,1374.0,11.0,1374.0,1374.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,1373.0,1372.0,1372.0,0.468665,5959.0,5.0,5958.0,2.0,5958.0,0.471161,0.1,1.0,6.0,5.0,6.0,8581.0,1.0,0.349354,0.349354,0.652144,1384.0,5.0,1.0,2722.0,2719.0,2721.0,2721.0,2719.0,7.0,7.0,7.0,7.0,7.0,11101.0,1400.0,5.0,1.0,1.0,9506.0,0.46148,5.0,0.067236,0.53566,2.0,11129.0,12.5,6.0,0.53566,4.0,2969.0,2972.0,2969.0,2971.0,2971.0,4.0,7.0,7.0,7.0,7.0,7.0,0.233554,5.0,0.524854,4.188904,6.0,0.5369,0.25,5.0,0.457101,5.0,0.038487,37.388526,4.0,6.0,0.0,3049.0,4505.0,4506.0,4504.0,4505.0,11.0,4502.0,11.0,11.0,11.0,11.0,3.0,3.0,3.0,3.0,3.0,6.0,0.63,669.0,670.0,9.0,670.0,670.0,668.0,0.509934,0.448383,0.507141,0.75,36.311593,0.448256,36.360591,18122.0,35.739824,0.25,0.497503,0.233554,9.0,0.443,4.0,6.0,6266.0,6270.0,6.0,46.585105,6272.0,35.756223,35.756223,5.0,0.320727,3.0,7.0,94.6,6269.0,68.2,6271.0,79.0,35.864247,3.0,6265.0,6268.0,6264.0,6266.0,6269.0,5.0,3.0,3.0,0.446451,3.0,12.0,7.0,3.0,55.9,3.0,6270.0,6.0,3.0,3.0,0.080261,5.0,57.2,0.038244,7.0,0.320727,5.0,6.0,0.445705,33.0,0.192732,5.0,5.0,0.192732,5.0,5.0,0.192732,6.0,0.192732,46.369344,6.0,4.0,4.0,6.0,0.320727,4.0,3.0,6.0,6.0,46.369344,3.0,23.412776,23.412776,6.0,5.0,23.412776,6.0,0.652144,0.320727,1.0,23.412776,0.652144,5.0,0.652144,0.652144,2.0,23.412776,1.0,0.652144,46.369344,46.585105,5.0,0.652144,0.652144,0.53566,6.0,0.320727,2.0,6036.019978
2,0.583379,0.583379,0.583379,23.35711,1.070436,0.284139,40.621023,40.524701,-39.454265,-39.550587,40.037644,40.037644,40.0,0.872282,0.487057,0.870337,19.500607,0.828301,0.828326,0.828301,0.828326,0.828326,0.835669,0.487057,0.697895,0.783516,0.783548,0.783548,0.783661,0.783548,0.750268,0.096322,0.832631,1.197763,0.81631,0.816337,0.816337,0.816455,0.81631,0.729311,0.690755,0.870572,0.876153,0.664705,0.750681,0.72994,0.828281,0.828281,0.012165,0.014571,0.77183,0.77183,0.826961,0.826961,172.0,0.037644,3418.0,3418.0,6.0,6.0,36.0,3639.0,3639.0,6.0,6.0,47.0,1433.0,1433.0,13.0,13.0,5015.0,1934.0,6.0,6.0,1934.0,5015.0,1934.0,1934.0,1934.0,15.0,1934.0,1934.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,1934.0,1934.0,1934.0,0.456046,7192.0,3.0,7192.0,3.0,7192.0,0.457846,0.26,16.0,6.0,5.0,5.0,8581.0,1.0,0.320727,0.320727,0.652144,1713.0,2.0,1.0,2908.0,2907.0,2907.0,2908.0,2908.0,1.0,1.0,1.0,1.0,1.0,11101.0,1736.0,2.0,1.0,1.0,9506.0,0.46148,3.0,0.067236,0.53566,1.0,11129.0,12.5,6.0,0.53566,4.0,3208.0,3208.0,3207.0,3208.0,3207.0,4.0,1.0,1.0,1.0,1.0,1.0,0.233554,5.0,0.524854,4.188904,6.0,0.5369,0.25,5.0,0.457101,5.0,0.038487,37.388526,4.0,4.0,2.0,4375.0,4938.0,4939.0,4939.0,4939.0,2.0,4938.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,4.0,0.63,1435.0,1434.0,8.0,1435.0,1434.0,1435.0,0.504761,0.446562,0.502599,0.63,35.923314,0.44657,36.085144,18122.0,35.739824,0.25,0.497503,0.233554,8.0,0.443,4.0,6.0,6922.0,6922.0,5.0,46.585105,6922.0,36.661684,36.661684,5.0,0.335339,1.0,10.0,86.0,6922.0,62.0,6922.0,71.9,36.508523,1.0,6922.0,6923.0,6922.0,6921.0,6923.0,5.0,1.0,1.0,0.45312,1.0,14.0,10.0,1.0,50.8,1.0,6923.0,5.0,1.0,1.0,0.076395,5.0,52.0,0.036841,10.0,0.320727,7.0,6.0,0.452137,30.0,0.233554,7.0,7.0,0.233554,7.0,7.0,0.233554,6.0,0.233554,46.585105,6.0,2.0,1.0,6.0,0.311921,2.0,2.0,6.0,5.0,46.585105,5.0,22.079199,22.079199,5.0,5.0,22.079199,5.0,0.645641,0.311921,1.0,22.079199,0.652144,5.0,0.652144,0.652144,6.0,23.412776,2.0,0.652144,46.369344,46.585105,5.0,0.652144,0.652144,0.53566,6.0,0.320727,1.0,7239.748855
3,0.433014,0.433014,0.433014,14.339741,0.855861,0.183099,33.549105,33.538938,-32.683077,-32.693244,33.116091,33.116091,33.0,0.132604,0.422847,0.157341,14.003027,0.177261,0.177235,0.177261,0.177235,0.177235,0.171759,0.422847,0.650267,0.211355,0.211325,0.211325,0.211355,0.211325,0.222146,0.010168,0.213296,1.024046,0.208827,0.208797,0.208797,0.208827,0.208827,0.238418,0.292995,0.199284,0.239312,0.308672,0.323086,0.33857,0.083009,0.083009,0.012769,0.013076,0.122497,0.122497,0.151001,0.151001,31.0,0.116091,3674.0,3674.0,4.0,4.0,29.0,3514.0,3514.0,4.0,4.0,19.0,2243.0,2243.0,5.0,5.0,2172.0,1934.0,4.0,4.0,1934.0,2172.0,1934.0,1934.0,1934.0,15.0,1934.0,1934.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,1934.0,1934.0,1934.0,0.456046,7192.0,3.0,7192.0,3.0,7192.0,0.457846,0.26,16.0,6.0,5.0,5.0,4698.0,3.0,0.320727,0.320727,0.652144,3977.0,6.0,1.0,3836.0,3836.0,3836.0,3836.0,3836.0,5.0,5.0,5.0,5.0,5.0,13056.0,3870.0,6.0,0.0,3.0,4641.0,0.436183,3.0,0.068009,0.533821,1.0,13814.0,25.0,5.0,0.53566,4.0,3603.0,3603.0,3603.0,3603.0,3603.0,4.0,5.0,5.0,5.0,5.0,5.0,0.192732,7.0,0.483347,3.469912,5.0,0.488876,0.5,7.0,0.437473,6.0,0.032394,35.104165,5.0,4.0,4.0,3577.0,1930.0,1930.0,1930.0,1930.0,10.0,1930.0,10.0,10.0,10.0,10.0,4.0,4.0,4.0,4.0,4.0,4.0,0.63,2023.0,2023.0,8.0,2023.0,2023.0,2023.0,0.512803,0.449702,0.509669,0.38,36.682588,0.450692,36.554771,7266.0,37.22826,0.38,0.529071,0.233554,8.0,0.455488,5.0,4.0,6922.0,6922.0,5.0,46.585105,6922.0,36.661684,36.661684,5.0,0.320727,1.0,10.0,86.0,6922.0,62.0,6922.0,71.9,36.508523,1.0,6922.0,6923.0,6922.0,6921.0,6923.0,5.0,1.0,1.0,0.45312,1.0,14.0,10.0,1.0,50.8,1.0,6923.0,5.0,1.0,1.0,0.076395,5.0,52.0,0.036841,10.0,0.320727,7.0,6.0,0.452137,30.0,0.233554,7.0,7.0,0.233554,7.0,7.0,0.233554,6.0,0.233554,46.585105,6.0,2.0,1.0,6.0,0.311921,2.0,2.0,6.0,5.0,46.585105,3.0,22.079199,22.079199,5.0,5.0,22.079199,5.0,0.652144,0.311921,1.0,22.079199,0.652144,5.0,0.652144,0.652144,2.0,23.412776,2.0,0.652144,46.585105,46.585105,6.0,0.652144,0.652144,0.53566,6.0,0.320727,1.0,4729.879767
4,0.360996,0.360996,0.38886,10.670317,0.749856,0.140377,29.919015,29.946879,-29.197023,-29.169159,29.558019,29.558019,29.0,0.045028,0.360996,0.114296,11.49394,0.05466,0.054616,0.054608,0.054608,0.054634,0.052312,0.38886,0.623587,0.05849,0.058444,0.058462,0.058434,0.058434,0.271795,-0.027865,0.123117,0.928343,0.066656,0.066677,0.066645,0.066645,0.066709,0.326622,0.351282,0.070384,0.155274,0.368009,0.382051,0.436242,0.031863,0.031706,0.013156,0.012213,0.035159,0.035002,0.053759,0.053602,46.0,0.558019,110.0,110.0,9.0,9.0,36.0,47.0,47.0,10.0,10.0,40.0,684.0,684.0,18.0,18.0,4651.0,1583.0,0.0,0.0,1583.0,4651.0,1584.0,1583.0,1584.0,13.0,1583.0,1584.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,1582.0,1582.0,1581.0,0.437646,6371.0,4.0,6371.0,0.0,6370.0,0.437266,0.39,24.0,4.0,4.0,4.0,433.0,4.0,0.320727,0.320727,0.645641,415.0,10.0,2.0,124.0,124.0,124.0,124.0,124.0,10.0,10.0,10.0,10.0,10.0,146.0,163.0,9.0,3.0,4.0,183.0,0.39703,5.0,0.07383,0.51479,2.0,447.0,37.5,4.0,0.53566,4.0,61.0,61.0,61.0,61.0,61.0,4.0,10.0,10.0,10.0,10.0,10.0,0.233554,5.0,0.403677,3.763778,4.0,0.386657,0.75,4.0,0.402506,2.0,0.03539,31.339562,3.0,5.0,5.0,2712.0,4505.0,4506.0,4504.0,4505.0,16.0,4502.0,16.0,16.0,16.0,16.0,3.0,3.0,3.0,3.0,3.0,6.0,0.25,730.0,730.0,9.0,730.0,730.0,730.0,0.473459,0.431184,0.468053,0.25,34.137585,0.429122,34.43203,18122.0,35.739824,0.25,0.497503,0.226568,8.0,0.443,4.0,6.0,6266.0,6270.0,6.0,43.974882,6272.0,35.756223,35.756223,4.0,0.311921,3.0,7.0,94.6,6269.0,68.2,6271.0,79.0,35.864247,3.0,6265.0,6268.0,6264.0,6266.0,6269.0,3.0,3.0,3.0,0.446451,3.0,12.0,7.0,3.0,55.9,3.0,6270.0,6.0,3.0,3.0,0.080261,4.0,57.2,0.038244,7.0,0.311921,5.0,6.0,0.445705,33.0,0.192732,5.0,5.0,0.192732,5.0,5.0,0.192732,6.0,0.192732,46.369344,6.0,4.0,4.0,6.0,0.320727,4.0,3.0,6.0,6.0,46.369344,4.0,23.412776,23.412776,6.0,5.0,23.412776,6.0,0.645641,0.320727,1.0,23.412776,0.652144,5.0,0.652144,0.652144,2.0,23.412776,1.0,0.652144,46.585105,43.974882,6.0,0.621811,0.652144,0.53566,4.0,0.311921,2.0,4211.327659


### Manual Correction

In [122]:
# Combine X and y for mutual information

X_and_y = pd.concat([train_ofe_clean[mi_features_list], y_train], axis=1)

In [123]:
# Get list of all feature columns (excluding TARGET)
feature_columns = X_and_y.columns[X_and_y.columns != TARGET]

# Initialize a dictionary to store corrections
corrections = {}

# Iterate over each feature column
for column in feature_columns:
    # Count occurrences of each value in the column
    value_counts = X_and_y[column].value_counts()

    # Filter values that appear at least twice
    values_to_check = value_counts[value_counts >= 2].index
     
    # Iterate over each value in the column that appears at least twice
    for value in values_to_check:
        # Get rows where column equals value
        rows_with_value = X_and_y[X_and_y[column] == value]

        # Check if all rows with this value in column have the same target
        if rows_with_value[TARGET].nunique() == 1:
            target_value = rows_with_value[TARGET].iloc[0]
        
            # Store correction if it matches training pattern
            if (column, value, target_value) not in corrections:
                corrections[(column, value)] = target_value

In [124]:
corrections

{('autoFE_f_1', 0.249334678): 2605.69676,
 ('autoFE_f_1', 0.552101998): 7198.42285,
 ('fruitset', 0.249334678): 2605.69676,
 ('fruitset', 0.552101998): 7198.42285,
 ('autoFE_f_30', 0.370487534): 3139.43255,
 ('autoFE_f_30', 0.552101998): 7198.42285,
 ('autoFE_f_30', 0.342841074): 3436.49354,
 ('autoFE_f_0', 26.010030712147667): 7825.87492,
 ('autoFE_f_0', 10.835292393834624): 3708.20502,
 ('autoFE_f_0', 5.837616892739071): 2605.69676,
 ('autoFE_f_0', 8.645255682729612): 3049.26032,
 ('autoFE_f_0', 26.97231373398639): 8086.38881,
 ('autoFE_f_0', 12.294650507036259): 4179.18592,
 ('autoFE_f_0', 9.453110791942418): 3501.17992,
 ('autoFE_f_0', 8.885980656230375): 3238.02815,
 ('autoFE_f_0', 15.259729951702418): 5046.45734,
 ('autoFE_f_0', 14.17534999427809): 4673.38052,
 ('autoFE_f_0', 14.860035087676671): 4947.60566,
 ('autoFE_f_0', 18.590380091715055): 6301.03442,
 ('autoFE_f_0', 14.339741262671172): 4503.16186,
 ('autoFE_f_0', 7.632562839677861): 2688.02883,
 ('autoFE_f_0', 19.824076153

In [125]:
# Initialize a dictionary to store feature names related to each target value
target_features_dict = {}

# Iterate over corrections to populate target_features_dict
for (column, value), target_value in corrections.items():
    if target_value not in target_features_dict:
        target_features_dict[target_value] = []
    target_features_dict[target_value].append(column)

# Print or return the dictionary where target values are keys and related feature names are values
print("Dictionary of target values and related feature names:")
print(target_features_dict)

Dictionary of target values and related feature names:
{2605.69676: ['autoFE_f_1', 'fruitset', 'autoFE_f_0', 'autoFE_f_49', 'autoFE_f_49', 'autoFE_f_10', 'autoFE_f_10', 'autoFE_f_2', 'autoFE_f_5', 'autoFE_f_5', 'autoFE_f_29', 'autoFE_f_25', 'autoFE_f_25', 'autoFE_f_15', 'autoFE_f_43', 'autoFE_f_35', 'autoFE_f_19', 'autoFE_f_19', 'autoFE_f_9', 'autoFE_f_9', 'autoFE_f_7', 'autoFE_f_7', 'autoFE_f_44', 'autoFE_f_44', 'autoFE_f_8', 'autoFE_f_8', 'autoFE_f_97', 'autoFE_f_97', 'autoFE_f_16', 'autoFE_f_16', 'autoFE_f_11', 'autoFE_f_11', 'autoFE_f_40', 'autoFE_f_22', 'autoFE_f_22', 'autoFE_f_39', 'autoFE_f_39', 'autoFE_f_39', 'autoFE_f_42', 'autoFE_f_42', 'autoFE_f_46', 'autoFE_f_32', 'autoFE_f_41', 'autoFE_f_48', 'autoFE_f_14', 'autoFE_f_14', 'autoFE_f_23'], 7198.42285: ['autoFE_f_1', 'fruitset', 'autoFE_f_30'], 3139.43255: ['autoFE_f_30', 'autoFE_f_49', 'autoFE_f_10', 'autoFE_f_5', 'autoFE_f_25', 'autoFE_f_19', 'autoFE_f_11', 'autoFE_f_22', 'autoFE_f_20', 'autoFE_f_91', 'autoFE_f_59', 'autoFE

In [127]:
# Apply corrections to test_pred
for (column, value), target_value in corrections.items():
    test_pred.loc[test_pred[column] == value, TARGET] = target_value

# Print corrected predictions
print("Corrected predictions in test_pred:")
test_pred

Corrected predictions in test_pred:


Unnamed: 0,autoFE_f_1,fruitset,autoFE_f_30,autoFE_f_0,autoFE_f_49,autoFE_f_10,autoFE_f_2,autoFE_f_5,autoFE_f_29,autoFE_f_25,autoFE_f_6,seeds,autoFE_f_4,autoFE_f_15,autoFE_f_43,autoFE_f_35,autoFE_f_19,autoFE_f_9,autoFE_f_7,autoFE_f_44,autoFE_f_8,autoFE_f_97,autoFE_f_21,fruitmass,autoFE_f_54,autoFE_f_3,autoFE_f_12,autoFE_f_290,autoFE_f_13,autoFE_f_67,autoFE_f_16,autoFE_f_11,autoFE_f_40,autoFE_f_22,autoFE_f_20,autoFE_f_91,autoFE_f_59,autoFE_f_107,autoFE_f_50,autoFE_f_39,autoFE_f_47,autoFE_f_36,autoFE_f_37,autoFE_f_42,autoFE_f_46,autoFE_f_32,autoFE_f_41,autoFE_f_48,autoFE_f_14,autoFE_f_23,autoFE_f_26,autoFE_f_90,autoFE_f_27,autoFE_f_34,autoFE_f_24,autoFE_f_17,autoFE_f_38,autoFE_f_294,autoFE_f_277,autoFE_f_53,autoFE_f_18,autoFE_f_257,autoFE_f_71,autoFE_f_270,autoFE_f_63,autoFE_f_28,autoFE_f_62,autoFE_f_157,autoFE_f_57,autoFE_f_123,autoFE_f_31,autoFE_f_98,autoFE_f_68,autoFE_f_269,autoFE_f_184,autoFE_f_227,autoFE_f_192,autoFE_f_66,autoFE_f_235,autoFE_f_119,autoFE_f_229,autoFE_f_245,autoFE_f_84,autoFE_f_143,autoFE_f_260,autoFE_f_51,autoFE_f_291,autoFE_f_262,autoFE_f_212,autoFE_f_142,autoFE_f_83,autoFE_f_228,autoFE_f_100,autoFE_f_138,autoFE_f_111,autoFE_f_122,autoFE_f_205,autoFE_f_299,autoFE_f_239,autoFE_f_190,autoFE_f_296,AverageRainingDays,RainingDays,autoFE_f_125,autoFE_f_219,autoFE_f_147,autoFE_f_61,autoFE_f_88,autoFE_f_238,autoFE_f_183,autoFE_f_287,autoFE_f_77,autoFE_f_60,autoFE_f_129,autoFE_f_211,autoFE_f_244,autoFE_f_216,autoFE_f_65,autoFE_f_274,autoFE_f_226,autoFE_f_255,autoFE_f_248,autoFE_f_55,autoFE_f_109,autoFE_f_108,autoFE_f_86,autoFE_f_70,autoFE_f_103,autoFE_f_72,autoFE_f_276,autoFE_f_166,autoFE_f_169,autoFE_f_263,autoFE_f_214,autoFE_f_224,autoFE_f_187,clonesize,autoFE_f_95,autoFE_f_196,autoFE_f_80,autoFE_f_106,autoFE_f_256,autoFE_f_168,autoFE_f_182,autoFE_f_75,autoFE_f_249,autoFE_f_45,autoFE_f_207,autoFE_f_280,autoFE_f_159,autoFE_f_151,autoFE_f_272,autoFE_f_137,autoFE_f_78,autoFE_f_279,autoFE_f_251,autoFE_f_177,honeybee,autoFE_f_131,autoFE_f_288,autoFE_f_267,autoFE_f_203,autoFE_f_233,autoFE_f_201,autoFE_f_114,autoFE_f_56,autoFE_f_81,autoFE_f_188,autoFE_f_99,autoFE_f_230,autoFE_f_225,autoFE_f_94,autoFE_f_234,autoFE_f_105,autoFE_f_85,autoFE_f_292,autoFE_f_52,autoFE_f_174,autoFE_f_221,autoFE_f_197,autoFE_f_69,autoFE_f_161,autoFE_f_281,osmia,autoFE_f_92,autoFE_f_73,autoFE_f_162,autoFE_f_181,autoFE_f_191,autoFE_f_302,autoFE_f_210,autoFE_f_289,autoFE_f_295,andrena,autoFE_f_217,autoFE_f_96,autoFE_f_215,autoFE_f_259,autoFE_f_213,bumbles,autoFE_f_113,autoFE_f_64,autoFE_f_206,autoFE_f_271,autoFE_f_172,autoFE_f_264,autoFE_f_175,autoFE_f_303,autoFE_f_278,autoFE_f_127,autoFE_f_252,autoFE_f_133,autoFE_f_283,autoFE_f_116,autoFE_f_110,autoFE_f_156,autoFE_f_282,MaxOfUpperTRange,autoFE_f_253,MaxOfLowerTRange,autoFE_f_160,AverageOfUpperTRange,autoFE_f_163,autoFE_f_240,autoFE_f_237,autoFE_f_136,autoFE_f_193,autoFE_f_121,autoFE_f_173,autoFE_f_202,autoFE_f_115,autoFE_f_128,autoFE_f_254,autoFE_f_297,autoFE_f_180,autoFE_f_241,autoFE_f_261,AverageOfLowerTRange,autoFE_f_141,autoFE_f_243,autoFE_f_154,autoFE_f_300,autoFE_f_258,autoFE_f_74,autoFE_f_158,MinOfUpperTRange,autoFE_f_164,autoFE_f_145,autoFE_f_167,autoFE_f_194,autoFE_f_231,autoFE_f_170,MinOfLowerTRange,autoFE_f_275,autoFE_f_218,autoFE_f_298,autoFE_f_266,autoFE_f_33,autoFE_f_220,autoFE_f_153,autoFE_f_268,autoFE_f_236,autoFE_f_171,autoFE_f_232,autoFE_f_139,autoFE_f_79,autoFE_f_132,autoFE_f_209,autoFE_f_89,autoFE_f_208,autoFE_f_204,autoFE_f_178,autoFE_f_199,autoFE_f_242,autoFE_f_250,autoFE_f_247,autoFE_f_149,autoFE_f_134,autoFE_f_273,autoFE_f_148,autoFE_f_102,autoFE_f_112,autoFE_f_185,autoFE_f_198,autoFE_f_285,autoFE_f_104,autoFE_f_58,autoFE_f_186,autoFE_f_293,autoFE_f_150,autoFE_f_195,autoFE_f_82,autoFE_f_87,autoFE_f_200,autoFE_f_155,autoFE_f_124,autoFE_f_189,autoFE_f_246,autoFE_f_284,autoFE_f_179,autoFE_f_140,yield
0,0.399367,0.399367,0.408088,12.537955,0.807455,0.162977,31.793936,31.802657,-30.995202,-30.986480,31.394569,31.394569,31.0,0.123441,0.399367,0.251378,12.811749,0.109506,0.109490,0.109506,0.109490,0.109490,0.150673,0.408088,0.638818,0.125253,0.125235,0.125235,0.125253,0.125235,0.078580,-0.008721,0.285924,0.978629,0.139194,0.139174,0.139174,0.139194,0.139194,0.141415,0.097648,0.198902,0.336090,0.171348,0.130087,0.225822,0.113326,0.113169,0.012999,0.012721,0.141265,0.141108,0.196672,0.196515,65.0,0.394569,3891.0,3892.0,0.0,0.0,29.0,2616.0,2616.0,1.0,1.0,63.0,684.0,684.0,18.0,18.0,4651.0,1717.0,0.0,0.0,1717.0,4651.0,1717.0,1717.0,1717.0,1.0,1717.0,1717.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1717.0,1716.0,1716.0,0.437646,6371.0,4.0,6371.0,0.0,6370.0,0.437266,0.39,24.0,4.0,4.0,4.0,9101.0,0.0,0.320727,0.320727,0.645641,557.0,9.0,2.0,3836.0,3836.0,3836.0,3836.0,3836.0,5.0,5.0,5.0,5.0,5.0,719.0,2520.0,7.0,2.0,1.0,9506.0,0.436183,5.0,0.068009,0.533821,2.0,13814.0,25.0,5.0,0.535660,4.0,3208.0,3208.0,3207.0,3208.0,3207.0,4.0,1.0,1.0,1.0,1.0,1.0,0.192732,7.0,0.524854,4.188904,6.0,0.536900,0.25,5.0,0.457101,5.0,0.038487,37.388526,4.0,5.0,5.0,2712.0,4938.0,4939.0,4939.0,4939.0,9.0,4938.0,9.0,9.0,9.0,9.0,1.0,1.0,1.0,1.0,1.0,6.0,0.25,738.0,738.0,9.0,738.0,738.0,737.0,0.473459,0.431184,0.468053,0.25,34.137585,0.429122,34.432030,18122.0,35.739824,0.25,0.497503,0.226568,8.0,0.443000,4.0,6.0,6922.0,6922.0,6.0,46.585105,6922.0,36.661684,36.661684,5.0,0.311921,1.0,10.0,86.0,6922.0,62.0,6922.0,71.9,36.508523,1.0,6922.0,6923.0,6922.0,6921.0,6923.0,5.0,1.0,1.0,0.453120,1.0,14.0,10.0,1.0,50.8,1.0,6923.0,6.0,1.0,1.0,0.076395,4.0,52.0,0.036841,10.0,0.320727,7.0,6.0,0.452137,30.0,0.233554,7.0,7.0,0.233554,7.0,7.0,0.233554,6.0,0.233554,46.585105,6.0,2.0,1.0,6.0,0.311921,2.0,2.0,6.0,5.0,46.585105,4.0,22.079199,22.079199,5.0,5.0,22.079199,5.0,0.645641,0.311921,1.0,22.079199,0.652144,5.0,0.652144,0.652144,2.0,23.412776,2.0,0.652144,46.585105,46.585105,6.0,0.652144,0.652144,0.53566,6.0,0.320727,1.0,4251.973443
1,0.488048,0.488048,0.488048,17.983080,0.930914,0.216140,37.335004,37.289822,-36.358909,-36.404091,36.846956,36.846956,36.0,0.434444,0.442866,0.365981,16.318250,0.447175,0.446978,0.446907,0.447066,0.447121,0.614971,0.442866,0.665481,0.593281,0.593287,0.593476,0.593192,0.593351,0.280066,0.045182,0.562684,1.102023,0.463562,0.463710,0.463648,0.463648,0.463932,0.231153,0.431366,0.497296,0.443193,0.386647,0.351094,0.306586,0.138446,0.138469,0.012019,0.013245,0.276640,0.276687,0.210522,0.210557,42.0,0.846956,3293.0,3293.0,3.0,3.0,8.0,3418.0,3418.0,3.0,3.0,38.0,769.0,769.0,4.0,4.0,3955.0,1374.0,3.0,3.0,1373.0,3954.0,1374.0,1374.0,1374.0,11.0,1374.0,1374.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,1373.0,1372.0,1372.0,0.468665,5959.0,5.0,5958.0,2.0,5958.0,0.471161,0.10,1.0,6.0,5.0,6.0,8581.0,1.0,0.349354,0.349354,0.652144,1384.0,5.0,1.0,2722.0,2719.0,2721.0,2721.0,2719.0,7.0,7.0,7.0,7.0,7.0,11101.0,1400.0,5.0,1.0,1.0,9506.0,0.461480,5.0,0.067236,0.535660,2.0,11129.0,12.5,6.0,0.535660,4.0,2969.0,2972.0,2969.0,2971.0,2971.0,4.0,7.0,7.0,7.0,7.0,7.0,0.233554,5.0,0.524854,4.188904,6.0,0.536900,0.25,5.0,0.457101,5.0,0.038487,37.388526,4.0,6.0,0.0,3049.0,4505.0,4506.0,4504.0,4505.0,11.0,4502.0,11.0,11.0,11.0,11.0,3.0,3.0,3.0,3.0,3.0,6.0,0.63,669.0,670.0,9.0,670.0,670.0,668.0,0.509934,0.448383,0.507141,0.75,36.311593,0.448256,36.360591,18122.0,35.739824,0.25,0.497503,0.233554,9.0,0.443000,4.0,6.0,6266.0,6270.0,6.0,46.585105,6272.0,35.756223,35.756223,5.0,0.320727,3.0,7.0,94.6,6269.0,68.2,6271.0,79.0,35.864247,3.0,6265.0,6268.0,6264.0,6266.0,6269.0,5.0,3.0,3.0,0.446451,3.0,12.0,7.0,3.0,55.9,3.0,6270.0,6.0,3.0,3.0,0.080261,5.0,57.2,0.038244,7.0,0.320727,5.0,6.0,0.445705,33.0,0.192732,5.0,5.0,0.192732,5.0,5.0,0.192732,6.0,0.192732,46.369344,6.0,4.0,4.0,6.0,0.320727,4.0,3.0,6.0,6.0,46.369344,3.0,23.412776,23.412776,6.0,5.0,23.412776,6.0,0.652144,0.320727,1.0,23.412776,0.652144,5.0,0.652144,0.652144,2.0,23.412776,1.0,0.652144,46.369344,46.585105,5.0,0.652144,0.652144,0.53566,6.0,0.320727,2.0,6036.019978
2,0.583379,0.583379,0.583379,23.357110,1.070436,0.284139,40.621023,40.524701,-39.454265,-39.550587,40.037644,40.037644,40.0,0.872282,0.487057,0.870337,19.500607,0.828301,0.828326,0.828301,0.828326,0.828326,0.835669,0.487057,0.697895,0.783516,0.783548,0.783548,0.783661,0.783548,0.750268,0.096322,0.832631,1.197763,0.816310,0.816337,0.816337,0.816455,0.816310,0.729311,0.690755,0.870572,0.876153,0.664705,0.750681,0.729940,0.828281,0.828281,0.012165,0.014571,0.771830,0.771830,0.826961,0.826961,172.0,0.037644,3418.0,3418.0,6.0,6.0,36.0,3639.0,3639.0,6.0,6.0,47.0,1433.0,1433.0,13.0,13.0,5015.0,1934.0,6.0,6.0,1934.0,5015.0,1934.0,1934.0,1934.0,15.0,1934.0,1934.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,1934.0,1934.0,1934.0,0.456046,7192.0,3.0,7192.0,3.0,7192.0,0.457846,0.26,16.0,6.0,5.0,5.0,8581.0,1.0,0.320727,0.320727,0.652144,1713.0,2.0,1.0,2908.0,2907.0,2907.0,2908.0,2908.0,1.0,1.0,1.0,1.0,1.0,11101.0,1736.0,2.0,1.0,1.0,9506.0,0.461480,3.0,0.067236,0.535660,1.0,11129.0,12.5,6.0,0.535660,4.0,3208.0,3208.0,3207.0,3208.0,3207.0,4.0,1.0,1.0,1.0,1.0,1.0,0.233554,5.0,0.524854,4.188904,6.0,0.536900,0.25,5.0,0.457101,5.0,0.038487,37.388526,4.0,4.0,2.0,4375.0,4938.0,4939.0,4939.0,4939.0,2.0,4938.0,2.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,4.0,0.63,1435.0,1434.0,8.0,1435.0,1434.0,1435.0,0.504761,0.446562,0.502599,0.63,35.923314,0.446570,36.085144,18122.0,35.739824,0.25,0.497503,0.233554,8.0,0.443000,4.0,6.0,6922.0,6922.0,5.0,46.585105,6922.0,36.661684,36.661684,5.0,0.335339,1.0,10.0,86.0,6922.0,62.0,6922.0,71.9,36.508523,1.0,6922.0,6923.0,6922.0,6921.0,6923.0,5.0,1.0,1.0,0.453120,1.0,14.0,10.0,1.0,50.8,1.0,6923.0,5.0,1.0,1.0,0.076395,5.0,52.0,0.036841,10.0,0.320727,7.0,6.0,0.452137,30.0,0.233554,7.0,7.0,0.233554,7.0,7.0,0.233554,6.0,0.233554,46.585105,6.0,2.0,1.0,6.0,0.311921,2.0,2.0,6.0,5.0,46.585105,5.0,22.079199,22.079199,5.0,5.0,22.079199,5.0,0.645641,0.311921,1.0,22.079199,0.652144,5.0,0.652144,0.652144,6.0,23.412776,2.0,0.652144,46.369344,46.585105,5.0,0.652144,0.652144,0.53566,6.0,0.320727,1.0,7239.748855
3,0.433014,0.433014,0.433014,14.339741,0.855861,0.183099,33.549105,33.538938,-32.683077,-32.693244,33.116091,33.116091,33.0,0.132604,0.422847,0.157341,14.003027,0.177261,0.177235,0.177261,0.177235,0.177235,0.171759,0.422847,0.650267,0.211355,0.211325,0.211325,0.211355,0.211325,0.222146,0.010168,0.213296,1.024046,0.208827,0.208797,0.208797,0.208827,0.208827,0.238418,0.292995,0.199284,0.239312,0.308672,0.323086,0.338570,0.083009,0.083009,0.012769,0.013076,0.122497,0.122497,0.151001,0.151001,31.0,0.116091,3674.0,3674.0,4.0,4.0,29.0,3514.0,3514.0,4.0,4.0,19.0,2243.0,2243.0,5.0,5.0,2172.0,1934.0,4.0,4.0,1934.0,2172.0,1934.0,1934.0,1934.0,15.0,1934.0,1934.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,1934.0,1934.0,1934.0,0.456046,7192.0,3.0,7192.0,3.0,7192.0,0.457846,0.26,16.0,6.0,5.0,5.0,4698.0,3.0,0.320727,0.320727,0.652144,3977.0,6.0,1.0,3836.0,3836.0,3836.0,3836.0,3836.0,5.0,5.0,5.0,5.0,5.0,13056.0,3870.0,6.0,0.0,3.0,4641.0,0.436183,3.0,0.068009,0.533821,1.0,13814.0,25.0,5.0,0.535660,4.0,3603.0,3603.0,3603.0,3603.0,3603.0,4.0,5.0,5.0,5.0,5.0,5.0,0.192732,7.0,0.483347,3.469912,5.0,0.488876,0.50,7.0,0.437473,6.0,0.032394,35.104165,5.0,4.0,4.0,3577.0,1930.0,1930.0,1930.0,1930.0,10.0,1930.0,10.0,10.0,10.0,10.0,4.0,4.0,4.0,4.0,4.0,4.0,0.63,2023.0,2023.0,8.0,2023.0,2023.0,2023.0,0.512803,0.449702,0.509669,0.38,36.682588,0.450692,36.554771,7266.0,37.228260,0.38,0.529071,0.233554,8.0,0.455488,5.0,4.0,6922.0,6922.0,5.0,46.585105,6922.0,36.661684,36.661684,5.0,0.320727,1.0,10.0,86.0,6922.0,62.0,6922.0,71.9,36.508523,1.0,6922.0,6923.0,6922.0,6921.0,6923.0,5.0,1.0,1.0,0.453120,1.0,14.0,10.0,1.0,50.8,1.0,6923.0,5.0,1.0,1.0,0.076395,5.0,52.0,0.036841,10.0,0.320727,7.0,6.0,0.452137,30.0,0.233554,7.0,7.0,0.233554,7.0,7.0,0.233554,6.0,0.233554,46.585105,6.0,2.0,1.0,6.0,0.311921,2.0,2.0,6.0,5.0,46.585105,3.0,22.079199,22.079199,5.0,5.0,22.079199,5.0,0.652144,0.311921,1.0,22.079199,0.652144,5.0,0.652144,0.652144,2.0,23.412776,2.0,0.652144,46.585105,46.585105,6.0,0.652144,0.652144,0.53566,6.0,0.320727,1.0,4503.161860
4,0.360996,0.360996,0.388860,10.670317,0.749856,0.140377,29.919015,29.946879,-29.197023,-29.169159,29.558019,29.558019,29.0,0.045028,0.360996,0.114296,11.493940,0.054660,0.054616,0.054608,0.054608,0.054634,0.052312,0.388860,0.623587,0.058490,0.058444,0.058462,0.058434,0.058434,0.271795,-0.027865,0.123117,0.928343,0.066656,0.066677,0.066645,0.066645,0.066709,0.326622,0.351282,0.070384,0.155274,0.368009,0.382051,0.436242,0.031863,0.031706,0.013156,0.012213,0.035159,0.035002,0.053759,0.053602,46.0,0.558019,110.0,110.0,9.0,9.0,36.0,47.0,47.0,10.0,10.0,40.0,684.0,684.0,18.0,18.0,4651.0,1583.0,0.0,0.0,1583.0,4651.0,1584.0,1583.0,1584.0,13.0,1583.0,1584.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,1582.0,1582.0,1581.0,0.437646,6371.0,4.0,6371.0,0.0,6370.0,0.437266,0.39,24.0,4.0,4.0,4.0,433.0,4.0,0.320727,0.320727,0.645641,415.0,10.0,2.0,124.0,124.0,124.0,124.0,124.0,10.0,10.0,10.0,10.0,10.0,146.0,163.0,9.0,3.0,4.0,183.0,0.397030,5.0,0.073830,0.514790,2.0,447.0,37.5,4.0,0.535660,4.0,61.0,61.0,61.0,61.0,61.0,4.0,10.0,10.0,10.0,10.0,10.0,0.233554,5.0,0.403677,3.763778,4.0,0.386657,0.75,4.0,0.402506,2.0,0.035390,31.339562,3.0,5.0,5.0,2712.0,4505.0,4506.0,4504.0,4505.0,16.0,4502.0,16.0,16.0,16.0,16.0,3.0,3.0,3.0,3.0,3.0,6.0,0.25,730.0,730.0,9.0,730.0,730.0,730.0,0.473459,0.431184,0.468053,0.25,34.137585,0.429122,34.432030,18122.0,35.739824,0.25,0.497503,0.226568,8.0,0.443000,4.0,6.0,6266.0,6270.0,6.0,43.974882,6272.0,35.756223,35.756223,4.0,0.311921,3.0,7.0,94.6,6269.0,68.2,6271.0,79.0,35.864247,3.0,6265.0,6268.0,6264.0,6266.0,6269.0,3.0,3.0,3.0,0.446451,3.0,12.0,7.0,3.0,55.9,3.0,6270.0,6.0,3.0,3.0,0.080261,4.0,57.2,0.038244,7.0,0.311921,5.0,6.0,0.445705,33.0,0.192732,5.0,5.0,0.192732,5.0,5.0,0.192732,6.0,0.192732,46.369344,6.0,4.0,4.0,6.0,0.320727,4.0,3.0,6.0,6.0,46.369344,4.0,23.412776,23.412776,6.0,5.0,23.412776,6.0,0.645641,0.320727,1.0,23.412776,0.652144,5.0,0.652144,0.652144,2.0,23.412776,1.0,0.652144,46.585105,43.974882,6.0,0.621811,0.652144,0.53566,4.0,0.311921,2.0,4211.327659
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10189,0.474162,0.474162,0.474162,16.370577,0.912085,0.207646,34.999421,34.963181,-34.051096,-34.087335,34.525258,34.525258,34.0,0.366654,0.437923,0.504594,15.119395,0.306342,0.306442,0.306487,0.306442,0.306442,0.374683,0.437923,0.661757,0.315082,0.315037,0.315037,0.315082,0.315037,0.230912,0.036240,0.515068,1.082753,0.340725,0.340676,0.340676,0.340725,0.340725,0.433944,0.241312,0.448433,0.579566,0.435355,0.312877,0.511003,0.417831,0.417831,0.012684,0.013734,0.424894,0.424894,0.512243,0.512243,79.0,0.525258,3891.0,3892.0,0.0,0.0,22.0,2616.0,2616.0,1.0,1.0,63.0,684.0,684.0,18.0,18.0,4651.0,1717.0,0.0,0.0,1717.0,4651.0,1717.0,1717.0,1717.0,1.0,1717.0,1717.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1717.0,1716.0,1716.0,0.437646,6371.0,4.0,6371.0,0.0,6370.0,0.437266,0.39,24.0,4.0,4.0,4.0,9101.0,0.0,0.320727,0.320727,0.645641,557.0,9.0,2.0,3836.0,3836.0,3836.0,3836.0,3836.0,5.0,5.0,5.0,5.0,5.0,719.0,2520.0,7.0,2.0,1.0,9506.0,0.436183,5.0,0.068009,0.533821,2.0,13814.0,25.0,5.0,0.535660,4.0,3208.0,3208.0,3207.0,3208.0,3207.0,4.0,1.0,1.0,1.0,1.0,1.0,0.192732,7.0,0.524854,4.188904,6.0,0.536900,0.25,5.0,0.457101,5.0,0.038487,37.388526,4.0,5.0,5.0,2712.0,4938.0,4939.0,4939.0,4939.0,9.0,4938.0,9.0,9.0,9.0,9.0,1.0,1.0,1.0,1.0,1.0,6.0,0.25,738.0,738.0,9.0,738.0,738.0,737.0,0.473459,0.431184,0.468053,0.25,34.137585,0.429122,34.432030,18122.0,35.739824,0.25,0.497503,0.226568,8.0,0.443000,4.0,6.0,6922.0,6922.0,6.0,46.585105,6922.0,36.661684,36.661684,5.0,0.311921,1.0,10.0,86.0,6922.0,62.0,6922.0,71.9,36.508523,1.0,6922.0,6923.0,6922.0,6921.0,6923.0,5.0,1.0,1.0,0.453120,1.0,14.0,10.0,1.0,50.8,1.0,6923.0,6.0,1.0,1.0,0.076395,4.0,52.0,0.036841,10.0,0.320727,7.0,6.0,0.452137,30.0,0.233554,7.0,7.0,0.233554,7.0,7.0,0.233554,6.0,0.233554,46.585105,6.0,2.0,1.0,6.0,0.311921,2.0,2.0,6.0,5.0,46.585105,4.0,22.079199,22.079199,5.0,5.0,22.079199,5.0,0.645641,0.311921,1.0,22.079199,0.652144,5.0,0.652144,0.652144,2.0,23.412776,2.0,0.652144,46.585105,46.585105,6.0,0.652144,0.652144,0.53566,6.0,0.320727,1.0,5468.394201
10190,0.482854,0.482854,0.482854,17.212891,0.923530,0.212782,36.131075,36.088896,-35.165366,-35.207545,35.648221,35.648221,35.0,0.412151,0.440676,0.369521,15.709301,0.253996,0.253879,0.254035,0.253879,0.253918,0.499117,0.440676,0.663834,0.309856,0.309748,0.309796,0.309748,0.309748,0.468915,0.042179,0.448843,1.095713,0.254389,0.254232,0.254192,0.254349,0.254309,0.486246,0.560450,0.477679,0.430737,0.575539,0.528867,0.544194,0.226224,0.226224,0.012362,0.013545,0.305895,0.305895,0.302697,0.302697,58.0,0.648221,3674.0,3674.0,4.0,4.0,46.0,3514.0,3514.0,4.0,4.0,31.0,2025.0,2025.0,16.0,16.0,5015.0,1890.0,6.0,6.0,1888.0,5015.0,1890.0,1889.0,1889.0,14.0,1890.0,1890.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,1888.0,1890.0,1890.0,0.456046,7192.0,3.0,7192.0,3.0,7192.0,0.457846,0.26,16.0,6.0,5.0,5.0,9101.0,0.0,0.320727,0.320727,0.652144,4318.0,1.0,1.0,3378.0,3378.0,3377.0,3378.0,3376.0,3.0,3.0,3.0,3.0,3.0,13056.0,4255.0,1.0,0.0,0.0,8428.0,0.436183,3.0,0.068009,0.533821,1.0,13814.0,25.0,5.0,0.535660,4.0,3231.0,3233.0,3233.0,3233.0,3232.0,4.0,3.0,3.0,3.0,3.0,3.0,0.192732,7.0,0.483347,3.469912,5.0,0.488876,0.50,7.0,0.437473,6.0,0.032394,35.104165,5.0,4.0,1.0,3904.0,4408.0,4408.0,4407.0,4409.0,8.0,4409.0,8.0,8.0,8.0,8.0,2.0,2.0,2.0,2.0,2.0,4.0,0.75,1684.0,1685.0,8.0,1685.0,1685.0,1685.0,0.507215,0.448406,0.506261,0.50,36.166044,0.446576,36.392487,18122.0,35.739824,0.25,0.497503,0.192732,8.0,0.443000,4.0,6.0,6381.0,6380.0,5.0,46.585105,6381.0,37.712513,37.712513,5.0,0.320727,2.0,5.0,77.4,6379.0,55.8,6380.0,64.7,37.402177,2.0,6381.0,6379.0,6380.0,6381.0,6379.0,5.0,2.0,2.0,0.463759,2.0,10.0,5.0,2.0,45.8,2.0,6380.0,5.0,2.0,2.0,0.070664,5.0,46.8,0.033306,5.0,0.320727,7.0,5.0,0.460807,27.0,0.233554,7.0,7.0,0.233554,7.0,7.0,0.233554,5.0,0.233554,46.585105,5.0,1.0,2.0,5.0,0.311921,1.0,1.0,5.0,5.0,46.585105,4.0,23.412776,23.412776,5.0,8.0,23.412776,5.0,0.652144,0.311921,2.0,23.412776,0.652144,8.0,0.652144,0.652144,4.0,24.320627,2.0,0.652144,46.369344,46.585105,5.0,0.652144,0.652144,0.53566,6.0,0.320727,2.0,5632.550356
10191,0.568854,0.568854,0.568854,21.459841,1.031918,0.263416,38.293577,38.187788,-37.155870,-37.261659,37.724724,37.724724,37.0,0.726603,0.463065,0.772022,17.468983,0.689674,0.689625,0.689625,0.689625,0.689577,0.542252,0.463065,0.680488,0.504309,0.504231,0.504154,0.504231,0.504231,0.908083,0.105789,0.633855,1.228454,0.472335,0.472335,0.472261,0.472261,0.472344,0.911249,0.778810,0.557666,0.647363,0.787643,0.773457,0.781924,0.735053,0.735053,0.012275,0.015079,0.558468,0.558468,0.568548,0.568548,31.0,0.724724,3674.0,3674.0,4.0,4.0,52.0,3514.0,3514.0,4.0,4.0,51.0,2025.0,2025.0,16.0,16.0,2172.0,1890.0,4.0,4.0,1888.0,2172.0,1890.0,1889.0,1889.0,14.0,1890.0,1890.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,1888.0,1890.0,1890.0,0.456046,7192.0,3.0,7192.0,3.0,7192.0,0.457846,0.26,16.0,6.0,5.0,5.0,4698.0,3.0,0.320727,0.320727,0.652144,4318.0,1.0,1.0,3378.0,3378.0,3377.0,3378.0,3376.0,3.0,3.0,3.0,3.0,3.0,13056.0,4255.0,1.0,0.0,3.0,4641.0,0.436183,3.0,0.068009,0.533821,1.0,13814.0,25.0,5.0,0.535660,4.0,3231.0,3233.0,3233.0,3233.0,3232.0,4.0,3.0,3.0,3.0,3.0,3.0,0.192732,7.0,0.483347,3.469912,5.0,0.488876,0.50,7.0,0.437473,6.0,0.032394,35.104165,5.0,4.0,3.0,3053.0,1960.0,1960.0,1960.0,1960.0,8.0,1960.0,8.0,8.0,8.0,8.0,6.0,6.0,6.0,6.0,6.0,4.0,0.50,1684.0,1685.0,8.0,1685.0,1685.0,1685.0,0.507215,0.448406,0.506261,0.50,36.166044,0.446576,36.392487,7266.0,37.228260,0.38,0.529071,0.192732,8.0,0.455488,5.0,4.0,6381.0,6380.0,5.0,46.585105,6381.0,37.712513,37.712513,5.0,0.320727,2.0,5.0,77.4,6379.0,55.8,6380.0,64.7,37.402177,2.0,6381.0,6379.0,6380.0,6381.0,6379.0,5.0,2.0,2.0,0.463759,2.0,10.0,5.0,2.0,45.8,2.0,6380.0,5.0,2.0,2.0,0.070664,5.0,46.8,0.033306,5.0,0.320727,7.0,5.0,0.460807,27.0,0.233554,7.0,7.0,0.233554,7.0,7.0,0.233554,5.0,0.233554,46.585105,5.0,1.0,2.0,5.0,0.311921,1.0,1.0,5.0,5.0,46.585105,4.0,23.412776,23.412776,5.0,8.0,23.412776,5.0,0.652144,0.311921,2.0,23.412776,0.652144,8.0,0.652144,0.652144,4.0,24.320627,2.0,0.652144,46.369344,46.585105,5.0,0.652144,0.652144,0.53566,6.0,0.320727,2.0,6470.073753
10192,0.407374,0.407374,0.409261,12.987821,0.816634,0.166722,32.289220,32.291107,-31.474473,-31.472586,31.881847,31.881847,31.0,0.149983,0.407374,0.112498,13.047983,0.180418,0.180274,0.180246,0.180086,0.180332,0.195784,0.409261,0.639735,0.223268,0.223250,0.223321,0.223214,0.223055,0.093727,-0.001887,0.156360,0.995389,0.205948,0.206014,0.205915,0.205915,0.206112,0.056699,0.129633,0.208945,0.163747,0.090529,0.137020,0.098616,0.315054,0.314938,0.012837,0.012778,0.408719,0.408789,0.405739,0.405638,31.0,0.881847,2052.0,2052.0,5.0,5.0,57.0,2429.0,2430.0,5.0,5.0,43.0,1625.0,1625.0,10.0,10.0,4477.0,1539.0,1.0,1.0,1538.0,4477.0,1539.0,1538.0,1538.0,3.0,1538.0,1538.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,1538.0,1537.0,1537.0,0.422140,5872.0,8.0,5871.0,1.0,5870.0,0.417915,0.56,34.0,5.0,6.0,4.0,8581.0,1.0,0.311921,0.311921,0.645641,3654.0,8.0,2.0,2722.0,2719.0,2721.0,2721.0,2719.0,7.0,7.0,7.0,7.0,7.0,11101.0,3762.0,8.0,1.0,1.0,9506.0,0.461480,8.0,0.067236,0.535660,3.0,11129.0,12.5,6.0,0.532223,6.0,2969.0,2972.0,2969.0,2971.0,2971.0,6.0,7.0,7.0,7.0,7.0,7.0,0.233554,5.0,0.524854,4.188904,6.0,0.536900,0.25,5.0,0.457101,5.0,0.038487,37.388526,4.0,6.0,6.0,4072.0,4505.0,4506.0,4504.0,4505.0,7.0,4502.0,7.0,7.0,7.0,7.0,3.0,3.0,3.0,3.0,3.0,6.0,0.50,1857.0,1857.0,9.0,1857.0,1857.0,1856.0,0.512803,0.449702,0.509669,0.38,36.682588,0.450692,36.554771,18122.0,35.739824,0.25,0.497503,0.233554,9.0,0.443000,4.0,6.0,6266.0,6270.0,5.0,46.585105,6272.0,35.756223,35.756223,5.0,0.320727,3.0,7.0,94.6,6269.0,68.2,6271.0,79.0,35.864247,3.0,6265.0,6268.0,6264.0,6266.0,6269.0,5.0,3.0,3.0,0.446451,3.0,12.0,7.0,3.0,55.9,3.0,6270.0,5.0,3.0,3.0,0.080261,5.0,57.2,0.038244,7.0,0.320727,5.0,6.0,0.445705,33.0,0.192732,5.0,5.0,0.192732,5.0,5.0,0.192732,6.0,0.192732,46.369344,6.0,4.0,4.0,6.0,0.320727,4.0,3.0,6.0,6.0,46.369344,3.0,23.412776,23.412776,6.0,5.0,23.412776,6.0,0.652144,0.320727,1.0,23.412776,0.652144,5.0,0.652144,0.652144,2.0,23.412776,1.0,0.652144,46.585105,46.585105,6.0,0.652144,0.652144,0.53566,6.0,0.320727,2.0,4500.995972


In [None]:
test_pred[test_pred['fruitset_x_fruitmass'] == 0.1346910955928016]

In [128]:
submission = pd.read_csv('sample_submission.csv')['id']

submission_df = pd.concat([submission, test_pred[TARGET]], axis=1)
submission_df


Unnamed: 0,id,yield
0,15289,4251.973443
1,15290,6036.019978
2,15291,7239.748855
3,15292,4503.161860
4,15293,4211.327659
...,...,...
10189,25478,5468.394201
10190,25479,5632.550356
10191,25480,6470.073753
10192,25481,4500.995972


In [129]:
submission_df.to_csv('hist_3cv_openfe_mi_manualcorrect_355.8268.csv', index=False)

## Stacking

In [None]:
meta_model = Ridge()

In [None]:
%%time

meta_scores = []

for i, (train_idx, meta_idx) in enumerate(k10.split(X_train)):
    print(f'Fold {i + 1}')
    X_train_meta, X_test_meta = X_train.iloc[train_idx], X_train.iloc[meta_idx]
    y_train_meta, y_test_meta = y_train.iloc[train_idx], y_train.iloc[meta_idx]

    print(X_train_meta.shape, X_test_meta.shape, y_train_meta.shape, y_test_meta.shape)
    
    meta_features_fold = np.zeros((X_test_meta.shape[0], len(models)))
    # meta_test_features = np.zeros((y.shape[0], len(models)))
    # meta_targets = np.zeros(y.shape[0])

    for i, model in enumerate(models):
        model_name = model.__class__.__name__ if not hasattr(model, 'name') else model.name
        print(f'Starting {model_name}')
        model_features = sfs_features[model_name]
        # model_features = baseline_features[model_name]

        # Fit model on the selected features
        model.fit(X_train_meta[model_features], y_train_meta)
        preds = model.predict(X_test_meta[model_features])
        meta_features_fold[:, i] = preds

    # Train the meta-model on the predictions from the base models
    meta_model.fit(meta_features_fold, y_test_meta)
    
    # Predict using the meta-model
    final_preds = meta_model.predict(meta_features_fold)
    
    # Calculate r2_score for the current fold
    current_fold_mae = mean_absolute_error(y_test_meta, final_preds)
    meta_scores.append(current_fold_mae)

In [None]:
# Calculate the average MAE across all folds
average_mae = np.mean(meta_scores)
average_mae

# 350.36268333368736 Baseline Features
# 348.51521661303786 SFS Features

In [None]:
%%time

# Retrain base models on all data
all_base_model_predictions = []

for model in models:
    model_name = model.__class__.__name__ if not hasattr(model, 'name') else model.name
    print(f'Starting {model_name}')
    model_features = sfs_features[model_name]
    # model_features = baseline_features[model_name]

    model.fit(X_train[model_features], y_train)
    preds = model.predict(X_test[model_features])

    all_base_model_predictions.append(preds.reshape(-1, 1))

# Stack predictions for the meta model
X_new_meta = np.hstack(all_base_model_predictions)

# Use the meta model to make final predictions
final_predictions = meta_model.predict(X_new_meta)

In [None]:
final_predictions = pd.DataFrame(final_predictions, columns=[TARGET])
final_predictions = pd.concat([X_test, final_predictions], axis=1)
final_predictions

In [None]:
model_names = []
for model in models:
    model_name = model.__class__.__name__ if not hasattr(model, 'name') else model.name
    model_names.append(model_name)
model_names

print('Ensemble weights')
weights = pd.Series(meta_model.coef_, index=model_names)
print(weights)
print(f'Weights total: {weights.sum()}')
print(f'Intercept: {meta_model.intercept_}', end='\n\n')
print(f"Average Stacking MAE across all folds: {average_mae:.5f}")

In [None]:
# Get list of all feature columns (excluding TARGET)
feature_columns = df_train.columns[df_train.columns != TARGET]

# Initialize a dictionary to store corrections
corrections = {}

# Iterate over each feature column
for column in feature_columns:
    # Count occurrences of each value in the column
    value_counts = df_train[column].value_counts()

    # Filter values that appear at least twice
    values_to_check = value_counts[value_counts >= 2].index
     
    # Iterate over each value in the column that appears at least twice
    for value in values_to_check:
        # Get rows where column equals value
        rows_with_value = df_train[df_train[column] == value]

        # Check if all rows with this value in column have the same target
        if rows_with_value[TARGET].nunique() == 1:
            target_value = rows_with_value[TARGET].iloc[0]
        
            # Store correction if it matches training pattern
            if (column, value, target_value) not in corrections:
                corrections[(column, value)] = target_value

In [None]:
corrections

In [None]:
# Apply corrections to final_predictions
for (column, value), target_value in corrections.items():
    final_predictions.loc[final_predictions[column] == value, TARGET] = target_value

# Print corrected predictions
print("Corrected predictions in final_predictions:")
final_predictions

In [None]:
final_predictions_df = pd.DataFrame(final_predictions[TARGET], columns=[TARGET])
final_predictions_df.head()

In [None]:
submission = pd.read_csv('sample_submission.csv')['id']

submission_stack_df = pd.concat([submission, final_predictions_df[TARGET]], axis=1)
submission_stack_df

In [None]:
submission_stack_df.to_csv(f'stack_3cv_sfs_manualcorrect_{average_mae}.csv', index=False)