# Declaring target feature

In [1]:
# Column to predict. Later cells read it as target_features[0], both to pick
# the export folder and to select the target column of every DataFrame.
target_features = ['GrainYield']
# target_features = ['Days2Maturity']


# Input folder with the merged data files. The folder name (the part before
# the trailing '/') is matched against fixed names when the export folder is
# chosen, so keep the trailing '/'.
# path = r'./Data/3. merged data/no outliers dropped/'
path = r'./Data/3. merged data/dropped only 1 round/'
# path = r'./Data/3. merged data/all outliers dropped repeatatively/'

# Importing Libraries

In [2]:
import os
import csv
import numpy as np
import pandas as pd
from copy import copy
from datetime import datetime as dt

# Dictionaries
import json
from pprint import pprint

# Iterate in loops
import itertools
from itertools import zip_longest

# Simpsons integration
from numpy import trapz
from scipy.integrate import simps

# Visualisation
import seaborn as sns
import matplotlib.pyplot as plt

# To display df nicely in loops
from IPython.display import display 
# Display rows and columns Pandas
pd.options.display.max_columns = 100
pd.set_option('display.max_rows',100)

# # For displaying max rows in series
# pd.options.display.max_rows = 10

# Importing data

In [3]:
# Show the current working directory as the cell output; the relative
# './Data/...' paths used in this notebook are resolved against it
os.getcwd()
# os.listdir()

'C:\\Users\\fahad\\MegaSync\\NMBU\\GitHub\\vPheno'

## Finding Username folder to make general path for multi PC use

In [4]:
# Take the user folder from the home directory instead of slicing the current
# working directory: the old split on '\\' only worked on Windows and only
# when the notebook was stored somewhere below C:\Users\<name>.
# Backslashes are normalised so the result uses '/' on every platform.
_home_dir = os.path.expanduser('~').replace('\\', '/').rstrip('/')
username = _home_dir.split('/')[-1]
user_path = _home_dir + '/'
username, user_path

('fahad', 'C:/Users/fahad/')

## Declaring Import paths

In [5]:
main_path = r'./Data/'



## Declaring export paths
# The export folder is './Data/4. feat_<target tag>_<dataset tag>/'.
# The tags below reproduce the folder names used so far.
_target_tags = {'GrainYield': 'GY', 'Days2Maturity': 'DM'}
_dataset_tags = {
    'no outliers dropped': 'all_data',
    'dropped only 1 round': 'do1r',
    'all outliers dropped repeatatively': 'aodr',
}

# Name of the input folder: the component of `path` before the trailing '/'
_dataset_name = path.split('/')[-2]

# Fail with a clear message instead of leaving export_path undefined (or
# stale from an earlier run of this cell) for an unknown combination
if target_features[0] not in _target_tags:
    raise ValueError(
        f'No export folder defined for target feature {target_features[0]!r}; '
        f'expected one of {sorted(_target_tags)}'
    )
if _dataset_name not in _dataset_tags:
    raise ValueError(
        f'No export folder defined for input folder {_dataset_name!r}; '
        f'expected one of {sorted(_dataset_tags)}'
    )

export_path = (
    './Data/4. feat_'
    + _target_tags[target_features[0]]
    + '_'
    + _dataset_tags[_dataset_name]
    + '/'
)

# Create the input and export folders if they do not exist already
os.makedirs(path, exist_ok=True)
os.makedirs(export_path, exist_ok=True)

# os.listdir(path)

In [6]:
# Making dictionary of files in each folder, in case there are multiple types of data
dict_paths = {}
def explore(starting_path):
    """Record the file names found in every folder below ``starting_path``.

    Walks the directory tree with ``os.walk`` and stores, in the module-level
    ``dict_paths``, one entry per visited folder: the key is the folder's own
    name and the value is the list of file names directly inside it.

    The key is taken from the normalised path, so a trailing separator on
    ``starting_path`` does not matter and sub-folders no longer overwrite the
    entry of their parent. Folders that share the same name at different
    places in the tree still share one key (the last one visited wins).

    Parameters
    ----------
    starting_path : str
        Root folder of the walk.

    Returns
    -------
    None
        The result is written to ``dict_paths``.
    """
    for dirpath, _dirnames, filenames in os.walk(starting_path):
        folder_name = os.path.basename(os.path.normpath(dirpath))
        dict_paths[folder_name] = filenames
#     pprint(dict_paths)
# Fill dict_paths for the selected input folder
explore(path)

# Data Preparation
## Creating list of complete files

In [7]:
# Walk the directory tree below `path` and record every file twice:
# once with its full address and once by its bare file name

files_with_address = []
files_list = []

for dirpath, dirnames, filenames in os.walk(path):
    for name in filenames:
        files_with_address.append(os.path.join(dirpath, name))
        files_list.append(name)

print(len(files_with_address), 'files found in the directory')
# files_with_address
# files_list

14 files found in the directory


## Data Checking/control

### Check for duplicate filenames

In [8]:
# Files with the same base name would be loaded under the same DataFrame
# variable name, so stop here and report which names clash.
n_files = len(files_list)
n_unique = len(set(files_list))
n_duplicates = n_files - n_unique
print('Total number of files are :', n_files)
print('Number of unique file names are:', n_unique)
print('There is/are', n_duplicates,'duplicate file name/names.')
if n_duplicates > 0:
    # Only computed on the error path, so the quadratic count() is harmless
    duplicated_names = sorted({name for name in files_list if files_list.count(name) > 1})
    raise NameError(f'Duplicate file names found: {duplicated_names}')

Total number of files are : 14
Number of unique file names are: 14
There is/are 0 duplicate file name/names.


# Importing data files to Pandas

In [9]:
# Load every data file into a DataFrame stored under a variable named after
# the file; all_df keeps those variable names in import order.
all_df = []
for data in files_with_address:
    file_name = os.path.splitext(os.path.basename(data))[0]

    # Make the file name usable as a variable name: spaces and dashes become
    # underscores, brackets and dots are removed
    for old, new in ((" ", "_"), ("-", "_"), (")", ""), ("(", "")):
        file_name = file_name.replace(old, new)
    df_name = file_name.replace(".", "")

    # Test: stop if a previous file already produced the same variable name
    if df_name in all_df:
        print(f'A file with the same name {df_name} has already been imported. \n Please check if there is duplication of data.')
        raise NameError
    all_df.append(df_name)

    # index_col=False: do not use the first column as the index
    frame = pd.read_csv(data, index_col=False)
    locals()[df_name] = frame
    print(df_name, '=====', frame.shape)
# all_df

Graminor_2019_Simps ===== (578, 122)
Graminor_2019_Trapz ===== (583, 122)
Graminor_2020_Simps ===== (734, 122)
Graminor_2020_Trapz ===== (736, 122)
Masbasis_2019_Simps ===== (485, 123)
Masbasis_2019_Trapz ===== (490, 123)
Masbasis_2020_Simps ===== (528, 125)
Masbasis_2020_Trapz ===== (531, 125)
Robot_2020_Simps ===== (94, 124)
Robot_2020_Trapz ===== (94, 124)
Staur_2019_Simps ===== (1233, 126)
Staur_2019_Trapz ===== (1240, 126)
Staur_2020_Simps ===== (1431, 123)
Staur_2020_Trapz ===== (1423, 123)


In [10]:
# Report how many DataFrame names were registered in all_df by the import loop
print(f'Total imported {len(all_df)}')
# all_df

Total imported 14


# Finding yield columns

## Importing the column lists: weather variables, yield columns, spectral indices and base indices

In [11]:
# Load the Vollebekk weather columns from their JSON file.
# The context manager closes the file even if reading or parsing fails,
# and json.load converts the file content to Python objects directly.
with open(main_path+'vollebekk_weather_columns.json', "r") as a_file:
    weather_cols_vollebekk = json.load(a_file)
pprint(len(weather_cols_vollebekk))

80


In [12]:
# Load the Staur weather columns from their JSON file.
# The context manager closes the file even if reading or parsing fails,
# and json.load converts the file content to Python objects directly.
with open(main_path+'staur_weather_columns.json', "r") as a_file:
    weather_cols_staur = json.load(a_file)
pprint(len(weather_cols_staur))

80


In [13]:
# Load the yield-related column names from their JSON file.
# The context manager closes the file even if reading or parsing fails,
# and json.load converts the file content to Python objects directly.
with open(main_path+"yield_columns.json", "r") as a_file:
    yield_cols = json.load(a_file)
print(yield_cols)

['Entry', 'CodeName', 'Block', 'Pedigree', 'Replicates', 'iBlock', 'Heading_Date', 'Maturity_Date', 'Days2Maturity', 'Line', 'Lodging', 'GrainYield', 'Days2Heading', 'Name']


In [14]:
# Load the names of all spectral indices from their JSON file.
# The context manager closes the file even if reading or parsing fails,
# and json.load converts the file content to Python objects directly.
with open(main_path+"spectral_indices_columns.json", "r") as a_file:
    spectral_indices_all = json.load(a_file)
print(spectral_indices_all)

['NDVI', 'MTCI', 'DVI', 'GDVI', 'EXG', 'EXGR', 'RDVI', 'TDVI', 'GNDVI', 'NDRE', 'SCCI', 'EVI', 'TVI', 'VARI', 'GARI', 'GCI', 'GLI', 'NLI', 'MNLI', 'SAVI', 'GSAVI', 'OSAVI', 'GOSAVI', 'MSAVI2', 'MSR', 'GRVI', 'WDRVI', 'SR']


In [15]:
# Load the names of the base index (band) columns from their JSON file.
# The context manager closes the file even if reading or parsing fails,
# and json.load converts the file content to Python objects directly.
with open(main_path+"base_indices_columns.json", "r") as a_file:
    base_indices = json.load(a_file)
print(base_indices)

['Blue', 'Green', 'Red', 'RedEdge', 'NIR']


## XXXXXX Defining categories of features

In [16]:
# ToDo: Add check for duplicate columns in the df
# The two bare names below have no effect; they only reference lists loaded
# in earlier cells
base_indices
spectral_indices_all
# Spectral indices left out of the reduced `spectral_indices` list
drop_indices = ['EVI', 'GLI', 'MTCI']
spectral_indices = list(filter(lambda name: name not in drop_indices, spectral_indices_all))

# Every Staur weather column also exists in the Vollebekk weather data, so the
# Staur list serves as the general weather feature list
weather_features = list(weather_cols_staur)
environment_var = [*weather_features, 'Staur_Env', 'Vollebekk_Env']
# yield_cols

# Dropping DataFrames which do not have the Target Feature

In [17]:
# Keep only the names of the DataFrames that contain the target column
all_df_dm = []
for df in all_df:
    # Only the column labels are inspected, so the frame is not copied
    if target_features[0] in locals()[df].columns:
        all_df_dm.append(df)
#         print(df)

In [18]:
# Split the retained DataFrame names by the aggregation tag in their name
all_df_simps = list(filter(lambda name: 'Simps' in name, all_df_dm))
all_df_trapz = list(filter(lambda name: 'Trapz' in name, all_df_dm))
# all_df_simps, all_df_trapz

# Dropping unnecessary columns


In [19]:
# Restrict every DataFrame to the modelling columns: base indices, all
# spectral indices, environment variables, the 'Name' column and the target
keep_columns = base_indices + spectral_indices_all + environment_var + ['Name', target_features[0]]

for df in all_df_dm:
    temp_df = locals()[df].copy()
    reduced_df = temp_df[keep_columns]
    locals()[df] = reduced_df
    print(df, temp_df.shape, '==>', reduced_df.shape)


Graminor_2019_Simps (578, 122) ==> (578, 117)
Graminor_2019_Trapz (583, 122) ==> (583, 117)
Graminor_2020_Simps (734, 122) ==> (734, 117)
Graminor_2020_Trapz (736, 122) ==> (736, 117)
Masbasis_2019_Simps (485, 123) ==> (485, 117)
Masbasis_2019_Trapz (490, 123) ==> (490, 117)
Masbasis_2020_Simps (528, 125) ==> (528, 117)
Masbasis_2020_Trapz (531, 125) ==> (531, 117)
Robot_2020_Simps (94, 124) ==> (94, 117)
Robot_2020_Trapz (94, 124) ==> (94, 117)
Staur_2019_Simps (1233, 126) ==> (1233, 117)
Staur_2019_Trapz (1240, 126) ==> (1240, 117)
Staur_2020_Simps (1431, 123) ==> (1431, 117)
Staur_2020_Trapz (1423, 123) ==> (1423, 117)


# Dealing with Nan values

## Dropping Missing values

In [20]:
# Remove every row that has a missing value in at least one column and
# report how many rows each DataFrame lost

for df in all_df_dm:
    temp_df = locals()[df].copy()
    rows_before = temp_df.shape[0]
    cleaned_df = temp_df.dropna(axis=0)
    locals()[df] = cleaned_df
    print(rows_before - cleaned_df.shape[0], ' rows dropped in ', df)
#     print(locals()[df].shape[0])

1  rows dropped in  Graminor_2019_Simps
1  rows dropped in  Graminor_2019_Trapz
1  rows dropped in  Graminor_2020_Simps
1  rows dropped in  Graminor_2020_Trapz
2  rows dropped in  Masbasis_2019_Simps
3  rows dropped in  Masbasis_2019_Trapz
31  rows dropped in  Masbasis_2020_Simps
31  rows dropped in  Masbasis_2020_Trapz
8  rows dropped in  Robot_2020_Simps
8  rows dropped in  Robot_2020_Trapz
0  rows dropped in  Staur_2019_Simps
0  rows dropped in  Staur_2019_Trapz
536  rows dropped in  Staur_2020_Simps
533  rows dropped in  Staur_2020_Trapz


# Normalizing the data using Z-Score from scipy

In [21]:
from scipy.stats import zscore

# Yield columns and environment variables keep their original values;
# every other column is replaced by its z-score, separately per DataFrame
unscaled_cols = yield_cols + environment_var

for df in all_df_dm:
    temp_df = locals()[df].copy()
    cols_to_scale = [col for col in temp_df.columns if col not in unscaled_cols]
    for col in cols_to_scale:
        temp_df[col] = zscore(temp_df[col])
    locals()[df] = temp_df.copy()
    print(df)

Graminor_2019_Simps
Graminor_2019_Trapz
Graminor_2020_Simps
Graminor_2020_Trapz
Masbasis_2019_Simps
Masbasis_2019_Trapz
Masbasis_2020_Simps
Masbasis_2020_Trapz
Robot_2020_Simps
Robot_2020_Trapz
Staur_2019_Simps
Staur_2019_Trapz
Staur_2020_Simps
Staur_2020_Trapz


# Importing functions 

In [22]:
from functions.save_results import save_results, save_grid_results
from functions.split_data import list_test_train_df, data_prep_field
from functions.regression import training_regr, training_gkf_std, grid
from functions.plot_featimp import plot_feat_imp

# ML Models

In [23]:
import time
# Pre.Processing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupKFold
# from sklearn.model_selection import StratifiedGroupKFold
# Pipeline
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
# Models
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
# Metrices
from sklearn.model_selection import cross_validate
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error

## XXXXXX Parameters

In [24]:
# Run-time switches for this notebook.
# NOTE(review): apart from agg_method, none of these names is read in the
# cells visible in this part of the notebook - presumably they are passed to
# the imported helper functions further down; TODO confirm.
threshold_all = 'top_25'
sort_feature_imp = True
show_feat_imp_plot = True
save_feat_imp_plot = True
save_results_now = True
# Selects which group of DataFrames is used: 'Simpsons' -> names containing
# 'Simps', 'Trapezoid' -> names containing 'Trapz'
agg_method = 'Simpsons'
# agg_method = 'Trapezoid'

In [25]:
# Columns used as model inputs; the commented lines are alternative feature sets
training_features = base_indices + spectral_indices_all + environment_var
# training_features = base_indices + spectral_indices_all + weather_features
# training_features =  base_indices + spectral_indices + weather_features
# training_features =  spectral_indices + weather_features
# training_features = spectral_indices

# Column whose values define the groups for grouped cross-validation
group_feature = ['Name']

# Pick the DataFrame names that match the chosen aggregation method
if agg_method == 'Simpsons':
    all_df_now = all_df_simps.copy()
elif agg_method == 'Trapezoid':
    all_df_now = all_df_trapz.copy()
else:
    # Without this branch all_df_now would stay undefined (or stale from an
    # earlier run) and the error would only surface in a later cell
    raise ValueError(
        f"Unknown agg_method {agg_method!r}; expected 'Simpsons' or 'Trapezoid'"
    )

## Fine Tuning the models using all data

### All data mixed

In [26]:
# Names of the DataFrames used for tuning: everything except the Robot data
temp_list = [name for name in all_df_now if 'Robot' not in name]

# Collect the actual DataFrame objects (not their names) so that they can be
# concatenated before training
df_list = []
for name in temp_list:
    df_list.append(locals()[name])

# Stack all DataFrames into one with a fresh 0..n-1 index
df_ = pd.concat(df_list).reset_index(drop=True)

# Shuffle the rows reproducibly
df_shuffle = df_.sample(frac=1, random_state=1).reset_index(drop=True)

# 'Name' travels with the features until the groups have been extracted
X = df_shuffle[training_features + ['Name']]
y = df_shuffle[target_features].values.ravel()

# NOTE(review): this is a plain random split, so rows with the same 'Name'
# can land in both the training and the test part; the groups are only used
# for the cross-validation folds below.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

groups = X_train[group_feature].values.ravel()

X_train = X_train.drop(columns=['Name']).values
X_test = X_test.drop(columns=['Name']).values

# Materialise the grouped folds once so that they can be reused
gkf = list(GroupKFold(n_splits=6).split(X_train, y_train, groups))
X_train.shape

(2960, 115)

### Feature Selection

In [27]:
models = [
    LinearRegression(),
    GradientBoostingRegressor(random_state=1),
]

In [28]:
for model in models:
    print(model)

LinearRegression()
GradientBoostingRegressor(random_state=1)


In [None]:
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

# Run forward (SFS) and backward (SBS) sequential selection for every model
# and store the evaluated subsets of each run in its own CSV file.
# NOTE(review): the selector is fitted on the complete X, y (not on
# X_train, y_train) and uses cv=3 instead of the grouped folds in gkf -
# confirm that this is intended.
for model in models:
    for forw in (True, False):
        print(model)

        # Tags used in the name of the result file
        str_fore = 'SFS' if forw else 'SBS'
        model_name = str(model).split('(')[0]

        sfs1 = SFS(
            model,
            k_features='best',
            forward=forw,
            floating=False,
            verbose=1,
            scoring='r2',
            cv=3,
            n_jobs=-1,
        )

        # Run the sequential search on all feature columns ('Name' excluded)
        sfs1 = sfs1.fit(X.drop(['Name'], axis=1), y)

        # Saving results: one row per evaluated subset
        sfs_res = pd.DataFrame.from_dict(sfs1.subsets_).T
        sfs_res.to_csv(f'{export_path}{model_name}_{str_fore}_{target_features[0]}_{agg_method}.csv')


LinearRegression()


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    8.1s
[Parallel(n_jobs=-1)]: Done 100 out of 115 | elapsed:    8.4s remaining:    1.2s
[Parallel(n_jobs=-1)]: Done 115 out of 115 | elapsed:    9.1s finished
Features: 1/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  58 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 114 out of 114 | elapsed:    0.2s finished
Features: 2/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  68 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 113 out of 113 | elapsed:    0.2s finished
Features: 3/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done 112 out of 112 | elapsed:    0.2s finished
Features: 4/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel

[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done  70 out of  85 | elapsed:    0.7s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  85 out of  85 | elapsed:    1.0s finished
Features: 31/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done  69 out of  84 | elapsed:    0.7s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  84 out of  84 | elapsed:    1.1s finished
Features: 32/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-1)]: Done  68 out of  83 | elapsed:    1.2s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done  83 out of  83 | elapsed:    1.4s finished
Features: 33/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  82 out of  82 | elapsed:    1.1s finished
Features: 3

[Parallel(n_jobs=-1)]: Done  49 out of  49 | elapsed:    1.3s finished
Features: 67/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:    1.1s finished
Features: 68/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  47 out of  47 | elapsed:    1.4s finished
Features: 69/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  31 out of  46 | elapsed:    0.7s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  46 out of  46 | elapsed:    1.1s finished
Features: 70/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:    0.8s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:    1.0s finished
Features: 71/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: 

Features: 111/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:    0.3s finished
Features: 112/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.3s finished
Features: 113/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    0.2s finished
Features: 114/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    0.2s finished
Features: 115/115

LinearRegression()


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    2.1s
[Parallel(n_jobs=-1)]: Done 115 out of 115 | elapsed:    7.2s finished
Features: 114/1[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 114 out of 114 | elapsed:    7.4s finished
Features: 113/1[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 113 out of 113 | elapsed:    6.6s finished
Features: 112/1[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.9s
[Parallel(n_jobs=-1)]: Done 112 out of 112 | elapsed:    6.8s finished
Features: 111/1[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done 

Features: 81/1[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-1)]: Done  66 out of  81 | elapsed:    1.9s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done  81 out of  81 | elapsed:    2.5s finished
Features: 80/1[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  80 out of  80 | elapsed:    2.5s finished
Features: 79/1[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done  64 out of  79 | elapsed:    1.8s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  79 out of  79 | elapsed:    2.2s finished
Features: 78/1[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  52 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-1)]: Done  63 out of  78 | elapsed:    1.8s remaining:    0.4

[Parallel(n_jobs=-1)]: Done  44 out of  44 | elapsed:    0.9s finished
Features: 43/1[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of  43 | elapsed:    0.4s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done  43 out of  43 | elapsed:    0.7s finished
Features: 42/1[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 out of  42 | elapsed:    0.8s finished
Features: 41/1[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  26 out of  41 | elapsed:    0.6s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  41 out of  41 | elapsed:    0.8s finished
Features: 40/1[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 out of  40 | elapsed:    0.8s remaining:    0.4s
[Parallel(n_jobs=-1)]: Done  40 out of  40 | elapsed:    1.0s finished
Features: 39/1[Parallel(n_jobs=-1)]: Using

GradientBoostingRegressor(random_state=1)


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    7.8s
[Parallel(n_jobs=-1)]: Done 115 out of 115 | elapsed:   12.7s finished
Features: 1/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    7.5s
[Parallel(n_jobs=-1)]: Done 114 out of 114 | elapsed:   15.2s finished
Features: 2/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   11.0s
[Parallel(n_jobs=-1)]: Done 113 out of 113 | elapsed:   23.4s finished
Features: 3/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   15.1s
[Parallel(n_jobs=-1)]: Done 112 out of 112 | elapsed:   38.1s finished
Features: 4/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done 

[Parallel(n_jobs=-1)]: Done  79 out of  79 | elapsed:  2.1min finished
Features: 37/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done  78 out of  78 | elapsed:  2.1min finished
Features: 38/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   57.3s
[Parallel(n_jobs=-1)]: Done  77 out of  77 | elapsed:  2.0min finished
Features: 39/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done  76 out of  76 | elapsed:  2.2min finished
Features: 40/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done  75 out of  75 | elapsed:  2.1min finished
Features: 41/115[Parallel(n_j

[Parallel(n_jobs=-1)]: Done  40 out of  40 | elapsed:  2.1min finished
Features: 76/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  39 out of  39 | elapsed:  1.8min finished
Features: 77/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  38 out of  38 | elapsed:  2.9min finished
Features: 78/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 out of  37 | elapsed:  2.9min finished
Features: 79/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  36 out of  36 | elapsed:  2.8min finished
Features: 80/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  35 out of  35 | elapsed:  2.5min finished
Features: 81/115[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]:

### Selected features

In [None]:
if target_features[0] == 'GrainYield':
    # Feature subset used when the target is GrainYield (presumably taken
    # from the feature selection results above - TODO confirm).
    # The comma after 'Blue' was missing, which silently merged the first two
    # entries into the single, non-existent column name 'BlueGreen'.
    # NOTE(review): 'STD_DEV Wind gust (FG2))' ends with a double ')' - verify
    # against the weather column names before relying on it.
    training_features = [
        'Blue',
        'Green', 'MTCI', 'EVI', 'VARI', 'GLI', 'WDRVI',
        'MODE Minimum air temperature at 2m altitude (TN)',
        'SUM Relative humidity in 2m.1',
        'MAX Average temperature at 2m altitude (TM)', 'STD_DEV Wind gust (FG2))'
    ]
elif target_features[0] == 'Days2Maturity':
    export_path = './Data/4. results_dm/'
    export_path_comparability = './Data/4. results_dm/comparability/'