# LOFO for GPU

## Bibliotecas Utilizadas

In [1]:
import numpy as np
import cupy as cp
import pandas as pd
import datetime

import xgboost as xgb
from sklearn.metrics import make_scorer, mean_absolute_error
from sklearn.model_selection import cross_validate

from sklearn.model_selection import KFold
from sklearn.metrics import make_scorer, mean_squared_error

## Leave One Feature Out

In [2]:
def lofo_df(df,features,feature_out):
    """Build the GPU feature matrix with one feature left out.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding (at least) every column named in ``features``.
    features : iterable of column labels
        Candidate feature columns, in the order they should appear in the
        resulting matrix.
    feature_out : column label or None
        Feature to exclude. ``None`` keeps every entry of ``features``.

    Returns
    -------
    Array produced by ``cp.asarray`` from the selected columns.

    Raises
    ------
    KeyError
        Propagated from ``DataFrame.drop`` when ``feature_out`` is not a
        column of ``df``.
    """
    # ``drop`` returns a new frame, so the caller's ``df`` is never modified.
    frame = df if feature_out is None else df.drop(columns=feature_out)
    kept_columns = [name for name in features if name != feature_out]
    return cp.asarray(frame[kept_columns])

In [3]:
def lofo_score(X,y,features,feature_out,model,cv,scoring,n_jobs):
    """Cross-validate ``model`` with ``feature_out`` removed and return the mean test score.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame containing the feature columns; forwarded to ``lofo_df``.
    y : array-like
        Target values, passed to ``cross_validate`` unchanged.
    features : iterable of column labels
        Candidate feature columns.
    feature_out : column label or None
        Feature to leave out; ``None`` scores the full feature set.
    model, cv, scoring, n_jobs
        Forwarded to ``sklearn.model_selection.cross_validate`` as
        ``estimator``, ``cv``, ``scoring`` and ``n_jobs`` respectively.

    Returns
    -------
    Mean of the ``'test_score'`` entries reported by ``cross_validate``.
    """
    gpu_features = lofo_df(X, features, feature_out=feature_out)
    cv_results = cross_validate(
        estimator=model,
        X=gpu_features,
        y=y,
        scoring=scoring,
        cv=cv,
        n_jobs=n_jobs,
    )
    fold_scores = cv_results['test_score']
    return fold_scores.mean()

In [4]:
def LOFO_GPU_Importance(X,y,features,model,cv,scoring,n_jobs):
    """Compute Leave-One-Feature-Out importances.

    The model is first cross-validated on every feature to obtain a baseline
    score, then once more per feature with that feature removed. A feature's
    importance is ``baseline - score_without_feature``, so a positive value
    means the cross-validated score got worse when the feature was dropped.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame containing the feature columns.
    y : array-like
        Target values.
    features : iterable of column labels
        Features to evaluate.
    model, cv, scoring, n_jobs
        Forwarded to ``lofo_score``.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``"feature"`` and ``"score"``, one row per entry of
        ``features`` in the original order.

    Notes
    -----
    Prints one progress line per feature: ``<i>/<n> <seconds> s/it``.
    """
    feature_names = list(features)
    n_features = len(feature_names)

    base_score = lofo_score(X,y,features,None,model,cv,scoring,n_jobs)

    # Accumulate in a list: np.append reallocates the whole array on every call.
    importances = []
    for position, feature_out in enumerate(feature_names, start=1):
        start_time = datetime.datetime.now()
        feature_score = lofo_score(X,y,features,feature_out,model,cv,scoring,n_jobs)
        importances.append(base_score - feature_score)
        # total_seconds() keeps the fractional part correctly zero-padded;
        # formatting ``seconds`` and ``microseconds`` separately printed
        # e.g. 7 s + 5000 us as "7.5000" instead of "7.005000".
        elapsed = (datetime.datetime.now() - start_time).total_seconds()
        print('{}/{} {:.6f} s/it'.format(position, n_features, elapsed))

    return pd.DataFrame({
        "feature": feature_names,
        # Explicit float dtype so an empty ``features`` still yields a float column.
        "score": np.asarray(importances, dtype=float),
    })

In [5]:
# Training inputs and targets, read from CSV files under the relative Data/ directory.
x_train = pd.read_csv(filepath_or_buffer='Data/X_train.csv')
y_train = pd.read_csv(filepath_or_buffer='Data/Y_train.csv')

In [6]:
# Candidate features: every column except the ID / WF / Time columns.
features = x_train.drop(['ID','WF','Time'],axis=1).columns
# NOTE(review): recent XGBoost releases deprecate tree_method='gpu_hist' in
# favour of device='cuda' with tree_method='hist' - confirm against the
# installed version before switching.
model = xgb.XGBRegressor(tree_method='gpu_hist',max_depth=5)
# Contiguous, unshuffled folds. random_state is intentionally omitted: it has
# no effect when shuffle=False, and recent scikit-learn releases raise
# ValueError if it is set together with shuffle=False.
cv = KFold(n_splits=5, shuffle=False)
# greater_is_better=False makes the scorer return the negated MSE, so larger
# scores are still better.
scoring = make_scorer(mean_squared_error, greater_is_better=False)
# Forwarded to cross_validate as its n_jobs argument.
n_jobs=2

In [7]:
# Run the LOFO analysis: one baseline cross-validation plus one per feature.
importance_df = LOFO_GPU_Importance(
    X=x_train,
    y=y_train['Production'],
    features=features,
    model=model,
    cv=cv,
    scoring=scoring,
    n_jobs=n_jobs,
)

1/102 7.264136 s/it
2/102 7.234714 s/it
3/102 7.202005 s/it
4/102 7.158949 s/it
5/102 7.188807 s/it
6/102 7.163283 s/it
7/102 7.280936 s/it
8/102 7.223827 s/it
9/102 7.382163 s/it
10/102 7.223342 s/it
11/102 7.256457 s/it
12/102 7.338202 s/it
13/102 7.217708 s/it
14/102 7.191141 s/it
15/102 7.140882 s/it
16/102 7.210700 s/it
17/102 7.257592 s/it
18/102 7.331901 s/it
19/102 7.289508 s/it
20/102 7.158624 s/it
21/102 7.249617 s/it
22/102 7.159917 s/it
23/102 7.186498 s/it
24/102 7.187481 s/it
25/102 7.243357 s/it
26/102 7.187218 s/it
27/102 7.295850 s/it
28/102 7.214551 s/it
29/102 7.219694 s/it
30/102 7.194304 s/it
31/102 7.235757 s/it
32/102 7.202760 s/it
33/102 7.185488 s/it
34/102 7.240440 s/it
35/102 7.290490 s/it
36/102 7.219548 s/it
37/102 7.230807 s/it
38/102 7.292827 s/it
39/102 7.259373 s/it
40/102 7.203755 s/it
41/102 7.312894 s/it
42/102 7.209828 s/it
43/102 7.206783 s/it
44/102 7.212189 s/it
45/102 7.214177 s/it
46/102 7.202066 s/it
47/102 7.241635 s/it
48/102 7.311629 s/it
4

In [10]:
# Rank features from largest to smallest LOFO score.
importance_df.sort_values('score', ascending=False)

Unnamed: 0,feature,score
42,NWP2_12h_D-1_U,0.071381
18,NWP1_12h_D-1_U,0.070456
21,NWP1_18h_D-1_U,0.067885
90,NWP4_00h_D-1_U,0.065905
57,NWP3_18h_D-2_U,0.060995
...,...,...
29,NWP1_06h_D_T,-0.027390
11,NWP1_18h_D-2_T,-0.030797
25,NWP1_00h_D_V,-0.036174
97,NWP4_00h_D_V,-0.044534


In [109]:
# Show the column labels of the training frame.
x_train.columns

Index(['ID', 'WF', 'Time', 'NWP1_00h_D-2_U', 'NWP1_00h_D-2_V',
       'NWP1_00h_D-2_T', 'NWP1_06h_D-2_U', 'NWP1_06h_D-2_V', 'NWP1_06h_D-2_T',
       'NWP1_12h_D-2_U',
       ...
       'NWP4_00h_D-1_CLCT', 'NWP4_12h_D-1_U', 'NWP4_12h_D-1_V',
       'NWP4_12h_D-1_CLCT', 'NWP4_00h_D_U', 'NWP4_00h_D_V', 'NWP4_00h_D_CLCT',
       'NWP4_12h_D_U', 'NWP4_12h_D_V', 'NWP4_12h_D_CLCT'],
      dtype='object', length=105)

In [11]:
# Display the training frame for visual inspection.
x_train

Unnamed: 0,ID,WF,Time,NWP1_00h_D-2_U,NWP1_00h_D-2_V,NWP1_00h_D-2_T,NWP1_06h_D-2_U,NWP1_06h_D-2_V,NWP1_06h_D-2_T,NWP1_12h_D-2_U,...,NWP4_00h_D-1_CLCT,NWP4_12h_D-1_U,NWP4_12h_D-1_V,NWP4_12h_D-1_CLCT,NWP4_00h_D_U,NWP4_00h_D_V,NWP4_00h_D_CLCT,NWP4_12h_D_U,NWP4_12h_D_V,NWP4_12h_D_CLCT
0,1,WF1,01/05/2018 01:00,,,,,,,,...,,,,,1.254603,-0.289687,82.543144,,,
1,2,WF1,01/05/2018 02:00,,,,,,,,...,,,,,2.490908,-0.413370,99.990844,,,
2,3,WF1,01/05/2018 03:00,,,,,,,,...,,,,,0.997093,-1.415138,98.367235,,,
3,4,WF1,01/05/2018 04:00,,,,,,,,...,,,,,0.689598,-0.961441,94.860604,,,
4,5,WF1,01/05/2018 05:00,,,,,,,,...,,,,,0.290994,-0.294963,95.905879,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37370,37371,WF6,15/01/2019 20:00,,,,,,,-1.7901,...,-0.000013,0.675391,-0.984372,-0.000015,0.560460,-0.938410,-0.000016,0.614775,-0.884509,-0.00002
37371,37372,WF6,15/01/2019 21:00,,,,,,,-1.7966,...,-0.000013,0.499687,-0.651881,-0.000015,0.368801,-0.654958,-0.000016,0.322804,-0.747691,-0.00002
37372,37373,WF6,15/01/2019 22:00,,,,,,,-1.6205,...,-0.000013,0.261570,-0.767298,-0.000015,0.129741,-0.636798,-0.000016,0.118189,-0.707538,-0.00002
37373,37374,WF6,15/01/2019 23:00,,,,,,,-1.2611,...,-0.000013,-0.030139,-0.537230,-0.000015,0.055551,-0.546788,-0.000016,-0.160884,-0.532798,-0.00002
