# Crypto Trading Feature Importance Analysis for Deep Reinforcement Learning

Notebook based on:

0: Lopez de Prado, M. (2018). Advances in financial machine learning. John Wiley & Sons.

https://www.amazon.com/Advances-Financial-Machine-Learning-Marcos/dp/1119482089

1: AI4Finance Foundation

https://github.com/AI4Finance-Foundation

2: Optimal Trading Rules Detection with Triple Barrier Labeling

https://www.youtube.com/watch?v=U2CxilKFue4

3: Data Labelling, the Triple-barrier Method

https://towardsdatascience.com/the-triple-barrier-method-251268419dcd


4: Financial Machine Learning Part 1: Labels

https://towardsdatascience.com/financial-machine-learning-part-1-labels-7eeed050f32e#:~:text=Adding%20Path%20Dependency%3A%20Triple%2DBarrier,%3A%20the%20triple%2Dbarrier%20method.


5: Meta-Labeling: Solving for Non Stationarity and Position Sizing

https://www.youtube.com/watch?v=WbgglcXfEzA


6: Advances in Financial Machine Learning

https://github.com/JackBrady/Financial-Machine-Learning/blob/master/USDJPY_Notebook.ipynb


Specifically, this notebook ranks the input features by importance, for a single coin at a time.

In [None]:
# Install required packages

%cd /tmp
!pip install wrds
!pip install swig
!git clone https://github.com/AI4Finance-Foundation/FinRL-Meta
%cd /tmp/FinRL-Meta/
!pip install git+https://github.com/AI4Finance-Foundation/ElegantRL.git
# !pip install -q condacolab
# import condacolab
# condacolab.install()
!apt-get update -y -qq && apt-get install -y -qq cmake libopenmpi-dev python3-dev zlib1g-dev libgl1-mesa-glx swig
!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git
!pip install gputil
!pip install trading_calendars
!pip install python-binance


In [None]:
#install TA-lib (technical analysis)
# !wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz 
# !tar xvzf ta-lib-0.4.0-src.tar.gz
# import os
# os.chdir('ta-lib') 
# !./configure --prefix=/usr
# !make
# !make install
# os.chdir('../')
# !cd
# !pip install TA-Lib
!pip install ta-lib

In [None]:
# Other imports

import scipy as sp
import math
import pandas as pd
import requests
import json
import matplotlib.dates as mdates
import numpy as np
import pickle
import shutil
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from meta.data_processor import DataProcessor

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import seaborn as sns

from datetime import datetime, timedelta
from talib.abstract import MACD, RSI, CCI, DX
from binance.client import Client
from pandas.testing import assert_frame_equal
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

from sklearn.preprocessing import MinMaxScaler 
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from IPython.display import display, HTML

#from google.colab import files

In [None]:
# Global matplotlib defaults; every size below is a multiple of SCALE_FACTOR

SCALE_FACTOR = 2

#plt.style.use('seaborn')
plt.rcParams.update({
    'figure.figsize': [5 * SCALE_FACTOR, 2 * SCALE_FACTOR],
    'figure.dpi': 300 * SCALE_FACTOR,
    'font.size': 5 * SCALE_FACTOR,
    'axes.labelsize': 5 * SCALE_FACTOR,
    'axes.titlesize': 6 * SCALE_FACTOR,
    'xtick.labelsize': 4 * SCALE_FACTOR,
    'ytick.labelsize': 4 * SCALE_FACTOR,
    'font.family': 'serif',
})

# Part 1: Adapted Binance downloader 


Any feature you can think of probably derives from OHLCV data or from alternative data streams. The functions required to compute new features are defined here, and their outputs are added to the final dataframe.

## 1.1 Set constants and use BinanceProcessor


In [None]:
# Experiment configuration: traded pair, candle interval, date ranges and indicators

ticker_list = ['ETHUSDT']

TIME_INTERVAL = '1h'

# The training range ends on the day the trading range starts
TRAIN_START_DATE = '2015-01-01'
TRAIN_END_DATE = '2019-08-01'
TRADE_START_DATE = '2019-08-01'
TRADE_END_DATE = '2020-01-03'

technical_indicator_list = [
    'macd', 'macd_signal', 'macd_hist',
    'rsi', 'cci', 'dx',
]

if_vix = False

In [None]:
# Download and clean the 'binance' data for the whole train + trade period
# through the unified data processor
p = DataProcessor(
    data_source='binance',
    start_date=TRAIN_START_DATE,
    end_date=TRADE_END_DATE,
    time_interval=TIME_INTERVAL,
)
p.download_data(ticker_list=ticker_list)
p.clean_data()
df = p.dataframe

In [None]:
df.head()

## 1.2 Add technical indicators


In [None]:
def add_technical_indicator(df, tech_indicator_list):
    """Return a copy of ``df`` with RSI, MACD, CCI and DX columns added per ticker.

    Custom indicators are not supported: the fixed set MACD (12/26/9), RSI (14),
    CCI (14) and DX (14) is always computed.

    Args:
        df: Frame with a ``tic`` column and ``high``, ``low``, ``close`` columns.
        tech_indicator_list: Currently ignored; kept so existing calls keep working.

    Returns:
        A new DataFrame holding the rows of every ticker (in order of first
        appearance) with the columns ``rsi``, ``macd``, ``macd_signal``,
        ``macd_hist``, ``cci`` and ``dx`` added. An empty DataFrame is returned
        when ``df`` contains no tickers.
    """
    per_ticker_frames = []
    for tic in df.tic.unique():
        tic_df = df[df.tic == tic].copy()
        tic_df['rsi'] = RSI(tic_df['close'], timeperiod=14)
        tic_df['macd'], tic_df['macd_signal'], tic_df['macd_hist'] = MACD(
            tic_df['close'], fastperiod=12, slowperiod=26, signalperiod=9)
        tic_df['cci'] = CCI(tic_df['high'], tic_df['low'], tic_df['close'], timeperiod=14)
        tic_df['dx'] = DX(tic_df['high'], tic_df['low'], tic_df['close'], timeperiod=14)
        per_ticker_frames.append(tic_df)

    # pd.concat raises on an empty list, so keep the "no tickers" result explicit
    if not per_ticker_frames:
        return pd.DataFrame()
    # One concat through the public API instead of the private DataFrame._append
    # called once per ticker
    return pd.concat(per_ticker_frames)

In [None]:
processed_df=add_technical_indicator(df,technical_indicator_list)
processed_df.tail()

In [None]:
def add_technical_indicator(df, tech_indicator_list):
    """Return a copy of ``df`` with RSI, MACD, CCI and DX columns added per ticker.

    Custom indicators are not supported: the fixed set MACD (12/26/9), RSI (14),
    CCI (14) and DX (14) is always computed.

    Args:
        df: Frame with a ``tic`` column and ``high``, ``low``, ``close`` columns.
        tech_indicator_list: Currently ignored; kept so existing calls keep working.

    Returns:
        A new DataFrame holding the rows of every ticker (in order of first
        appearance) with the columns ``rsi``, ``macd``, ``macd_signal``,
        ``macd_hist``, ``cci`` and ``dx`` added. An empty DataFrame is returned
        when ``df`` contains no tickers.
    """
    per_ticker_frames = []
    for tic in df.tic.unique():
        tic_df = df[df.tic == tic].copy()
        tic_df['rsi'] = RSI(tic_df['close'], timeperiod=14)
        tic_df['macd'], tic_df['macd_signal'], tic_df['macd_hist'] = MACD(
            tic_df['close'], fastperiod=12, slowperiod=26, signalperiod=9)
        tic_df['cci'] = CCI(tic_df['high'], tic_df['low'], tic_df['close'], timeperiod=14)
        tic_df['dx'] = DX(tic_df['high'], tic_df['low'], tic_df['close'], timeperiod=14)
        per_ticker_frames.append(tic_df)

    # pd.concat raises on an empty list, so keep the "no tickers" result explicit
    if not per_ticker_frames:
        return pd.DataFrame()
    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    return pd.concat(per_ticker_frames)

In [None]:
def add_technical_indicator(df, tech_indicator_list):
    """Return a copy of ``df`` with RSI, MACD, CCI and DX columns added per ticker.

    Custom indicators are not supported: the fixed set MACD (12/26/9), RSI (14),
    CCI (14) and DX (14) is always computed.

    Args:
        df: Frame with a ``tic`` column and ``high``, ``low``, ``close`` columns.
        tech_indicator_list: Currently ignored; kept so existing calls keep working.

    Returns:
        A new DataFrame holding the rows of every ticker (in order of first
        appearance) with the columns ``rsi``, ``macd``, ``macd_signal``,
        ``macd_hist``, ``cci`` and ``dx`` added. An empty DataFrame is returned
        when ``df`` contains no tickers.
    """
    per_ticker_frames = []
    for tic in df.tic.unique():
        tic_df = df[df.tic == tic].copy()
        tic_df['rsi'] = RSI(tic_df['close'], timeperiod=14)
        tic_df['macd'], tic_df['macd_signal'], tic_df['macd_hist'] = MACD(
            tic_df['close'], fastperiod=12, slowperiod=26, signalperiod=9)
        tic_df['cci'] = CCI(tic_df['high'], tic_df['low'], tic_df['close'], timeperiod=14)
        tic_df['dx'] = DX(tic_df['high'], tic_df['low'], tic_df['close'], timeperiod=14)
        per_ticker_frames.append(tic_df)

    # pd.concat raises on an empty list, so keep the "no tickers" result explicit
    if not per_ticker_frames:
        return pd.DataFrame()
    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    return pd.concat(per_ticker_frames)

In [None]:
def add_technical_indicator(df, tech_indicator_list):
    """Return a copy of ``df`` with RSI, MACD, CCI and DX columns added per ticker.

    Custom indicators are not supported: the fixed set MACD (12/26/9), RSI (14),
    CCI (14) and DX (14) is always computed.

    Args:
        df: Frame with a ``tic`` column and ``high``, ``low``, ``close`` columns.
        tech_indicator_list: Currently ignored; kept so existing calls keep working.

    Returns:
        A new DataFrame holding the rows of every ticker (in order of first
        appearance) with the columns ``rsi``, ``macd``, ``macd_signal``,
        ``macd_hist``, ``cci`` and ``dx`` added. An empty DataFrame is returned
        when ``df`` contains no tickers.
    """
    per_ticker_frames = []
    for tic in df.tic.unique():
        tic_df = df[df.tic == tic].copy()
        tic_df['rsi'] = RSI(tic_df['close'], timeperiod=14)
        tic_df['macd'], tic_df['macd_signal'], tic_df['macd_hist'] = MACD(
            tic_df['close'], fastperiod=12, slowperiod=26, signalperiod=9)
        tic_df['cci'] = CCI(tic_df['high'], tic_df['low'], tic_df['close'], timeperiod=14)
        tic_df['dx'] = DX(tic_df['high'], tic_df['low'], tic_df['close'], timeperiod=14)
        per_ticker_frames.append(tic_df)

    # pd.concat raises on an empty list, so keep the "no tickers" result explicit
    if not per_ticker_frames:
        return pd.DataFrame()
    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    return pd.concat(per_ticker_frames)

In [None]:
# Use the parsed timestamps as the index, then remove the now-redundant 'time' column
processed_df.index = pd.to_datetime(processed_df['time'])
processed_df.drop(columns=['time'], inplace=True)
print(processed_df.tail(20))

# Part 2: Triple barrier method/Data Labeling
Introduction here:

https://www.youtube.com/watch?v=U2CxilKFue4

## 2.1 Add volatility


In [None]:
def get_vol(prices, span=100):
    """Estimate volatility as the exponentially weighted std of simple returns.

    Args:
        prices: Price series exposing ``pct_change``.
        span: Span of the exponentially weighted window.

    Returns:
        The exponentially weighted standard deviation of ``p[t] / p[t-1] - 1``,
        indexed like ``prices``.
    """
    simple_returns = prices.pct_change()
    return simple_returns.ewm(span=span).std()

In [None]:
data_ohlcv = processed_df.assign(volatility=get_vol(processed_df.close)).dropna()

In [None]:
data_ohlcv.head(10)

## 2.2 Adding Path Dependency: Triple-Barrier Method

To better incorporate the stop-loss and take-profit scenarios of a hypothetical trading strategy, we will modify the fixed-horizon labeling method so that it reflects which barrier has been touched first — upper, lower, or horizon. Hence the name: the triple-barrier method.

The labeling schema is defined as follows:

* y = 2 : top barrier is hit first
* y = 1 : right barrier is hit first
* y = 0 : bottom barrier is hit first

What about the side of the bet?

The schema above works fine for long-only strategies, however things get more complicated when we allow for both long and short bets. If we are betting short, our profit/loss is inverted relative to the price action — we profit if the price goes down and we lose when the price goes up.

In order to account for this, we can simply represent side as 2 for long and 0 for short. Thus we can multiply our returns by the side, so whenever we’re betting short the negative returns become positive and vice-versa. Effectively, we flip the y = 2 and y = 0 labels if side = 0.

Let’s take a shot at the implementation (based on MLDP’s code).
First, we define the procedure for getting the timestamps of the horizon barriers:

### Create function to obtain the barrier hits

In [None]:
def get_barriers():
    """Build the triple-barrier table, one row per timestamp of ``daily_volatility``.

    Reads the notebook-level names ``daily_volatility``, ``prices``, ``t_final``
    and ``upper_lower_multipliers``.

    Returns:
        DataFrame indexed like ``daily_volatility`` with the columns

        * ``days_passed``    - 1-based position of the row in the index
        * ``price``          - ``prices`` at that timestamp
        * ``vert_barrier``   - timestamp at index position ``days_passed + t_final``
          (``t_final + 1`` rows after the current one); NaN when that position is
          past the end of the index or when ``t_final`` is 0
        * ``top_barrier``    - ``price * (1 + upper_lower_multipliers[0] * vol)``,
          NaN when the multiplier is not positive
        * ``bottom_barrier`` - ``price * (1 - upper_lower_multipliers[1] * vol)``,
          NaN when the multiplier is not positive

    Note:
        Assumes the index of ``daily_volatility`` has no duplicate timestamps.
    """
    index = daily_volatility.index
    n_rows = len(index)
    columns = ['days_passed', 'price', 'vert_barrier', 'top_barrier', 'bottom_barrier']
    upper_mult, lower_mult = upper_lower_multipliers[0], upper_lower_multipliers[1]

    # create a container
    barriers = pd.DataFrame(columns=columns, index=index)

    # enumerate() yields the running row count directly; re-slicing the series
    # from its first timestamp on every iteration made the loop quadratic
    for days_passed, (day, vol) in enumerate(daily_volatility.items(), start=1):
        # set the vertical barrier
        horizon_pos = days_passed + t_final
        if t_final != 0 and horizon_pos < n_rows:
            vert_barrier = index[horizon_pos]
        else:
            vert_barrier = np.nan

        price_now = prices.loc[day]

        # A disabled barrier is one scalar NaN (not a whole NaN Series per cell)
        if upper_mult > 0:
            top_barrier = price_now + price_now * upper_mult * vol
        else:
            top_barrier = np.nan
        if lower_mult > 0:
            bottom_barrier = price_now - price_now * lower_mult * vol
        else:
            bottom_barrier = np.nan

        barriers.loc[day, columns] = \
            days_passed, price_now, vert_barrier, top_barrier, bottom_barrier

    return barriers

In [None]:
# Inputs read by get_barriers(): per-row volatility, horizon length (in rows)
# and the [upper, lower] barrier-width multipliers

t_final = 25
upper_lower_multipliers = [2, 2]

daily_volatility = data_ohlcv['volatility']
price = data_ohlcv['close']
# Close prices restricted to the timestamps that have a volatility estimate
prices = price.loc[daily_volatility.index]

In [None]:
barriers = get_barriers()
barriers

## 2.3 Function to get label for the dataset (0, 1, 2)

* 0: hit the stoploss
* 1: hit the time out
* 2: hit the profit take

The commented-out part of this function allows an easy conversion to a regression analysis (it is currently a classification). To do so, change the labels to (-1, 0, 1) and replace the label assigned when the vertical barrier is hit with the expression stated below.

Hitting the profit-take barrier then yields 1, hitting the stop-loss barrier yields -1, and hitting the vertical barrier yields a value in the range (-1, 1). The label space is then continuous.

```
barriers['out'][i] = max(
          [(price_final - price_initial)/ 
            (top_barrier - price_initial), \
            (price_final - price_initial)/ \
            (price_initial - bottom_barrier)],\
            key=abs)
```

In [None]:
def get_labels():
    """Label every row of the global ``barriers`` frame by the first barrier touched.

    For each row that has a vertical barrier, the closes of ``data_ohlcv`` from
    the row's timestamp up to and including its vertical barrier are scanned:

    * 2 - the top barrier (profit take) is touched first
    * 0 - the bottom barrier (stop loss) is touched first
    * 1 - neither is touched before the vertical barrier (timeout)

    If both barriers are first touched on the same bar, the profit take wins.
    Rows without a vertical barrier keep the label ``None``.

    The labels are written to ``barriers['label_barrier']``; nothing is returned.
    """
    def _as_level(value):
        # A barrier cell may hold a Series instead of a scalar; use its first entry
        if np.isscalar(value):
            return value
        return value.iloc[0] if not value.empty else np.nan

    close = data_ohlcv['close']
    vert_barriers = barriers['vert_barrier']
    top_levels = barriers['top_barrier']
    bottom_levels = barriers['bottom_barrier']

    labels = [None] * len(barriers.index)
    for i, start in enumerate(barriers.index):
        # .iloc makes the positional lookup explicit on a datetime index
        end = vert_barriers.iloc[i]
        if pd.isna(end):
            continue

        top_barrier = _as_level(top_levels.iloc[i])
        bottom_barrier = _as_level(bottom_levels.iloc[i])

        # Closing prices inside the [start, end] window
        window = close.iloc[data_ohlcv.index.slice_indexer(start, end)]
        top_hits = np.flatnonzero((window >= top_barrier).to_numpy())
        bottom_hits = np.flatnonzero((window <= bottom_barrier).to_numpy())

        # Compare the positions of the first touches so the label reflects which
        # barrier was reached first, not merely which ones were reached at all
        if top_hits.size and (not bottom_hits.size or top_hits[0] <= bottom_hits[0]):
            labels[i] = 2
        elif bottom_hits.size:
            labels[i] = 0
        else:
            # Timeout. For a regression target, switch the labels to (-1, 0, 1)
            # and replace this constant with the barrier-normalised return:
            #   max([(price_final - price_initial) / (top_barrier - price_initial),
            #        (price_final - price_initial) / (price_initial - bottom_barrier)],
            #       key=abs)
            labels[i] = 1

    # A single column assignment replaces the chained ``barriers[col][i] = ...``
    # writes, which do not reach the frame under pandas copy-on-write.
    # dtype=object keeps ints and None side by side.
    barriers["label_barrier"] = np.array(labels, dtype=object)
    return



In [None]:
# Fill barriers['label_barrier'] in place
get_labels()
barriers

# Attach the barrier levels and labels to the main dataset, then discard the
# last t_final + 1 rows (they are too close to the end of the data)
barrier_columns = ['vert_barrier', 'top_barrier', 'bottom_barrier', 'label_barrier']
data_ohlcv = data_ohlcv.merge(barriers[barrier_columns], left_on='time', right_on='time')
data_ohlcv.drop(index=data_ohlcv.tail(t_final + 1).index, inplace=True)
data_ohlcv.head(5)

In [None]:
# Count barrier hits ( 0 = stoploss, 1 = timeout, 2 = profit take)
pd.Series(data_ohlcv['label_barrier']).value_counts()

In [None]:
# Plot a stretch of closing prices together with the three barriers of one observation
fig, ax = plt.subplots()
plt.xticks(rotation=45)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d/%Y'))
ax.xaxis.set_major_locator(mdates.DayLocator(interval=1))

# Row position of the observation whose barriers are drawn
TIMESTAMP_TO_PLOT = 300

ax.set(title='ETH/USDT', xlabel='date', ylabel='price')
ax.plot(data_ohlcv['close'].iloc[200:600])

# All lookups are by row position, so go through .iloc: integer keys passed to
# Series[...] on a non-integer index are deprecated as positional access.
start = data_ohlcv.index[TIMESTAMP_TO_PLOT]
end = data_ohlcv['vert_barrier'].iloc[TIMESTAMP_TO_PLOT]
upper_barrier = data_ohlcv['top_barrier'].iloc[TIMESTAMP_TO_PLOT]
lower_barrier = data_ohlcv['bottom_barrier'].iloc[TIMESTAMP_TO_PLOT]
mid_level = (lower_barrier + upper_barrier) * 0.5

# Horizontal lines: top barrier, bottom barrier and their midpoint
ax.plot([start, end], [upper_barrier, upper_barrier], 'r--')
ax.plot([start, end], [lower_barrier, lower_barrier], 'r--')
ax.plot([start, end], [mid_level, mid_level], 'r--')
# Vertical lines: start of the window and the vertical (time) barrier
ax.plot([start, start], [lower_barrier, upper_barrier], 'r-')
ax.plot([end, end], [lower_barrier, upper_barrier], 'r-');

# Part 3: Copying the Neural Network present in ElegantRL ActorPPO agent.

In ElegantRL from AI4Finance, all the preprogrammed Agents are present:

https://github.com/AI4Finance-Foundation/ElegantRL/blob/master/elegantrl/agents/net.py

Some of the agents output discrete actions (classification), and some output continuous actions (regression). This notebook can be adapted for both by changing the labeling method to (-1, 0, 1) and changing the neural network to output a continuous value between -1 and 1.

Therefore this notebook allows for analysis for both regression and classification.




In [None]:
data_ohlcv = data_ohlcv.drop(['vert_barrier', 'top_barrier', 'bottom_barrier','adjusted_close','tic'], axis = 1)

In [None]:
class Net(nn.Module):
    """Feed-forward classifier, described in the notebook as a copy of ElegantRL's ActorPPO net.

    Layer stack: Linear -> ReLU -> Linear -> ReLU -> Linear -> Hardswish -> Linear.
    The second half was labelled "residual layers" in the original comments,
    but no skip connection is used here. ``forward`` returns raw class scores;
    no softmax is applied.

    Args:
        state_dim: Number of input features (default 12).
        mid_dim: Width of the hidden layers (default 2**10).
        action_dim: Number of output classes (default 3: sell/nothing/buy).
    """

    def __init__(self, state_dim=12, mid_dim=2**10, action_dim=3):
        super().__init__()

        self.state_dim = state_dim
        self.mid_dim = mid_dim
        self.action_dim = action_dim

        # Initial layers
        self.fc1 = nn.Linear(self.state_dim, self.mid_dim)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(self.mid_dim, self.mid_dim)

        # Second stage and output head
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(self.mid_dim, self.mid_dim)
        self.hw1 = nn.Hardswish()
        self.fc_out = nn.Linear(self.mid_dim, self.action_dim)

    def forward(self, x):
        """Return the raw class scores for a batch ``x`` (cast to float32 first)."""
        x = x.float()
        x = self.fc2(self.relu1(self.fc1(x)))
        x = self.hw1(self.fc3(self.relu2(x)))
        return self.fc_out(x)


model_NN1 = Net()
print(model_NN1)

In [None]:
class ClassifierDataset(Dataset):
    """Dataset that pairs each feature row with the label at the same index."""

    def __init__(self, X_data, y_data):
        # Both containers are stored as given; they are only ever indexed
        self.X_data, self.y_data = X_data, y_data

    def __len__(self):
        # The sample count is taken from the feature container alone
        return len(self.X_data)

    def __getitem__(self, index):
        features = self.X_data[index]
        label = self.y_data[index]
        return features, label

## 3.1 Set constants and train/test split

In [None]:
# Training hyper-parameters
batch_size = 16
epochs = 300

# Work on the labelled frame under a shorter name
data = data_ohlcv

X = data[[
    'open', 'high', 'low', 'close', 'volume',
    'rsi', 'macd', 'macd_signal', 'macd_hist', 'cci', 'dx',
    'volatility',
]].values
y = np.squeeze(data[['label_barrier']].values).astype(int)

# Stratified 80/20 split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    stratify=y,
    random_state=69,
)

# Scale to [0, 1]; the scaler is fitted on the training part only
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Make sure everything is a numpy array
X_train, X_test = np.array(X_train), np.array(X_test)
y_train, y_test = np.array(y_train), np.array(y_test)

# Wrap the arrays as float features / long labels and build the PyTorch loaders
train_dataset = ClassifierDataset(
    torch.from_numpy(X_train).float(),
    torch.from_numpy(y_train.astype(int)).long(),
)
test_dataset = ClassifierDataset(
    torch.from_numpy(X_test).float(),
    torch.from_numpy(y_test.astype(int)).long(),
)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, batch_size=1)

## 3.2 Check if GPU available

In [None]:
# Use the first CUDA device when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

# Loss function and optimizer for model_NN1
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_NN1.parameters(), lr=0.0001)

In [None]:
## Make sure you are working on GPU
# NOTE(review): this assert stops the notebook on CPU-only machines, although
# `device` is selected with a CPU fallback elsewhere in the notebook - confirm
# that a hard stop is intended.
assert torch.cuda.is_available(), "Change your runtime to GPU! Currently working on CPU... zzzzz"

# IPython shell capture: the output lines of `nvidia-smi` are collected in gpu_info
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
# Output containing the word 'failed' is treated as "no GPU attached"
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

## 3.3 Now train the ```model_NN1```; as long as the test loss reduces keep on training!

In [None]:
# Train function
def train(fold, model, device, trainloader, optimizer, epoch, loss_fn=None):
    """Run one training epoch over ``trainloader`` and report progress.

    Args:
        fold: Fold identifier, used only in the progress message.
        model: Network to train; switched to training mode.
        device: Device every batch is moved to.
        trainloader: Iterable of ``(data, target)`` batches exposing ``.dataset``.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, used only in the progress message.
        loss_fn: Callable ``(output, target) -> scalar`` returning the batch
            *mean* loss. Defaults to the notebook-level ``criterion``.

    Returns:
        ``(correct_train, total_train_loss)``: the number of correctly
        classified training samples and the loss summed over all samples as a
        float. Dividing the latter by the dataset size gives the average loss.
    """
    if loss_fn is None:
        loss_fn = criterion

    model.train()
    correct_train = 0
    total_train_loss = 0.0
    n_batches = len(trainloader)
    n_samples = len(trainloader.dataset)

    # Iterate the loader that was passed in, not the notebook-level train_loader
    for batch_idx, (data, target) in enumerate(trainloader):
        data, target = data.to(device), target.to(device)
        labels = target.flatten()

        optimizer.zero_grad()
        output = model(data)
        train_loss = loss_fn(output, labels)
        train_loss.backward()
        optimizer.step()

        if batch_idx % 100 == 0:
            print('Train Fold/Epoch: {}/{} [{}/{} ({:.0f}%)]\ttrain_loss: {:.6f}'.format(
                fold, epoch, batch_idx * len(data), n_samples,
                100. * batch_idx / n_batches, train_loss.item()))

        # The batch loss is a mean: weight it by the batch size so the running
        # total is a per-sample sum even when the last batch is smaller
        total_train_loss += train_loss.item() * labels.size(0)

        # Measure accuracy on train set
        _, y_pred_tags_train = torch.max(output, dim=1)
        correct_train += y_pred_tags_train.eq(labels.view_as(y_pred_tags_train)).sum().item()

    # Return the accumulated loss rather than only the last batch's loss tensor
    return correct_train, total_train_loss

In [None]:
# Test function
def test(fold, model, device, test_loader, correct_train, train_loss,
         train_size=None, loss_fn=None):
    """Evaluate ``model`` on ``test_loader`` and print train and test metrics.

    Args:
        fold: Fold identifier, used only in the printed messages.
        model: Network to evaluate; switched to evaluation mode.
        device: Device every batch is moved to.
        test_loader: Iterable of ``(data, target)`` batches exposing ``.dataset``.
        correct_train: Number of correctly classified training samples.
        train_loss: Training loss (float or one-element tensor). It is divided
            by ``train_size``, so the printed average is only meaningful when
            this is a loss summed over all training samples.
        train_size: Number of training samples. Defaults to the size of the
            notebook-level ``train_loader`` dataset.
        loss_fn: Callable ``(output, target) -> scalar`` returning the batch
            *mean* loss. Defaults to the notebook-level ``criterion``.
    """
    if loss_fn is None:
        loss_fn = criterion
    if train_size is None:
        train_size = len(train_loader.dataset)

    model.eval()
    test_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            labels = target.flatten()
            output = model(data)

            # Weight the batch-mean loss by the batch size so that dividing by
            # the dataset size below is correct for any batch size
            test_loss += loss_fn(output, labels).item() * labels.size(0)

            # Measure accuracy on test set
            _, y_pred_tags = torch.max(output, dim=1)
            correct += y_pred_tags.eq(labels.view_as(y_pred_tags)).sum().item()

    test_size = len(test_loader.dataset)
    test_loss /= test_size
    # float() avoids dividing the caller's loss tensor in place
    avg_train_loss = float(train_loss) / train_size

    # Print train accuracy for epoch
    print('\nTrain set for fold {}: Average train_loss: {:.4f}, Accuracy: {}/{} ({:.5f}%)'.format(
        fold, avg_train_loss, correct_train, train_size,
        100 * correct_train / train_size))

    # Print test result for epoch
    print('Test set for fold {}:  Average test_loss:  {:.4f}, Accuracy: {}/{} ({:.5f}%)\n'.format(
        fold, test_loss, correct, test_size,
        100 * correct / test_size))

In [None]:
# Move the network to the selected device before training starts
model_NN1.to(device)

# `fold` is a placeholder (no PurgedKFold build yet, ignore this)
# took about 1hour to train when epochs=300
fold = 0
epochs = 100

for epoch in range(1, epochs + 1):
    # One pass over the training data, then a report on both sets
    correct_train, train_loss = train(
        fold, model_NN1, device, train_loader, optimizer, epoch
    )
    test(fold, model_NN1, device, test_loader, correct_train, train_loss)

In [None]:
# Save model to disk and download it, to save you some time on later runs

from google.colab import files

filename = 'model_NN1'
model_path = filename + '.pkl'

# The earlier unclosed open(filename, 'wb') is gone: it leaked a file handle
# and left an empty 'model_NN1' file behind.
with open(model_path, 'wb') as fid:
    pickle.dump(model_NN1, fid)

# Read the file back so we know the saved model can be loaded.
# NOTE: pickle.load can execute arbitrary code - only load files you created yourself.
with open(model_path, 'rb') as fid:
    model_NN1 = pickle.load(fid)

files.download(model_path)

In [None]:
# Load the pickled model from disk.
# NOTE: pickle.load can execute arbitrary code - only load files you created yourself.

filename = 'model_NN1'
with open(filename + '.pkl', 'rb') as fid:
    model_NN1_pickle = pickle.load(fid)

# Show the model that was just loaded (the cell used to display model_NN1 instead)
model_NN1_pickle

## 3.4 Get classification report

In [None]:
# Evaluate the trained model on the test set
model_NN1.eval()
with torch.no_grad():
    # The network returns raw class scores, not probabilities; the argmax picks the class
    y_pred_nn1_proba = model_NN1(torch.from_numpy(X_test).float().to(device))
    y_pred_nn1 = torch.argmax(y_pred_nn1_proba, dim=1)
    y_pred_nn1 = y_pred_nn1.cpu().detach().numpy()

# print prediction values
print('labels in prediction:', np.unique(y_pred_nn1), '\n')

# print report
# Class ids follow the labelling scheme: 0 = stop loss hit (short),
# 1 = timeout (no bet), 2 = profit take hit (long). Names are listed in id order.
label_names = ['short', 'no bet', 'long']
# Passing `labels` keeps the names aligned even if a class is absent from the test data
print(classification_report(y_test.astype(int), y_pred_nn1,
                            labels=[0, 1, 2], target_names=label_names))

In [None]:
np.bincount(y_pred_nn1)

# Part 4: Feature Importance Analysis

After we have a working neural network model (up to 66% accuracy with this network size), we can perturb the input columns one at a time and predict again. The column whose perturbation produces the highest error is the most important one for predicting the action.

## Perturbation Rank (PR)

In [None]:
def perturbation_rank(model, x, y, names):
    """Rank features by the loss obtained when each one is shuffled on its own.

    For every column of ``x`` a copy of the data is made, that single column is
    shuffled across rows (using NumPy's global random state), and the
    cross-entropy of the model's predictions against ``y`` is recorded. A
    larger error means the model relies more on that feature.

    Args:
        model: Trained ``torch.nn.Module`` returning raw class scores (logits).
        x: 2-D feature tensor ``(samples, features)``; it is not modified.
        y: Tensor of integer class labels, flattened before use.
        names: One feature name per column of ``x``.

    Returns:
        DataFrame with the columns ``name``, ``error`` and ``importance``
        (error divided by the largest error), sorted by descending importance.
    """
    # Run on the device the model lives on instead of relying on a global
    try:
        model_device = next(model.parameters()).device
    except StopIteration:
        model_device = torch.device("cpu")

    # May share memory with ``x``; it is only ever copied, never shuffled itself
    features = x.detach().cpu().numpy()
    targets = y.flatten().long().to(model_device)

    errors = []
    model.eval()
    with torch.no_grad():
        for col in range(features.shape[1]):
            # Fresh copy per column: shuffling the shared buffer in place would
            # leave the previously shuffled columns perturbed as well
            perturbed = features.copy()
            np.random.shuffle(perturbed[:, col])

            logits = model(torch.from_numpy(perturbed).float().to(model_device))

            # cross_entropy applies log-softmax itself, so it yields the mean
            # log loss directly from the logits
            error = torch.nn.functional.cross_entropy(logits, targets).item()
            errors.append(error)

    max_error = np.max(errors)
    importance = [e / max_error for e in errors]

    result = pd.DataFrame(
        {'name': names, 'error': errors, 'importance': importance},
        columns=['name', 'error', 'importance'],
    )
    result.sort_values(by='importance', ascending=False, inplace=True)
    result.reset_index(inplace=True, drop=True)
    return result

In [None]:
model_NN1

In [None]:
# Feature names: every column of the dataset except the target
names = data_ohlcv.columns.tolist()
names.remove('label_barrier')

# Test features as float32 and test labels as int64 tensors
x_test_tensor = torch.from_numpy(X_test).float()
y_test_tensor = torch.from_numpy(y_test.astype(int)).long()
rank = perturbation_rank(model_NN1, x_test_tensor, y_test_tensor, names)

display(rank)

In [None]:
# Feature names: every column of the dataset except the target
names = data_ohlcv.columns.tolist()
names.remove('label_barrier')

# Test features as float32 and test labels as int64 tensors
x_test_tensor = torch.from_numpy(X_test).float()
y_test_tensor = torch.from_numpy(y_test.astype(int)).long()
rank = perturbation_rank(model_NN1, x_test_tensor, y_test_tensor, names)

display(rank)