# Implementing GridSearchCV for FIGS

In [7]:
import pandas as pd
import numpy as np
import modelinghelper as helper
from imodels import FIGSClassifier
import os
import joblib
# supress future warnings
import warnings
warnings.filterwarnings(action='ignore', category=FutureWarning)

In [9]:
# Seed passed as random_state to the classifier further down.
rng = 42

# Run identifiers and the directory that collects the grid-search results.
output_path, dataset = 'gridsearch_all_models', 'export'
mod_name, run_num = "FIGS", "run_1"

# Create the results directory on first run; otherwise just report that it is there.
if os.path.exists(output_path):
    print("Folder already exists")
else:
    os.makedirs(output_path)

# Prefix for saved file names (currently identical to the output directory).
prefix = str(output_path)

Folder already exists


In [3]:
# Location and name of the input CSV.
p = '../data/'
input_name = 'cleaned.csv'

# Load the CSV and convert the ship month column to string in one step.
df = pd.read_csv(p + input_name).astype({'ship_date_mm': str})

In [4]:
# Columns set aside as non-features (this list is not referenced again in this cell).
# NOTE(review): 'wildlf_cat' appears both here and in feature_cols below —
# confirm which list it belongs in; only feature_cols is passed to the helper.
non_feature_cols = [
    'control_number', 'disp_date', 'i_e', 'ship_date',
    'cartons', 'qty', 'unit', 'specific_generic_name',
    'genus', 'species', 'wildlf_cat',
    'disp_date_yyyy', 'disp_date_mm', 'disp_ship_date',
]

# Target column names (declared here, not used in this cell).
target = ['act', 'dp_cd']

# Columns handed to the filtering helper as model features.
feature_cols = [
    'species_code', 'wildlf_desc', 'wildlf_cat',
    'ctry_org', 'ctry_ie', 'purp', 'src', 'trans_mode', 'pt_cd',
    'value', 'ship_date_mm',
]

# Filter the frame with i_e='E' — presumably export records; confirm against
# modelinghelper.df_filtering.
export_df = helper.df_filtering(df, i_e='E', f_cols=feature_cols)

In [5]:
# Split the filtered export frame into train/test features and labels.
# NOTE(review): `rng` is not passed here — verify whether helper.data_split
# fixes its own seed, otherwise the split may differ between runs.
split_parts = helper.data_split(export_df)
X_train, X_test, y_train, y_test = split_parts

### FIGS Export

In [6]:
# FIGS classifier seeded with the notebook-wide seed.
clf_figs = FIGSClassifier(random_state=rng)

# Search max_rules over 1..49 inclusive (49 candidates).
# The 'clf__' prefix presumably targets a pipeline step named 'clf' built by the
# helper — confirm in modelinghelper.gridsearch_pipeline.
# TODO: early stopping?
figs_params = dict(clf__max_rules=np.arange(1, 50))

figs_pipe = helper.gridsearch_pipeline(
    X_train,
    y_train,
    clf_figs,
    figs_params,
)

Fitting 5 folds for each of 49 candidates, totalling 245 fits
[CV 1/5] END ..................clf__max_rules=1;, score=0.106 total time=   2.1s
[CV 2/5] END ..................clf__max_rules=1;, score=0.135 total time=   2.0s
[CV 3/5] END ..................clf__max_rules=1;, score=0.101 total time=   1.9s
[CV 4/5] END ..................clf__max_rules=1;, score=0.114 total time=   2.1s
[CV 5/5] END ..................clf__max_rules=1;, score=0.000 total time=   2.3s
[CV 1/5] END ..................clf__max_rules=2;, score=0.106 total time=   2.5s
[CV 2/5] END ..................clf__max_rules=2;, score=0.135 total time=   2.8s
[CV 3/5] END ..................clf__max_rules=2;, score=0.101 total time=   3.1s
[CV 4/5] END ..................clf__max_rules=2;, score=0.114 total time=   3.3s
[CV 5/5] END ..................clf__max_rules=2;, score=0.078 total time=   3.1s
[CV 1/5] END ..................clf__max_rules=3;, score=0.221 total time=   3.4s
[CV 2/5] END ..................clf__max_rules=3

In [12]:
# Persist the object returned by the grid-search helper under the results folder.
# joblib.dump stays the last expression so the cell still displays the written path(s).
figs_model_path = prefix + '/figs_pipe_export.joblib'
joblib.dump(figs_pipe, figs_model_path)

['gridsearch_all_models/figs_pipe_export.joblib']