In [1]:
import os
import numpy as np
import pandas as pd
from binerizer import *
from DNFRuleModel import DNFRuleModel
from MasterModel import MasterModel
from RuleGenerator import RuleGenerator
from Classifier import Classifier
from sklearn.model_selection import train_test_split

In [2]:
## Read the cleaned FICO table and binarize it
fico = pd.read_csv('data/fico_clean.csv')
fico_bin = binerizeData(fico, verbose=False)

# Materialize the array once: every column except the last feeds the model
# as features, and the last column is used as the target.
fico_values = fico_bin.to_numpy()
fico_X = fico_values[:, :-1]
fico_Y = fico_values[:, -1]

In [3]:
# Hyperparameter sweep over `numRulesToReturn` at a fixed rule complexity.
# Each candidate value is fitted on `n_splits` 80/20 train/test splits; the
# per-split held-out accuracies go to `accuracies_full` and their mean to
# `accuracies` (one entry per value in `return_nums`, same order).
price_limit = 45   # forwarded to fit() as timeLimitPricing
train_limit = 300  # forwarded to fit() as timeLimit
c = 15             # rule complexity held fixed during this sweep
return_nums = [10, 20, 50, 100, 200, 999999999]  # last value: presumably "no cap" -- TODO confirm
n_splits = 5
base_seed = 0      # split k always uses random_state = base_seed + k

accuracies = []
accuracies_full = []
# Pool of rules handed to the next fit as `initial_rules` (warm start).
# NOTE(review): the pool is shared across splits and across settings, so rules
# produced on one split's training rows are reused on splits where some of
# those rows are in the test set, and later settings start from a larger pool
# than earlier ones. Confirm this is intended when comparing settings.
saved_rules = None

for r in return_nums:
    accuracy = []
    args = {'ruleComplexity': c, 'numRulesToReturn': r}
    for i in range(n_splits):
        # Seeding by repetition index makes the sweep reproducible and scores
        # every setting on the same train/test partitions.
        X_train, X_test, y_train, y_test = train_test_split(
            fico_X, fico_Y, test_size=0.2, random_state=base_seed + i)

        # Name kept as-is: a later cell reads `adult_classif.ruleMod.rules`.
        adult_classif = Classifier(X_train, y_train, args, ruleGenerator = 'Generic')
        adult_classif.fit(initial_rules = saved_rules,
                          verbose = False,
                          timeLimit = train_limit,
                          timeLimitPricing = price_limit)

        # Fraction of held-out rows predicted correctly.
        acc = np.mean(adult_classif.predict(X_test) == y_test)
        accuracy.append(acc)
        print(acc)

        # Merge this fit's rules into the warm-start pool, dropping duplicate rows.
        rules = adult_classif.ruleMod.rules
        if saved_rules is None:
            saved_rules = rules
        else:
            saved_rules = np.unique(np.concatenate([saved_rules, rules]), axis = 0)
    accuracies.append(np.mean(accuracy))
    accuracies_full.append(accuracy)

hey your config said to return this many rules:  10
Using license file /Users/connorlawless/gurobi.lic
Academic license - for non-commercial use only
0.7074569789674953
hey your config said to return this many rules:  10
0.6955066921606119
hey your config said to return this many rules:  10
Time limit for column generation exceeded. Solving MIP.
0.7141491395793499
hey your config said to return this many rules:  10
Time limit for column generation exceeded. Solving MIP.
0.7007648183556405
hey your config said to return this many rules:  10
Time limit for column generation exceeded. Solving MIP.
0.6964627151051626
hey your config said to return this many rules:  20
Time limit for column generation exceeded. Solving MIP.
0.6959847036328872
hey your config said to return this many rules:  20
Time limit for column generation exceeded. Solving MIP.
0.6993307839388145
hey your config said to return this many rules:  20
Time limit for column generation exceeded. Solving MIP.
0.722275334608030

In [21]:
# Number of rules held by the rule model of the most recently fitted classifier.
len(adult_classif.ruleMod.rules)

30184

In [17]:
# Save the mean accuracy per `numRulesToReturn` setting and pick the best one.
# `accuracies` only gets an entry once a setting has finished all of its
# splits, so an interrupted sweep leaves it shorter than `return_nums`.
# Fail with a clear message instead of an opaque pandas length error.
if len(accuracies) != len(return_nums):
    raise ValueError(
        'numRulesToReturn sweep is incomplete: %d mean accuracies for %d settings; '
        're-run the tuning cell to completion before saving'
        % (len(accuracies), len(return_nums)))

# Column 0 holds the mean accuracy; 'numReturn' holds the matching setting.
res = pd.DataFrame(accuracies)
res['numReturn'] = return_nums

# to_csv does not create missing directories.
os.makedirs('results', exist_ok=True)
res.to_csv('results/numReturnHPTuning.csv')

# Setting with the highest mean accuracy (first one on ties); read by the
# complexity sweep further down.
best_num = return_nums[np.argmax(accuracies)]

[0, 1, 2]

In [None]:
# Publish the numRulesToReturn tuning results: stage the CSV, commit it, push.
# Each line is an independent shell call, so a failing step does not stop the next.
!git add results/numReturnHPTuning.csv
!git commit -m 'pushing number to return hp tuning result'
!git push

In [None]:
# Hyperparameter sweep over rule complexity, with `numRulesToReturn` fixed at
# `best_num` (chosen by the numRulesToReturn results cell above).
# Each complexity is fitted on `n_splits` 80/20 train/test splits; per-split
# held-out accuracies go to `accuracies_full` and their mean to `accuracies`
# (one entry per value in `complexities`, same order).
price_limit = 45   # forwarded to fit() as timeLimitPricing
train_limit = 300  # forwarded to fit() as timeLimit
complexities = [1, 5, 10, 15, 20, 30]
n_splits = 10
base_seed = 0      # split k always uses random_state = base_seed + k

accuracies = []
accuracies_full = []

for c in complexities:
    accuracy = []
    args = {'ruleComplexity': c, 'numRulesToReturn': best_num}
    for i in range(n_splits):
        # Seeding by repetition index makes the sweep reproducible and scores
        # every complexity on the same train/test partitions.
        X_train, X_test, y_train, y_test = train_test_split(
            fico_X, fico_Y, test_size=0.2, random_state=base_seed + i)
        adult_classif = Classifier(X_train, y_train, args, ruleGenerator = 'Generic')
        adult_classif.fit(verbose = False, timeLimit = train_limit, timeLimitPricing = price_limit)

        # Fraction of held-out rows predicted correctly.
        acc = np.mean(adult_classif.predict(X_test) == y_test)
        accuracy.append(acc)
        print(acc)
    accuracies.append(np.mean(accuracy))
    accuracies_full.append(accuracy)

In [None]:
# Save the mean accuracy per rule-complexity setting.
# `accuracies` only gets an entry once a setting has finished all of its
# splits, so an interrupted sweep leaves it shorter than `complexities`.
# Fail with a clear message instead of an opaque pandas length error.
if len(accuracies) != len(complexities):
    raise ValueError(
        'complexity sweep is incomplete: %d mean accuracies for %d settings; '
        're-run the tuning cell to completion before saving'
        % (len(accuracies), len(complexities)))

# Column 0 holds the mean accuracy; 'complex' holds the matching complexity.
res = pd.DataFrame(accuracies)
res['complex'] = complexities

# to_csv does not create missing directories.
os.makedirs('results', exist_ok=True)
res.to_csv('results/complexHPTuning.csv')

In [10]:
# Publish the complexity tuning results: stage the CSV, commit it, push.
# Each line is an independent shell call, so a failing step does not stop the next.
!git add results/complexHPTuning.csv
!git commit -m 'pushing complexity hp tuning results'
!git push

[master 7499144] testing auto results push
 1 file changed, 32562 insertions(+)
 create mode 100644 test.csv
Counting objects: 3, done.
Delta compression using up to 8 threads.
Compressing objects: 100% (3/3), done.
Writing objects: 100% (3/3), 486.86 KiB | 6.32 MiB/s, done.
Total 3 (delta 1), reused 0 (delta 0)
remote: Resolving deltas: 100% (1/1), completed with 1 local object.[K
To github.com:conlaw/DecisionRulesColumnGen.git
   5b26763..7499144  master -> master
