# Rule-based classifier

In [32]:
from sklearn.preprocessing import StandardScaler, KBinsDiscretizer, OneHotEncoder
import wittgenstein as lw
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import pandas as pd

In [33]:
# Training features and labels are stored in separate CSV files.
train_data = pd.read_csv('../dataset/train_data.csv')
# Flatten the label frame to a 1-D array in the same statement that loads it,
# so re-running the cell is idempotent. The DataFrame's own to_numpy() is used
# instead of np.ravel because numpy is not imported in the visible cells of
# this notebook, which made the cell fail with NameError on a fresh kernel.
train_labels = pd.read_csv('../dataset/train_labels.csv').to_numpy().ravel()

# Test features and labels; test_labels is kept as the DataFrame read_csv returns.
test_data = pd.read_csv('../dataset/test_data.csv')
test_labels = pd.read_csv('../dataset/test_labels.csv')

## Preprocessing Pipeline

In [34]:
# Columns routed to the one-hot encoder; every remaining column of train_data
# is routed to the scaler, in its original order.
cat_features = ['is_tarmac', 'length_cat', 'climb_cat']
num_features = [name for name in train_data.columns if name not in cat_features]

# Categories not seen during fit are ignored at transform time instead of raising.
cat_transformer = OneHotEncoder(handle_unknown='ignore')
num_transformer = StandardScaler()

# Column-wise preprocessing: scale the numeric block, one-hot encode the
# categorical block.
preprocessor = ColumnTransformer([
    ('num', num_transformer, num_features),
    ('cat', cat_transformer, cat_features),
])

## RIPPER Model

In [38]:
#pipeline
from weakref import ref
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV


# One seed for both the estimator and the search so that re-running the cell
# on a fresh kernel is repeatable.
SEED = 42

# Preprocessing followed by the RIPPER rule learner. The estimator is seeded
# explicitly: previously only the search was seeded, so the fitted rules (and
# therefore the CV scores) could change from run to run.
pipe = Pipeline(steps=[('preprocessor', preprocessor),
                       ('classifier', lw.RIPPER(random_state=SEED))])

# Candidate hyper-parameter values sampled by the randomized search.
# NOTE(review): wittgenstein's default dl_allowance appears to be 64, so values
# below 1 may stop rule growth very early — confirm this range is intended.
param_grid = {
    'classifier__prune_size': [0.1, 0.2, 0.3, 0.4, 0.5],
    'classifier__dl_allowance': [0.1, 0.2, 0.3, 0.4, 0.5],
    'classifier__k': [2, 3, 4, 5, 6, 7, 8, 9, 10],
}

# All four metrics are recorded in cv_results_; `refit` chooses the one used
# to select and refit the best candidate.
# NOTE(review): the test-set output recorded below this cell shows high
# accuracy with very low recall; consider whether 'f1' or 'recall' is a better
# refit metric for this problem.
scoring = ['accuracy', 'precision', 'recall', 'f1']

# Randomized search: 3 sampled candidates, 5-fold CV, all available cores.
random_search = RandomizedSearchCV(pipe,
                                   param_distributions=param_grid,
                                   n_iter=3,
                                   cv=5, verbose=2,
                                   random_state=SEED,
                                   n_jobs=-1,
                                   scoring=scoring,
                                   refit='accuracy')
random_search.fit(train_data, train_labels)

print("Best parameters found: ", random_search.best_params_)
print("Best score found: ", random_search.best_score_)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV] END classifier__dl_allowance=0.5, classifier__k=2, classifier__prune_size=0.5; total time= 1.4min
[CV] END classifier__dl_allowance=0.3, classifier__k=8, classifier__prune_size=0.1; total time= 1.6min
[CV] END classifier__dl_allowance=0.1, classifier__k=3, classifier__prune_size=0.5; total time= 1.6min
[CV] END classifier__dl_allowance=0.1, classifier__k=3, classifier__prune_size=0.5; total time= 1.6min
[CV] END classifier__dl_allowance=0.5, classifier__k=2, classifier__prune_size=0.5; total time= 1.9min
[CV] END classifier__dl_allowance=0.5, classifier__k=2, classifier__prune_size=0.5; total time= 2.9min
[CV] END classifier__dl_allowance=0.1, classifier__k=3, classifier__prune_size=0.5; total time= 3.2min
[CV] END classifier__dl_allowance=0.5, classifier__k=2, classifier__prune_size=0.5; total time= 3.5min
[CV] END classifier__dl_allowance=0.3, classifier__k=8, classifier__prune_size=0.1; total time= 2.2min
[CV] END clas

In [39]:
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

# Take the pipeline that the search refit with its best parameters and
# predict on the test features.
model = random_search.best_estimator_
predictions = model.predict(test_data)

# (label, metric function) pairs; each label keeps its trailing space so the
# printed lines are unchanged.
reports = (
    ('Accuracy ', accuracy_score),
    ('Precision ', precision_score),
    ('Recall ', recall_score),
    ('F1 Score ', f1_score),
)
for label, metric in reports:
    print(label, metric(test_labels, predictions))

Accuracy  0.8590735347651984
Precision  0.5574614065180102
Recall  0.1027180783817952
F1 Score  0.17347211102215104
