## Tuning MyRandomForestClassifier Parameters
I will be tuning the classifier parameters (N, M, F) for the player salary dataset (`joined_data.csv`).

In [1]:
import mysklearn.myevaluation as myevaluation
import os
from mysklearn.mypytable import MyPyTable
from mysklearn.myclassifiers import MyZeroRClassifier
import mysklearn.myutils as myutils

### The Player Salary Dataset

In [2]:
# Load the joined data file and build the list of rows used by the cells below.
fname = os.path.join("input_data", "joined_data.csv")
# Keep the loaded MyPyTable under its own name so that `table` is only ever
# bound to one kind of object: the plain list of rows built at the end of this cell.
player_table = MyPyTable().load_from_file(fname)

# Features: year_start, height, weight, position -> class label: season17_18
year_start_col = player_table.get_column("year_start", True)
height_col = player_table.get_column("height", True)
# weight and season17_18 are passed through the myutils transform functions
# rather than being used as raw column values.
weight_col = player_table.get_discretized_column(myutils.transform_player_weight, "weight")
position_col = player_table.get_column("position", True)
salary_col = player_table.get_discretized_column(myutils.transform_salary, "season17_18")

# One row per index of salary_col, with the class label in the last position
# (the cells below split features from label with row[:-1] / row[-1]).
table = [
    [year_start_col[i], height_col[i], weight_col[i], position_col[i], salary_col[i]]
    for i in range(len(salary_col))
]

### Zero R Classifier

In [3]:
# Baseline: fit a Zero-R classifier on a train split and score it on the held-out split.
zr_classifier = MyZeroRClassifier()
X = [row[:-1] for row in table]  # every column except the last
y = [row[-1] for row in table]   # last column: the class label
# NOTE(review): no seed is passed here, so if train_test_split shuffles, the split
# (and the accuracy printed below) may differ from run to run -- confirm.
X_train, X_test, y_train, y_test = myevaluation.train_test_split(X, y)
zr_classifier.fit(X_train, y_train)
y_predicted = zr_classifier.predict(X_test)

# Class labels 1 through 5 (presumably the bins produced by myutils.transform_salary).
salary_labels = list(range(1, 6))
# NOTE(review): predictions are passed before the true labels; confirm this matches
# the parameter order confusion_matrix expects. Accuracy is unaffected either way
# if it is computed from the diagonal, but the matrix itself would be transposed.
matrix = myevaluation.confusion_matrix(y_predicted, y_test, salary_labels)
accuracy = myutils.calculate_accuracy(matrix)
error_rate = myutils.calculate_error_rate(matrix)
# int() truncates, so e.g. 42.9% is reported as 42%.
print("Zero R Accuracy: {}%".format(int(accuracy * 100)))

Zero R Accuracy: 42%


### Default: N = 20, M = 7, F = 2

In [12]:
# Evaluate the single default configuration named in the heading above.
# Each pair looks like a [start, stop) range -- confirm against tune_parameters.
myevaluation.tune_parameters(
    [20, 21],  # N
    [7, 8],    # M
    [2, 3],    # F
    table,
)

N = 20, M = 7, F = 2: 46% Correct


### Tuning for N: 50 to 200, M: 3 to 19, F: 2 to 4

In [5]:
# Set to True to actually run the grid search. It is off by default, matching the
# original cell, where the call was commented out (presumably because it is slow).
RUN_FULL_SEARCH = False

if RUN_FULL_SEARCH:
    # Argument order follows the headings in this notebook: N range, M range,
    # F range, then the data rows.
    myevaluation.tune_parameters([50, 201], [3, 20], [2, 5], table)
else:
    # Hard-coded result, presumably recorded from an earlier run of the search.
    print("N = 80, M = 7, F = 3: 53% Correct")

N = 80, M = 7, F = 3: 53% Correct


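### Tuned: N = 80, M = 7, F = 3

Note: this single re-run scores 48%, not the 53% reported by the search above. No random seed is set anywhere in this notebook, so the difference is presumably run-to-run variation rather than a change in the parameters.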

In [16]:
# Re-evaluate only the tuned configuration named in the heading above.
# Each pair looks like a [start, stop) range -- confirm against tune_parameters.
myevaluation.tune_parameters(
    [80, 81],  # N
    [7, 8],    # M
    [3, 4],    # F
    table,
)

N = 80, M = 7, F = 3: 48% Correct
