/
model_params.py
73 lines (70 loc) · 4.03 KB
/
model_params.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""
Imports
"""
import ast
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
import copy
def best_model_params(grid_search_pd, data_str):
    """
    Method that delivers the best model and its parameters according to the Grid Search done
    Input grid_search_pd: DataFrame containing the parameters of the models tested in the Grid Search.
                          It is indexed by (dataset name, model name) and has a 'params' column holding
                          the string representation of a sequence whose first element is the parameter set
    Input data_str: String containing the name of the dataset
    Output best: The name of the best performing model ('rf' or 'mlp')
    Output params_best: The parameters of the best performing model
    Output params_rf: The parameters of the RF model
    Raises ValueError: If data_str is not one of the datasets with a known best model
    """
    rf_datasets = ('synthetic_disease', 'compass', 'adult', 'heart', 'cervical')
    mlp_datasets = ('synthetic_simple', 'synthetic_athlete', 'ionosphere', 'credit', 'german')
    if data_str in rf_datasets:
        best = 'rf'
    elif data_str in mlp_datasets:
        best = 'mlp'
    else:
        # Without this guard an unknown dataset would leave `best` unbound and
        # surface later as an UnboundLocalError with no hint about the cause.
        raise ValueError(
            "Unknown dataset %r: expected one of %s"
            % (data_str, ', '.join(rf_datasets + mlp_datasets))
        )
    # The 'params' cell is stored as text; literal_eval parses it safely
    # (literals only) and the first element is the selected parameter set.
    params_best = ast.literal_eval(grid_search_pd.loc[(data_str, best), 'params'])[0]
    params_rf = ast.literal_eval(grid_search_pd.loc[(data_str, 'rf'), 'params'])[0]
    return best, params_best, params_rf
def clf_model(model_str, best_params, rf_params, train_data, train_target):
    """
    Method that outputs the best trained model according to Grid Search done,
    together with a trained RF model
    Input model_str: The name of the best performing model ('svm', 'dt', 'mlp' or 'rf')
    Input best_params: Parameters of the best performing model
    Input rf_params: Parameters of the RF model (not read when model_str is 'rf')
    Input train_data: Training dataset
    Input train_target: Target of the training dataset
    Output best_model: Trained best performing model
    Output rf_model: Trained RF model; when model_str is 'rf' this is a deep copy of best_model
    Raises ValueError: If model_str is not one of the supported model names
    """
    if model_str not in ('svm', 'dt', 'mlp', 'rf'):
        # Fail early with a clear message instead of an UnboundLocalError at return.
        raise ValueError(
            "Unknown model %r: expected one of 'svm', 'dt', 'mlp', 'rf'" % (model_str,)
        )
    # NOTE(review): the fixed seed is only applied to the MLP; the tree-based
    # models and the SVM are left unseeded, exactly as before.
    random_st = 54321
    if model_str == 'svm':
        best_model = svm.SVC(
            C=best_params['C'],
            coef0=best_params['coef0'],
            degree=best_params['degree'],
            kernel=best_params['kernel'],
        )
    elif model_str == 'dt':
        best_model = DecisionTreeClassifier(
            max_depth=best_params['max_depth'],
            min_samples_leaf=best_params['min_samples_leaf'],
            min_samples_split=best_params['min_samples_split'],
        )
    elif model_str == 'mlp':
        best_model = MLPClassifier(
            activation=best_params['activation'],
            hidden_layer_sizes=best_params['hidden_layer_sizes'],
            solver=best_params['solver'],
            random_state=random_st,
        )
    else:  # model_str == 'rf'
        best_model = RandomForestClassifier(
            max_depth=best_params['max_depth'],
            min_samples_leaf=best_params['min_samples_leaf'],
            min_samples_split=best_params['min_samples_split'],
            n_estimators=best_params['n_estimators'],
        )
    # The best model is always fitted before the RF model is built and fitted.
    best_model.fit(train_data, train_target)
    if model_str == 'rf':
        # The best model already is the RF: reuse the fitted estimator rather
        # than training a second forest.
        rf_model = copy.deepcopy(best_model)
    else:
        rf_model = RandomForestClassifier(
            max_depth=rf_params['max_depth'],
            min_samples_leaf=rf_params['min_samples_leaf'],
            min_samples_split=rf_params['min_samples_split'],
            n_estimators=rf_params['n_estimators'],
        )
        rf_model.fit(train_data, train_target)
    return best_model, rf_model