## Hyperparameters

### hyper parameter testing
1. GridSearch
2. RandomSearch

In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

data = pd.read_csv("diabetes.csv")

X = data.drop("Outcome", axis=1).to_numpy()
Y = data["Outcome"].to_numpy()

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, Y, test_size=0.2, random_state=99, stratify=Y)

In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import ParameterGrid, GridSearchCV,  RandomizedSearchCV

In [5]:
hyeraparams  = {
    "class_weight": ["balanced"],
    "penalty":["l1","l2"],
    "max_iter":list(range(50,251,50)),
    "solver":["liblinear"],
    "C": [a/100 for a in range(50, 200,40)]
}

In [6]:
len(ParameterGrid(hyeraparams))

40

In [7]:
# Grid Search

model = LogisticRegression()

In [8]:
grid_search = GridSearchCV(
    estimator=model,
    param_grid=hyeraparams,
    scoring="recall",
    cv = 3,
    n_jobs=-1
)

In [9]:
grid_search.fit(X_train, y_train)

0,1,2
,estimator,LogisticRegression()
,param_grid,"{'C': [0.5, 0.9, ...], 'class_weight': ['balanced'], 'max_iter': [50, 100, ...], 'penalty': ['l1', 'l2'], ...}"
,scoring,'recall'
,n_jobs,-1
,refit,True
,cv,3
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,penalty,'l1'
,dual,False
,tol,0.0001
,C,0.5
,fit_intercept,True
,intercept_scaling,1
,class_weight,'balanced'
,random_state,
,solver,'liblinear'
,max_iter,50


In [10]:
grid_search.best_score_

np.float64(0.7100286906624934)

In [11]:
grid_search.best_params_

{'C': 0.5,
 'class_weight': 'balanced',
 'max_iter': 50,
 'penalty': 'l1',
 'solver': 'liblinear'}

In [12]:
grid_search.best_estimator_

0,1,2
,penalty,'l1'
,dual,False
,tol,0.0001
,C,0.5
,fit_intercept,True
,intercept_scaling,1
,class_weight,'balanced'
,random_state,
,solver,'liblinear'
,max_iter,50


In [13]:
## Random Search

In [16]:
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=hyeraparams,
    n_iter=10,
    scoring="recall",
    cv = 3,
    n_jobs=-1
)

In [17]:
random_search.fit(X_train, y_train)

0,1,2
,estimator,LogisticRegression()
,param_distributions,"{'C': [0.5, 0.9, ...], 'class_weight': ['balanced'], 'max_iter': [50, 100, ...], 'penalty': ['l1', 'l2'], ...}"
,n_iter,10
,scoring,'recall'
,n_jobs,-1
,refit,True
,cv,3
,verbose,0
,pre_dispatch,'2*n_jobs'
,random_state,

0,1,2
,penalty,'l1'
,dual,False
,tol,0.0001
,C,0.9
,fit_intercept,True
,intercept_scaling,1
,class_weight,'balanced'
,random_state,
,solver,'liblinear'
,max_iter,50


In [18]:
random_search.best_score_

np.float64(0.7100286906624934)

In [19]:
random_search.best_params_

{'solver': 'liblinear',
 'penalty': 'l1',
 'max_iter': 50,
 'class_weight': 'balanced',
 'C': 0.9}

In [20]:
random_search.best_estimator_

0,1,2
,penalty,'l1'
,dual,False
,tol,0.0001
,C,0.9
,fit_intercept,True
,intercept_scaling,1
,class_weight,'balanced'
,random_state,
,solver,'liblinear'
,max_iter,50
