# Algoritmalar için ince ayar zamanı

In [None]:
import pandas
import numpy as np
from pandas import read_csv
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [None]:
# Read the Pima Indians diabetes CSV, supplying the column names explicitly
# through `names`.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
names = [
    'preg', 'plas', 'pres', 'skin',
    'test', 'mass', 'pedi', 'age',
    'class',
]
data = read_csv(url, names=names)

# Work on the raw array: the first eight columns form the feature matrix and
# the ninth column ('class') is the target vector.
veri = data.values
X, Y = veri[:, :8], veri[:, 8]

In [None]:
# Hold out 30% of the rows for testing. Stratifying on Y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=0,
    stratify=Y,
)

## 1. Grid Search

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
from sklearn.metrics import confusion_matrix

# Baseline: logistic regression with default hyperparameters, evaluated on the
# held-out test split.
clf = LogisticRegression()
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)

print(f'Doğruluk : {accuracy_score(y_test, y_pred)}')
print(f'Precision : {precision_score(y_test, y_pred)}')
print(f'Recall : {recall_score(y_test, y_pred)}')
print(f'F1 Puanı :{f1_score(y_test, y_pred)}')
print('\n')
print(f'Sınıflandırma Tablosu : \n{confusion_matrix(y_test, y_pred)}')

## Recall Değerini artırmak için çalışalım

In [None]:
# Search over the regularisation type and strength of logistic regression and
# keep the combination with the best cross-validated recall.
grid_değerleri = {'penalty': ['l1', 'l2'],'C':[0.0001,.009,0.01,0.05,.09,1,5,10,25]}
# The default 'lbfgs' solver only supports the L2 penalty, so every 'l1'
# candidate in the grid would fail to fit. 'liblinear' supports both penalties.
clf = LogisticRegression(solver='liblinear')
grid_model = GridSearchCV(clf, param_grid=grid_değerleri, scoring='recall')
grid_model.fit(x_train, y_train)

In [None]:
# Evaluate the estimator selected by the recall-scored grid search on the
# held-out test split.
y_pred1 = grid_model.predict(x_test)

print(f'Doğruluk : {accuracy_score(y_test, y_pred1)}')
print(f'Precision : {precision_score(y_test, y_pred1)}')
print(f'Recall : {recall_score(y_test, y_pred1)}')
print(f'F1 Puanı :{f1_score(y_test, y_pred1)}')
print('\n')
print(f'Sınıflandırma Tablosu : \n{confusion_matrix(y_test, y_pred1)}')

In [None]:
# Show the hyperparameter combination that grid_model selected as best.
best_parameters = grid_model.best_params_
print(best_parameters)

In [None]:
# GridSearchCV.score uses the scorer given at construction. grid_model was
# built with scoring='recall', so this value is test-set recall, not accuracy.
grid_model.score(x_test, y_test)

In [None]:
# Second grid search over penalty type and C, this time using the estimator's
# default scorer because no `scoring` argument is passed.
# 'liblinear' is required here: the default 'lbfgs' solver rejects the 'l1'
# penalty, which would make half of the candidates fail to fit.
logistic = LogisticRegression(solver='liblinear')
penalty = ['l1', 'l2']
C = [0.0001, 0.001, 0.01, 1, 100]

hyperparameters = dict(C=C, penalty=penalty)

clf = GridSearchCV(logistic, hyperparameters)
# fit() returns the fitted search object itself, so best_model and clf refer
# to the same GridSearchCV instance.
best_model = clf.fit(x_train, y_train)

# Print the parameter combination that gave the best results.
print('En iyi Parametreler',clf.best_params_)

print('En iyi Penaltı parametresi:', best_model.best_estimator_.get_params()['penalty'])
print('En iyi C:', best_model.best_estimator_.get_params()['C'])

In [None]:
# Evaluate the second search (best_model) on the test split. Predicting with
# grid_model here would only repeat the metrics of the first, recall-scored
# search instead of reporting on the search that was just fitted.
y_pred2 = best_model.predict(x_test)

print('Doğruluk : ' + str(accuracy_score(y_test,y_pred2)))
print('Precision : ' + str(precision_score(y_test,y_pred2)))
print('Recall : ' + str(recall_score(y_test,y_pred2)))
print('F1 Puanı :' + str(f1_score(y_test,y_pred2)))
print('\n')
print('Sınıflandırma Tablosu : \n' + str(confusion_matrix(y_test,y_pred2)))

## Örnek 2

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix

# Standardise the features: the scaler is fitted on the training split only,
# and the same fitted scaler is then applied to both splits.
sc = StandardScaler().fit(x_train)
X_train_std = sc.transform(x_train)
X_test_std = sc.transform(x_test)

In [None]:
from sklearn.svm import SVC
# Baseline RBF-kernel SVM trained on the standardised features.
classifier = SVC(kernel='rbf', random_state=0)
classifier.fit(X_train_std, y_train)

y_pred = classifier.predict(X_test_std)
print(f'Doğruluk : {accuracy_score(y_test, y_pred)}')
print(f'Sınıflandırma Tablosu : \n{confusion_matrix(y_test, y_pred)}')

In [None]:
from sklearn.model_selection import cross_val_score
# 10-fold cross-validation of the SVM on the standardised training split.
doğruluk_skorları = cross_val_score(estimator = classifier, X = X_train_std, y = y_train, cv = 10)
print("Ortalama değer: %",round(doğruluk_skorları.mean()*100,2))
# Round the spread to two decimals like the mean; rounding with no digits
# collapsed it to a whole number and hid most of the information.
print("std: %",round(doğruluk_skorları.std()*100,2))

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate values for C, shared by every kernel's sub-grid.
param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 20]

# One sub-grid per kernel, so each kernel is only combined with the
# hyperparameters listed alongside it.
parameters = [
    dict(C=param_range, kernel=['linear']),
    dict(C=param_range, kernel=['rbf'], gamma=[0.1, 0.2, 0.4, 0.6, 0.8, 0.9]),
    dict(C=param_range, kernel=['poly'], degree=[2, 3], gamma=[0.1, 0.2, 0.4, 0.5, 0.7, 0.9]),
]

grid_search = GridSearchCV(
    classifier,
    parameters,
    scoring='accuracy',  # rank candidates by accuracy
    cv=5,                # number of cross-validation folds
    n_jobs=-1,           # -1 uses all available cores
)
grid_search.fit(X_train_std, y_train)

best_accuracy, best_parameters = grid_search.best_score_, grid_search.best_params_

In [None]:
# Report the best cross-validated score as a percentage (two decimals),
# together with the parameter combination that produced it.
print('En iyi tahmin oranı: %',round(best_accuracy*100,2))
print('En iyi parametreler: ',best_parameters)

## Örnek 3

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Fix the seed so the tree (and the splitter='random' candidates explored by
# the grid search over this estimator) gives the same results on every run,
# matching the seeded train/test split and SVC used elsewhere in the notebook.
ml = DecisionTreeClassifier(random_state=0)

In [None]:
# Baseline decision tree: train on the raw features and score on the test split.
y_pred = ml.fit(x_train, y_train).predict(x_test)

print(f'Doğruluk : {accuracy_score(y_test, y_pred)}')
print(f'Sınıflandırma Tablosu : \n{confusion_matrix(y_test, y_pred)}')

In [None]:
# Hyperparameter grid for the decision tree: split criterion, maximum depth
# and splitter strategy. dict(...) builds the same mapping as the literal
# {'criterion': [...], 'max_depth': [...], 'splitter': [...]}.
kriter = ['gini', 'entropy']
derinlik = [3, 5, 8, 10]
bölme = ['best', 'random']
parametre = dict(criterion=kriter, max_depth=derinlik, splitter=bölme)

# NOTE: a leftover logistic-regression grid (penalty / C) used to be reassigned
# here. It was never passed to this search and has been dropped to avoid
# suggesting those parameters apply to a decision tree.

# Exhaustive search with 10-fold cross-validation and the estimator's default scorer.
yeni_model = GridSearchCV(estimator=ml, cv=10, param_grid=parametre)
yeni_model.fit(x_train, y_train)

y_tahmin = yeni_model.predict(x_test)

print('Doğruluk : ' + str(accuracy_score(y_test,y_tahmin)))
print('Precision : ' + str(precision_score(y_test,y_tahmin)))
print('Recall : ' + str(recall_score(y_test,y_tahmin)))
print('F1 Puanı :' + str(f1_score(y_test,y_tahmin)))
print('\n')
print('Sınıflandırma Tablosu : \n' + str(confusion_matrix(y_test,y_tahmin)))

In [None]:
# best_score_ is the mean cross-validated score of the winning candidate;
# best_params_ is the parameter combination that achieved it.
print(yeni_model.best_score_)
print(yeni_model.best_params_)

## 2. Rastgele Arama ile Parametre Optimizasyonu

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform

# Randomised search: instead of enumerating a grid, draw 100 candidate values
# of C from scipy's uniform distribution with its default bounds (loc=0, scale=1).
grid_değerleri = {'C': uniform()}
model = LogisticRegression()
# Seed the sampler so the drawn candidates, and therefore the selected C and
# the metrics printed below, are the same on every run.
rassal_model = RandomizedSearchCV(
    estimator=model,
    param_distributions=grid_değerleri,
    n_iter=100,
    random_state=0,
)
rassal_model.fit(x_train, y_train)

y_pred1 = rassal_model.predict(x_test)

print('Doğruluk : ' + str(accuracy_score(y_test,y_pred1)))
print('Precision : ' + str(precision_score(y_test,y_pred1)))
print('Recall : ' + str(recall_score(y_test,y_pred1)))
print('F1 Puanı :' + str(f1_score(y_test,y_pred1)))
print('\n')
print('Sınıflandırma Tablosu : \n' + str(confusion_matrix(y_test,y_pred1)))

In [None]:
# Show the sampled parameter setting that the randomised search selected as best.
best_parameters = rassal_model.best_params_
print(best_parameters)