# ÖRNEK 1

In [None]:
from pandas import read_csv
# scatter_matrix lives in pandas.plotting; the old pandas.tools.plotting path no longer exists.
from pandas.plotting import scatter_matrix
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

In [None]:
# Column labels handed to read_csv; the last one ('class') is used as the target later on.
names = [
    'sepal-length',
    'sepal-width',
    'petal-length',
    'petal-width',
    'class',
]
# Location of the CSV file that is read into the working DataFrame.
url = "https://goo.gl/mLmoIz"
dataset = read_csv(url, names=names)

In [None]:
# Show the frame's dimensions as a (rows, columns) tuple.
dims = dataset.shape
print(dims)

In [None]:
# Peek at the first 20 rows of the data.
preview = dataset.head(20)
print(preview)

In [None]:
# Summary statistics produced by DataFrame.describe().
summary = dataset.describe()
print(summary)

In [None]:
# Number of rows per value of the 'class' column.
class_counts = dataset.groupby('class').size()
print(class_counts)

In [None]:
# Histograms of the frame's columns on an 8x8 figure, then render.
hist_size = (8, 8)
dataset.hist(figsize=hist_size)
pyplot.show()

In [None]:
# Grid of pairwise scatter plots on a 12x12 figure, then render.
matrix_size = (12, 12)
scatter_matrix(dataset, figsize=matrix_size)
pyplot.show()

In [None]:
import seaborn as sns

# Pairwise relationship plots, coloured by the 'class' column.
sns.pairplot(data=dataset, hue="class")

In [None]:
import pandas as pd
import seaborn as sns

# Plot styling applied through seaborn for the figure below.
sns.set(style="whitegrid", palette="muted")

# Load seaborn's example iris dataset and melt it into long form in one step:
# 'species' stays as the identifier, the remaining columns become
# (measurement, value) pairs.
iris = pd.melt(sns.load_dataset("iris"), "species", var_name="measurement")

# Categorical scatter plot with one point per observation.
sns.swarmplot(
    x="measurement",
    y="value",
    hue="species",
    palette=["r", "c", "y"],
    data=iris,
)

In [None]:
# Separate the first four columns (features) from the fifth (target).
array = dataset.values
X, Y = array[:, :4], array[:, 4]

# Hold out 20% of the rows as a test set; the seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    random_state=77,
)

In [None]:
# Candidate classifiers, each paired with a short label used in the report.
models = [
    ('LR', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC()),
]

sonuc = []  # per-model arrays of fold scores
isim = []   # model labels, in the same order as `sonuc`

# The splitter is the same for every model, so build it once.
# random_state is omitted: without shuffle=True it has no effect, and recent
# scikit-learn releases reject the combination with a ValueError.
kfold = KFold(n_splits=10)

# The loop variable must not be called `isim` (that would overwrite the list),
# and labels go into `isim`, not into `names`, which holds the CSV column
# labels of the dataset.
for model_name, model in models:
    cv_results = cross_val_score(model, X_train, Y_train, cv=kfold)
    sonuc.append(cv_results)
    isim.append(model_name)
    # Label, mean fold score and standard deviation across folds.
    msg = "%s: %f (%f)" % (model_name, cv_results.mean(), cv_results.std())
    print(msg)

In [None]:
# Fit a default k-nearest-neighbours classifier on the training split and
# predict the held-out rows.
knn = KNeighborsClassifier()
knn.fit(X_train, Y_train)
predictions = knn.predict(X_test)

# Print accuracy, confusion matrix and the per-class report, in that order.
for metric in (accuracy_score, confusion_matrix, classification_report):
    print(metric(Y_test, predictions))

# ÖRNEK 2

In [None]:
import numpy
from numpy import arange
from matplotlib import pyplot
from pandas import read_csv
from pandas import set_option
# scatter_matrix lives in pandas.plotting; the old pandas.tools.plotting path no longer exists.
from pandas.plotting import scatter_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import r2_score

In [None]:
# pandas is imported here because this example's import cell does not bring
# in `pd`; relying on an earlier example's cell raises NameError when this
# example is run on its own.
import pandas as pd
from sklearn.datasets import load_boston

# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell requires an older scikit-learn release.
boston = load_boston()

# Feature matrix as a labelled DataFrame, with the target added as 'PRICE'.
bos = pd.DataFrame(boston.data, columns=boston.feature_names)
bos['PRICE'] = boston.target

In [None]:
# Show the frame's dimensions as a (rows, columns) tuple.
dims = bos.shape
print(dims)

In [None]:
# Data type of every column.
column_types = bos.dtypes
print(column_types)

In [None]:
# Use the fully qualified option key: the bare 'precision' pattern matches
# more than one option on newer pandas and raises OptionError.
set_option('display.precision', 2)
# Summary statistics produced by DataFrame.describe().
summary = bos.describe()
print(summary)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Pairwise column correlations, drawn as an annotated heat map on a
# 9x9 figure.
j = bos.corr()
f, ax = plt.subplots(figsize=(9, 9))
sns.heatmap(
    j,
    annot=True,
    linewidths=0.5,
    ax=ax,
)

In [None]:
# Histograms with independent axes and tiny tick labels, so the many panels
# stay readable on a 9x9 figure.
hist_options = dict(
    sharex=False,
    sharey=False,
    xlabelsize=1,
    ylabelsize=1,
    figsize=(9, 9),
)
bos.hist(**hist_options)
pyplot.show()

In [None]:
# Grid of pairwise scatter plots on a 9x9 figure, then render.
matrix_size = (9, 9)
scatter_matrix(bos, figsize=matrix_size)
pyplot.show()

In [None]:
# First 13 columns are the features; column 13 ('PRICE', added last) is the target.
array = bos.values
X, Y = array[:, :13], array[:, 13]

# Hold out 20% of the rows as a test set; the seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=55,
)

In [None]:
# Baseline regressors with default settings, each paired with a short label.
models = [
    ('LR', LinearRegression()),
    ('LASSO', Lasso()),
    ('KNN', KNeighborsRegressor()),
    ('CART', DecisionTreeRegressor()),
]

In [None]:
# 10-fold cross-validation of every baseline regressor on the training split,
# scored with each estimator's default scorer.
sonuc = []  # per-model arrays of fold scores
isim = []   # model labels, in the same order as `sonuc`

# Built once because it is identical for every model. random_state is
# omitted: without shuffle=True it has no effect, and recent scikit-learn
# releases reject the combination with a ValueError.
kfold = KFold(n_splits=10)

# The loop variable must not be called `isim` (that would overwrite the
# list), and labels go into `isim` rather than `names`, which is not
# defined anywhere in this example.
for model_name, model in models:
    cv_results = cross_val_score(model, X_train, Y_train, cv=kfold)
    sonuc.append(cv_results)
    isim.append(model_name)
    # Label, mean fold score and standard deviation across folds.
    msg = "%s: %f (%f)" % (model_name, cv_results.mean(), cv_results.std())
    print(msg)

In [None]:
# Same baseline regressors, each preceded by a StandardScaler inside a
# Pipeline so that scaling is re-fitted on the training part of every fold.
pipelines = [
    ('Scaled_LR', Pipeline([('Scaler', StandardScaler()), ('LR', LinearRegression())])),
    ('Scaled_LASSO', Pipeline([('Scaler', StandardScaler()), ('LASSO', Lasso())])),
    ('Scaled_KNN', Pipeline([('Scaler', StandardScaler()), ('KNN', KNeighborsRegressor())])),
    ('Scaled_CART', Pipeline([('Scaler', StandardScaler()), ('CART', DecisionTreeRegressor())])),
]

sonuc = []  # per-pipeline arrays of fold scores
isim = []   # pipeline labels, in the same order as `sonuc`

# Built once because it is identical for every pipeline. random_state is
# omitted: without shuffle=True it has no effect, and recent scikit-learn
# releases reject the combination with a ValueError.
kfold = KFold(n_splits=10)

# The loop variable must not be called `isim` (that would overwrite the
# list), and labels go into `isim` rather than `names`, which is not
# defined anywhere in this example.
for pipeline_name, model in pipelines:
    cv_results1 = cross_val_score(model, X_train, Y_train, cv=kfold)
    sonuc.append(cv_results1)
    isim.append(pipeline_name)
    # Label, mean fold score and standard deviation across folds.
    msg = "%s: %f (%f)" % (pipeline_name, cv_results1.mean(), cv_results1.std())
    print(msg)

In [None]:
# Standardise the training features, then grid-search the number of
# neighbours for a KNN regressor with 10-fold cross-validation.
scaler = StandardScaler().fit(X_train)
rescaledX = scaler.transform(X_train)

# Odd neighbour counts from 1 to 21.
k_values = numpy.array([1,3,5,7,9,11,13,15,17,19,21])
param_grid = dict(n_neighbors=k_values)
model = KNeighborsRegressor()

# random_state is omitted: without shuffle=True it has no effect, and recent
# scikit-learn releases reject the combination with a ValueError.
kfold = KFold(n_splits=10)
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=kfold)
grid_result = grid.fit(rescaledX, Y_train)

In [None]:
# Best score and the parameters that produced it.
print("En iyi: %f %s ile" % (grid_result.best_score_, grid_result.best_params_))

# One line per candidate: mean test score, its standard deviation, parameters.
search_table = grid_result.cv_results_
means = search_table['mean_test_score']
stds = search_table['std_test_score']
params = search_table['params']
for row in zip(means, stds, params):
    print("%f (%f) : %r" % row)

In [None]:
# Tree-ensemble regressors, 150 estimators each, paired with short labels.
models = [
    ('GBM', GradientBoostingRegressor(n_estimators=150)),
    ('RF', RandomForestRegressor(n_estimators=150)),
    ('ET', ExtraTreesRegressor(n_estimators=150)),
]

results = []  # per-model arrays of fold scores
names = []    # model labels, in the same order as `results`

# Built once because it is identical for every model. random_state is
# omitted: without shuffle=True it has no effect, and recent scikit-learn
# releases reject the combination with a ValueError.
kfold = KFold(n_splits=10)

for name, model in models:
    cv_results = cross_val_score(model, X_train, Y_train, cv=kfold)
    results.append(cv_results)
    names.append(name)
    # Label, mean fold score and standard deviation across folds.
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

In [None]:
# Grid-search the number of boosting stages (50 to 400 in steps of 50) for a
# seeded gradient-boosting regressor, using 10-fold cross-validation.
param_grid = dict(n_estimators=numpy.array([50,100,150,200,250,300,350,400]))
model = GradientBoostingRegressor(random_state=7)

# random_state is omitted from KFold: without shuffle=True it has no effect,
# and recent scikit-learn releases reject the combination with a ValueError.
kfold = KFold(n_splits=10)
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=kfold)
grid_result = grid.fit(X_train, Y_train)

In [None]:
# Best score and the parameters that produced it.
print("En iyi: %f ,%s ile" % (grid_result.best_score_, grid_result.best_params_))

# One line per candidate: mean test score, its standard deviation, parameters.
search_table = grid_result.cv_results_
means = search_table['mean_test_score']
stds = search_table['std_test_score']
params = search_table['params']
for row in zip(means, stds, params):
    print("%f (%f) : %r" % row)

### En iyi modeli artık test verisine uygulama zamanı

In [None]:
# Train the final gradient-boosting regressor on the whole training split.
# NOTE(review): n_estimators=150 is hard-coded; presumably taken from the
# grid search in the earlier cell. Confirm against its printed best params.
model = GradientBoostingRegressor(random_state=7, n_estimators=150)
model.fit(X_train, Y_train)

tahmin_train = model.predict(X_train)
# The report below scores the held-out rows, so predictions for X_test are
# required; previously `tahmin_test` was never defined and the print raised
# NameError.
tahmin_test = model.predict(X_test)

print('R-kare (test): %.4f\n' % r2_score(Y_test, tahmin_test))

# ÖRNEK 3

In [None]:
import numpy
from matplotlib import pyplot
from pandas import read_csv
from pandas import set_option
# scatter_matrix lives in pandas.plotting; the old pandas.tools.plotting path no longer exists.
from pandas.plotting import scatter_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier

In [None]:
# Read the remote CSV without a header row, so columns get integer labels.
url = "https://goo.gl/NXoJfR"
dataset = read_csv(filepath_or_buffer=url, header=None)

In [None]:
# Show the frame's dimensions as a (rows, columns) tuple.
dims = dataset.shape
print(dims)

In [None]:
# Data type of every column.
column_types = dataset.dtypes
print(column_types)

In [None]:
# Wider console output and two decimals when printing frames.
set_option('display.width', 100)
# Use the fully qualified option key: the bare 'precision' pattern matches
# more than one option on newer pandas and raises OptionError.
set_option('display.precision', 2)
# Peek at the first 10 rows of the data.
preview = dataset.head(10)
print(preview)

In [None]:
# Use the fully qualified option key: the bare 'precision' pattern matches
# more than one option on newer pandas and raises OptionError.
set_option('display.precision', 2)
# Summary statistics produced by DataFrame.describe().
summary = dataset.describe()
print(summary)

In [None]:
# Number of rows per distinct value in column 60 (used as the target later).
class_counts = dataset.groupby(60).size()
print(class_counts)

In [None]:
# Histograms with independent axes and tiny tick labels, so the many panels
# stay readable on a 9x9 figure.
hist_options = dict(
    sharex=False,
    sharey=False,
    xlabelsize=1,
    ylabelsize=1,
    figsize=(9, 9),
)
dataset.hist(**hist_options)
pyplot.show()

In [None]:
# Correlation matrix drawn as a colour-coded grid with a colour bar.
# Only numeric columns are correlated: older pandas dropped non-numeric
# columns silently, while pandas 2.x raises if any are present.
# NOTE(review): column 60 looks like a non-numeric label. Confirm.
corr_matrix = dataset.select_dtypes(include='number').corr()

fig = pyplot.figure(figsize=(19, 19))
ax = fig.add_subplot(111)
# Fix the colour scale to the full correlation range [-1, 1].
cax = ax.matshow(corr_matrix, vmin=-1, vmax=1, interpolation='none')
fig.colorbar(cax)
pyplot.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Annotated heat map of the pairwise column correlations.
# Only numeric columns are correlated: older pandas dropped non-numeric
# columns silently, while pandas 2.x raises if any are present.
# NOTE(review): column 60 looks like a non-numeric label. Confirm.
j = dataset.select_dtypes(include='number').corr()
f, ax = plt.subplots(figsize=(19, 19))
sns.heatmap(j, annot=True, linewidths=.5, ax=ax)

In [None]:
# First 60 columns are cast to float and used as features; column 60 is the target.
array = dataset.values
X = array[:, :60].astype(float)
Y = array[:, 60]

# Hold out 20% of the rows as a test set; the seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    random_state=0,
)

In [None]:
# Baseline classifiers, each paired with a short label used in the report.
models = [
    ('LR', LogisticRegression(solver='lbfgs')),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC(gamma='scale')),
]

In [None]:
# 10-fold cross-validation of every baseline classifier on the training
# split, scored with each estimator's default scorer.
results = []  # per-model arrays of fold scores
names = []    # model labels, in the same order as `results`

# Built once because it is identical for every model. random_state is
# omitted: without shuffle=True it has no effect, and recent scikit-learn
# releases reject the combination with a ValueError.
kfold = KFold(n_splits=10)

for name, model in models:
    cv_results = cross_val_score(model, X_train, Y_train, cv=kfold)
    results.append(cv_results)
    names.append(name)
    # Label, mean fold score and standard deviation across folds.
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

In [None]:
# Classifiers preceded by a StandardScaler inside a Pipeline, so scaling is
# re-fitted on the training part of every fold.
# SVC gets gamma='scale' here too, matching every other SVC in this example.
pipelines = [
    ('Scaled_LR', Pipeline([('Scaler', StandardScaler()), ('LR', LogisticRegression(solver='lbfgs'))])),
    ('Scaled_KNN', Pipeline([('Scaler', StandardScaler()), ('KNN', KNeighborsClassifier())])),
    ('Scaled_NB', Pipeline([('Scaler', StandardScaler()), ('NB', GaussianNB())])),
    ('Scaled_SVM', Pipeline([('Scaler', StandardScaler()), ('SVM', SVC(gamma='scale'))])),
]

results = []  # per-pipeline arrays of fold scores
names = []    # pipeline labels, in the same order as `results`

# Built once because it is identical for every pipeline. random_state is
# omitted: without shuffle=True it has no effect, and recent scikit-learn
# releases reject the combination with a ValueError.
kfold = KFold(n_splits=10)

for name, model in pipelines:
    cv_results = cross_val_score(model, X_train, Y_train, cv=kfold)
    results.append(cv_results)
    names.append(name)
    # Label, mean fold score and standard deviation across folds.
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

In [None]:
# Standardise the training features, then grid-search the number of
# neighbours for a KNN classifier with 10-fold cross-validation.
scaler = StandardScaler().fit(X_train)
rescaledX = scaler.transform(X_train)

# Odd neighbour counts from 1 to 21.
neighbors = [1,3,5,7,9,11,13,15,17,19,21]
param_grid = dict(n_neighbors=neighbors)
model = KNeighborsClassifier()

# random_state is omitted: without shuffle=True it has no effect, and recent
# scikit-learn releases reject the combination with a ValueError.
kfold = KFold(n_splits=10)
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=kfold)
grid_result = grid.fit(rescaledX, Y_train)

# Best score first, then one line per candidate:
# mean test score, its standard deviation, parameters.
print("En iyi: %f , %s ile"  % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) : %r" % (mean, stdev, param))

In [None]:
# Standardise the training features, then grid-search C and the kernel of an
# SVC with 10-fold cross-validation. `scaler` and `rescaledX` are reused by
# the final evaluation cell.
scaler = StandardScaler().fit(X_train)
rescaledX = scaler.transform(X_train)

c_values = [0.1, 0.3, 0.5, 0.7, 0.9, 1.0, 1.3, 1.5, 1.7, 2.0]
kernel_values = ['linear', 'poly', 'rbf', 'sigmoid']
param_grid = dict(C=c_values, kernel=kernel_values)
model = SVC(gamma='scale')

# random_state is omitted: without shuffle=True it has no effect, and recent
# scikit-learn releases reject the combination with a ValueError.
kfold = KFold(n_splits=10)
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=kfold)
grid_result = grid.fit(rescaledX, Y_train)

# Best score first, then one line per candidate:
# mean test score, its standard deviation, parameters.
print("En iyi: %f , %s ile" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) : %r" % (mean, stdev, param))

In [None]:
# Tree-ensemble classifiers, 100 estimators each, paired with short labels.
ensembles = [
    ('GBM', GradientBoostingClassifier(n_estimators=100)),
    ('RF', RandomForestClassifier(n_estimators=100)),
    ('ET', ExtraTreesClassifier(n_estimators=100)),
]

results = []  # per-model arrays of fold scores
names = []    # model labels, in the same order as `results`

# Built once because it is identical for every model. random_state is
# omitted: without shuffle=True it has no effect, and recent scikit-learn
# releases reject the combination with a ValueError.
kfold = KFold(n_splits=10)

for name, model in ensembles:
    cv_results = cross_val_score(model, X_train, Y_train, cv=kfold)
    results.append(cv_results)
    names.append(name)
    # Label, mean fold score and standard deviation across folds.
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

### Modellerin gücünü ölçme zamanı

In [None]:
# Train the final SVC on the standardised training features.
# NOTE(review): C=2.0 with the rbf kernel is hard-coded; presumably taken
# from the SVC grid search. Confirm against its printed best params.
model = SVC(C=2.0, kernel='rbf', gamma='scale')
model.fit(rescaledX, Y_train)

# Apply the scaler fitted on the training data to the held-out rows, then
# predict them.
rescaledValidationX = scaler.transform(X_test)
predictions = model.predict(rescaledValidationX)

# Print accuracy, confusion matrix and the per-class report, in that order.
for metric in (accuracy_score, confusion_matrix, classification_report):
    print(metric(Y_test, predictions))