# Model

In [87]:
from sklearn import neighbors, datasets, preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np


# Load the bundled iris dataset and keep only its first two feature columns.
iris = datasets.load_iris()
X = iris.data[:, :2]
y = iris.target

# Hold out a test split; the fixed seed keeps the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=33)

# Standardize the features. The scaler learns its statistics from the
# training rows only; the same fitted scaler is then applied to the test rows.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# k-nearest-neighbours classifier with k = 5; fit() returns the estimator.
knn = neighbors.KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Accuracy on the held-out split, left as the final expression so the
# notebook displays it.
y_pred = knn.predict(X_test)
accuracy_score(y_test, y_pred)


0.631578947368421

In [88]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 for the predictions currently in y_pred.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       0.42      0.73      0.53        11
           2       0.73      0.42      0.53        19

   micro avg       0.63      0.63      0.63        38
   macro avg       0.72      0.72      0.69        38
weighted avg       0.70      0.63      0.63        38



In [89]:
# GridSearch
#
# Exhaustive search over the neighbour count (1..10) and two distance metrics
# for the KNN estimator, cross-validated on the training split.
from sklearn.model_selection import GridSearchCV

params = {
    "n_neighbors": list(range(1, 11)),
    "metric": ["euclidean", "cityblock"],
}

# fit() returns the search object itself, so construction and fitting chain.
grid = GridSearchCV(estimator=knn, param_grid=params).fit(X_train, y_train)

print(grid.best_score_)
print(grid.best_estimator_.n_neighbors)

0.8303571428571429
2




In [90]:
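# Linear Regression (disabled)
# NOTE: this snippet is intentionally left commented out. As written it would
# fit on the unsplit, unscaled X while predicting on the scaled X_test, and
# accuracy_score cannot score the continuous predictions of a regressor.
# from sklearn.linear_model import LinearRegression
# lr = LinearRegression(normalize=True)   

# lr.fit(X, y)
# y_pred = lr.predict(X_test)
# accuracy_score(y_test, y_pred)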


In [91]:
# Support Vector Machines (SVM)
from sklearn.svm import SVC

# Linear-kernel support vector classifier trained on the training split.
svc = SVC(kernel='linear')
svc.fit(X_train, y_train)

# Rebinds the shared y_pred to this model's test-set predictions.
y_pred = svc.predict(X_test)

# Print the per-class report as its own statement, then leave the accuracy as
# the cell's final expression. Joining the two with a comma built a tuple
# whose second element was print()'s return value (None).
print(classification_report(y_test, y_pred))
accuracy_score(y_test, y_pred)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       0.46      0.55      0.50        11
           2       0.71      0.63      0.67        19

   micro avg       0.68      0.68      0.68        38
   macro avg       0.72      0.73      0.72        38
weighted avg       0.70      0.68      0.69        38



(0.6842105263157895, None)

In [92]:
# Naive Bayes
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes classifier trained on the training split.
gnb = GaussianNB()
gnb.fit(X_train, y_train)

# Rebinds the shared y_pred to this model's test-set predictions.
y_pred = gnb.predict(X_test)

# Print the per-class report as its own statement, then leave the accuracy as
# the cell's final expression. Joining the two with a comma built a tuple
# whose second element was print()'s return value (None).
print(classification_report(y_test, y_pred))
accuracy_score(y_test, y_pred)


              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       0.50      0.64      0.56        11
           2       0.75      0.63      0.69        19

   micro avg       0.71      0.71      0.71        38
   macro avg       0.75      0.76      0.75        38
weighted avg       0.73      0.71      0.72        38



(0.7105263157894737, None)

In [93]:
# Principal Component Analysis (PCA)
from sklearn.decomposition import PCA
# scikit-learn documents a float n_components in (0, 1) as "keep enough
# components to explain at least that fraction of the variance".
pca = PCA(n_components=0.95)

# Fit the projection on the training rows, then apply the same fitted
# projection to the test rows. Both names are rebound in place, so every later
# cell sees the projected features instead of the values held before this
# cell, and re-running the cell applies PCA again to its own output.
X_train = pca.fit_transform(X_train)  
X_test = pca.transform(X_test)  
# Final expression: displays the variance share of each retained component.
pca.explained_variance_ratio_  


array([0.56132873, 0.43867127])

In [100]:
# k_means

from sklearn.cluster import KMeans

# Three clusters; the fixed seed makes the centroid initialisation repeatable.
k_means = KMeans(n_clusters=3, random_state=0)
k_means.fit(X_train)

# Assign clusters and score while X_test is still in the feature space the
# model was fitted on. Doing this after X_test had been replaced by the
# cluster-distance matrix (one column per cluster) handed predict()/score() a
# different number of features than the model was trained with.
test_clusters = k_means.predict(X_test)
# KMeans.score ignores its y argument, so the labels are not passed.
kmeans_test_score = k_means.score(X_test)

# Later cells read X_train / X_test as cluster-distance features, so the
# rebinding is kept, but only after prediction and scoring are done.
# NOTE: because the names are rebound, re-running this cell refits on its own
# output; restart and run top-to-bottom for reproducible numbers.
X_train = k_means.transform(X_train)
X_test = k_means.transform(X_test)

# Final expression so the notebook displays the score.
kmeans_test_score

-10.847865285578129

In [101]:
# Random Forest
#
# Fits a 10-tree random-forest regressor on the current training features and
# prints three error metrics computed on the held-out split.
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor

# fit() returns the estimator, so construction and fitting are chained.
regressor = RandomForestRegressor(n_estimators=10, random_state=0).fit(X_train, y_train)
y_pred = regressor.predict(X_test)

# The mean squared error is computed once and reused for its square root.
rf_mse = metrics.mean_squared_error(y_test, y_pred)

print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', rf_mse)
print('Root Mean Squared Error:', np.sqrt(rf_mse))

Mean Absolute Error: 0.41644736842105257
Mean Squared Error: 0.29712887426900586
Root Mean Squared Error: 0.5450952891642028


In [102]:
# Hyper-parameter grid for the random-forest regressor: forest size, split
# criterion and maximum tree depth. Rebinds the shared `params` and `grid`.
# NOTE(review): "mse"/"mae" are the older scikit-learn criterion spellings;
# confirm they are still accepted by the installed version.
params = {
    "n_estimators": range(2, 20),
    "criterion": ["mse", "mae"],
    "max_depth": range(1, 20),
}

# 9-fold cross-validated exhaustive search; fit() returns the search object.
grid = GridSearchCV(estimator=regressor, param_grid=params, cv=9).fit(X_train, y_train)

print(grid.best_score_)
print(grid.best_estimator_)

0.7335430563770373
RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=5,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=19, n_jobs=None,
           oob_score=False, random_state=0, verbose=0, warm_start=False)




In [106]:
# Cross validation
#
# The estimator handed to cross_val_score is the grid-search object itself,
# so the full hyper-parameter search is refitted inside each of the 4 folds.
from sklearn.model_selection import cross_val_score

print(cross_val_score(estimator=grid, X=X_train, y=y_train, cv=4))



[0.76750739 0.77911932 0.70324338 0.78234794]


