In [1]:
import pandas as pd
import numpy as np
import pickle
import random
from sklearn.model_selection  import train_test_split
from sklearn.svm import SVC
import cv2
import matplotlib.pyplot as plt

In [2]:
# Load the pickled dataset. The context manager closes the file handle as soon
# as the data has been read instead of leaving it open for the whole session.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open('flower_data.pickle', 'rb') as pickle_file:
    data = pickle.load(pickle_file)

In [3]:
# Shuffle the samples in place with a fixed seed. An unseeded shuffle changes
# the input order on every run, which makes the later fixed-random_state
# train/test split (and every score derived from it) non-reproducible.
# A private Random instance is used so the global `random` state is untouched.
# NOTE: re-running this cell without reloading `data` shuffles the already
# shuffled list again; use Restart & Run All for a reproducible order.
random.Random(0).shuffle(data)

In [4]:
# Split the (feature, label) pairs held in `data` into two parallel lists.
# The tuple unpacking in each comprehension still requires every entry to be
# exactly a 2-item pair.
features = [feature for feature, _ in data]
labels = [label for _, label in data]

In [5]:
# Hold out 25% of the samples as a test set; random_state fixes the shuffling
# performed by the split itself.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.25,
    random_state=0,
)

In [8]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [9]:
# Candidate models for the hyper-parameter search.
# Each entry maps a display name to:
#   'model'  - an unfitted estimator instance
#   'params' - the parameter grid to search for that estimator
#              (an empty dict means "evaluate the estimator as configured").
# The stochastic estimators (random forest, decision tree) are given a fixed
# random_state so the resulting score table is reproducible between runs.
model_params = {
    'svm' : {
        'model' : SVC(gamma='auto'),
        'params' : {
            'C':[1,10,20],
            'kernel': ['linear', 'rbf']
        }
    },

    'random_forest' : {
        # Bootstrapping and feature sub-sampling are random; seed them.
        'model' : RandomForestClassifier(random_state=0),
        'params' : {
            'n_estimators': [1,5,10]
        }
    },

    # NOTE: the key spelling ('logisitc') is kept as-is because it is used as
    # the model label in the results and may be looked up by name elsewhere.
    'logisitc_regression' : {
        'model' : LogisticRegression(solver='liblinear', multi_class='auto'),
        'params' : {
            'C':[1,5,10]
        }
    },

    'gaussian_nb' : {
        'model' : GaussianNB(),
        'params' : {
        }
    },

    'multinomial_nb' : {
        'model' : MultinomialNB(),
        'params' : {
            'alpha' : [1.0,3.0,5.0]
        }
    },

    'decision_tree' : {
        # splitter='random' (and tie-breaking with 'best') depends on the RNG.
        'model' : DecisionTreeClassifier(random_state=0),
        'params' : {
            'criterion':['gini', 'entropy'],
            'splitter': ['best', 'random']
        }
    }
}

In [11]:
from sklearn.model_selection import GridSearchCV

In [12]:
# Run a 5-fold grid search for every candidate in `model_params` and record
# the best cross-validated score and the parameters that produced it.
scores = []

for model_name, spec in model_params.items():
    clf_hpt = GridSearchCV(
        estimator=spec['model'],
        param_grid=spec['params'],
        cv=5,
        return_train_score=False,
    ).fit(X_train, y_train)

    scores.append(
        dict(
            model=model_name,
            best_score=clf_hpt.best_score_,
            best_params=clf_hpt.best_params_,
        )
    )

In [13]:
# Tabulate the grid-search results, one row per model, and display the table.
df = pd.DataFrame(
    scores,
    columns=['model', 'best_score', 'best_params'],
)
df

Unnamed: 0,model,best_score,best_params
0,svm,0.554297,"{'C': 1, 'kernel': 'rbf'}"
1,random_forest,0.564091,{'n_estimators': 10}
2,logisitc_regression,0.537739,{'C': 5}
3,gaussian_nb,0.563368,{}
4,multinomial_nb,0.562599,{'alpha': 1.0}
5,decision_tree,0.552799,"{'criterion': 'gini', 'splitter': 'best'}"


In [14]:
from sklearn.neighbors import KNeighborsClassifier

In [27]:
# k-nearest-neighbours classifier configured with 30 neighbours.
# NOTE(review): 30 is a hand-picked value and no search for it is shown in this
# notebook - consider tuning n_neighbors with cross-validation on the training
# split rather than against the test set.
knn = KNeighborsClassifier(n_neighbors=30)

In [28]:
# Fit the KNN model on the training split (the same split the grid search used).
knn.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=30)

In [29]:
# Score the fitted KNN model on the held-out test split.
# NOTE(review): the grid-search table reports cross-validated scores computed
# on the training split, whereas this is a single test-set score, so the two
# numbers are not directly comparable.
knn.score(X_test, y_test)

0.6357466063348416