In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import glob

# Load the digit sheet and reduce it to a single grey channel.
img = cv2.imread('digits.png')
if img is None:
    # cv2.imread reports an unreadable/missing file by returning None, which
    # would otherwise only fail later inside cvtColor with an opaque error.
    raise OSError("cv2.imread could not read 'digits.png'")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Cut the sheet into a grid of 50 rows x 100 columns of cells.
cells = [np.hsplit(row, 100) for row in np.vsplit(gray, 50)]
x = np.array(cells)
# print(x.shape)

# Flatten every cell (20 x 20 pixels) into one row of 400 float32 values.
train = x.reshape(-1, 400).astype(np.float32)
# print(train.shape)

# There are 500 of '0', 500 of '1', etc.: the cells are flattened row by row,
# so every 5 grid rows (5 * 100 cells) belong to one digit.
k = np.arange(10)
train_labels = np.repeat(k, 500)[:, np.newaxis]

np.savez("trained.npz", train=train, train_labels=train_labels)

# Preview one cell.
plt.imshow(cv2.cvtColor(x[5, 1], cv2.COLOR_GRAY2RGB))
plt.show()

# Write one sample image per digit (first cell of each digit's first grid row).
# NOTE: these cells are also part of `train`, so they are not unseen data.
for digit in range(10):
    cv2.imwrite('test%d.png' % digit, x[5 * digit, 0])

In [None]:
# Path of the .npz archive passed to load_train_data() below.
FILE_NAME = 'trained.npz'

# Read the stored training set back from disk
def load_train_data(file_name):
    """Load the training samples and their labels from an ``.npz`` archive.

    Args:
        file_name: Path of an archive containing the arrays ``train`` and
            ``train_labels``.

    Returns:
        A ``(train, train_labels)`` tuple of the two stored arrays.
    """
    with np.load(file_name) as archive:
        return archive['train'], archive['train_labels']

# Load an image file and scale it to a 20x20 sample
def resize20(image):
    """Read an image file and convert it into one flattened 20x20 sample.

    The image is converted to greyscale, resized to 20x20, displayed with
    matplotlib as a side effect, and returned as a float32 array reshaped
    to rows of 400 values.

    Args:
        image: Path of the image file to read.

    Returns:
        The resized greyscale image as a float32 array of shape ``(1, 400)``.

    Raises:
        OSError: If ``cv2.imread`` cannot read ``image``.
    """
    img = cv2.imread(image)
    if img is None:
        # cv2.imread returns None instead of raising when the file is missing
        # or cannot be decoded; fail here with a message naming the file.
        raise OSError("cv2.imread could not read %r" % (image,))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray_resize = cv2.resize(gray, (20, 20))
    plt.imshow(cv2.cvtColor(gray_resize, cv2.COLOR_GRAY2RGB))
    plt.show()
    return gray_resize.reshape(-1, 400).astype(np.float32)

def check(test, train, train_labels):
    """Classify ``test`` with an OpenCV k-nearest-neighbours model.

    A new model is created and trained on ``train`` / ``train_labels`` on
    every call, then queried with ``k=5``.

    Args:
        test: Sample(s) to classify, one per row.
        train: Training samples, one per row (``cv2.ml.ROW_SAMPLE`` layout).
        train_labels: Labels matching the rows of ``train``.

    Returns:
        The ``results`` array produced by ``findNearest``.
    """
    model = cv2.ml.KNearest_create()
    model.train(train, cv2.ml.ROW_SAMPLE, train_labels)
    # findNearest yields (retval, results, neighbours, distances); only the
    # results array is handed back to the caller.
    _, predicted, _, _ = model.findNearest(test, k=5)
    return predicted

train, train_labels = load_train_data(FILE_NAME)

# glob returns matches in arbitrary order; sort so the files are processed in
# a stable sequence, and print each file name next to its prediction.
for file_name in sorted(glob.glob('test*.png')):
    test = resize20(file_name)
    result = check(test, train, train_labels)
    print(file_name, result)


In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import seaborn as sns
sns.set()

breast_cancer = load_breast_cancer()
X = pd.DataFrame(breast_cancer.data, columns=breast_cancer.feature_names)
# Restrict the model to two features.
X = X[['mean area', 'mean compactness']]
# Turn the integer targets into named categories, then into indicator columns;
# drop_first removes the first category's column.
y = pd.Categorical.from_codes(breast_cancer.target, breast_cancer.target_names)
y = pd.get_dummies(y, drop_first=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

knn = KNeighborsClassifier(n_neighbors=5, metric='euclidean')
# y_train is a DataFrame (a column vector); flatten it to 1-D so fit() does
# not emit a DataConversionWarning.
knn.fit(X_train, y_train.to_numpy().ravel())

y_pred = knn.predict(X_test)

# sns.scatterplot(
#     x = 'mean area',
#     y = 'mean compactness',
#     hue = 'benign',
#     data = X_test.join(y_test, how='outer')
# )

# plt.scatter(
#     X_test['mean area'],
#     X_test['mean compactness'],
#     c=y_pred,
#     cmap='coolwarm',
#     alpha=0.7
# )

confusion_matrix(y_test, y_pred)

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris 
from sklearn.metrics import precision_recall_fscore_support

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score


import seaborn as sns

# Columns 0-3 are read as numeric features, column 4 as the class label.
X = np.loadtxt('iris.data', delimiter=',', usecols=(0, 1, 2, 3))
# np.string_ was removed in NumPy 2.0; np.bytes_ is the same byte-string dtype
# and is also available in older NumPy releases.
Y = np.loadtxt('iris.data', delimiter=',', dtype=np.bytes_, usecols=4)

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)


knn = KNeighborsClassifier(n_neighbors=5, weights='distance')
knn.fit(x_train, y_train)

# Predict on the held-out split (y_pred) and on the training split (y_predict).

y_pred = knn.predict(x_test)
y_predict = knn.predict(x_train)
# confusion_matrix(y_test, y_pred)

# accuracy: (tp + tn) / (p + n)
accuracy = accuracy_score(y_test, y_pred)      # test split
accuracy2 = accuracy_score(y_train, y_predict)  # training split
print('Accuracy: %f' % accuracy)
print('Accuracy2: %f' % accuracy2)
# precision tp / (tp + fp)
# recall: tp / (tp + fn)
# f1: 2 tp / (2 tp + fp + fn)
precision_recall_fscore_support(y_test, y_pred, average='macro')
# precision_recall_fscore_support(y_train, y_predict, average='macro')

In [2]:
import numpy as np
import matplotlib.pyplot as plt 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score



# Every column but the last is a feature; the last column is the class label.
train_dataset = np.loadtxt('shuttle.trn', delimiter=' ', dtype=np.int32)
x_train = train_dataset[:, 0:-1]
# Slice the labels as a 1-D vector: an (n, 1) column makes fit() emit a
# DataConversionWarning.
y_train = train_dataset[:, -1]


# make sure
# print(x_train)
# print(y_train)

test_dataset = np.loadtxt('shuttle.tst', delimiter=' ', dtype=np.int32)
x_test = test_dataset[:, 0:-1]
y_test = test_dataset[:, -1]

# To decide the right k-value for the dataset
# neighbors = np.arange(1, 9)
# test_accuracy = np.empty(len(neighbors))

# for i, k in enumerate(neighbors) :
#     knn = KNeighborsClassifier(n_neighbors=k,weights='distance')
#     knn.fit(x_train, y_train)
#     y_pred = knn.predict(x_test)

#     # Compute test data accuracy
#     test_accuracy[i] = accuracy_score(y_test, y_pred)

# plt.plot(neighbors, test_accuracy, label = 'Testing dataset Accuracy')

# plt.legend()
# plt.xlabel('n_neighbors')
# plt.ylabel('Accuracy')
# plt.show()

knn = KNeighborsClassifier(n_neighbors=4, weights='distance')
knn.fit(x_train, y_train)

y_pred = knn.predict(x_test)      # predictions for the test file
y_predict = knn.predict(x_train)  # predictions for the training file

# accuracy: (tp + tn) / (p + n)
accuracy = accuracy_score(y_test, y_pred)
accuracy2 = accuracy_score(y_train, y_predict)
# print('Accuracy: %f' % accuracy)
# print('Accuracy2: %f' % accuracy2)
# precision tp / (tp + fp)
# recall: tp / (tp + fn)
# f1: 2 tp / (2 tp + fp + fn)
# precision_recall_fscore_support(y_test, y_pred, average='macro')
# NOTE(review): this reports scores on the training data itself; with
# weights='distance' each training point presumably matches itself at distance
# 0, so perfect scores here say little about generalisation -- consider
# reporting the test-set line above instead.
print(precision_recall_fscore_support(y_train, y_predict, average='macro'))




(1.0, 1.0, 1.0, None)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder

def convert(data):
    """Label-encode three columns of ``data`` and fill missing values.

    The columns ``protocol_type``, ``service`` and ``src_bytes`` are replaced
    in place on ``data`` with their integer label codes. Missing values are
    then filled with ``-999`` in the frame that is returned.

    Args:
        data: DataFrame containing the three columns named above.

    Returns:
        The result of ``data.fillna(-999)``.
    """
    number = preprocessing.LabelEncoder()
    for column in ('protocol_type', 'service', 'src_bytes'):
        # fit_transform refits the encoder each time, so a single instance
        # serves all three columns.
        data[column] = number.fit_transform(data[column])
    return data.fillna(-999)



# NOTE(review): these column names do not appear to follow the field order of
# the dataset's kddcup.names file (there the 4th field is `flag`); convert()
# label-encodes the column called 'src_bytes' here, which suggests it really
# holds a categorical field -- confirm before relying on the names.
df = pd.read_csv('kddcup.data_10_percent_corrected', names = ['duration','protocol_type','service','src_bytes','dst_bytes','flag','land','wrong_fragment','urgent',
'hot','num_failed_logins','logged_in','num_compromised','root_shell','su_attempted','num_root','num_file_creations',
'num_shells','num_access_files','num_outbound_cmds','is_host_login','is_guest_login','count','serror_rate',
'rerror_rate','same_srv_rate','diff_srv_rate','srv_count','srv_serror_rate','srv_rerror_rate','srv_diff_host_rate',
'dst_host_count','dst_host_srv_count','dst_host_same_srv_rate','dst_host_diff_srv_rate','dst_host_same_src_port_rate',
'dst_host_srv_diff_host_rate','dst_host_serror_rate','dst_host_srv_serror_rate','dst_host_rerror_rate','dst_host_srv_rerror_rate','class'])

XY = convert(df)


# Use the frame returned by convert() (encoded columns plus the missing-value
# fill) rather than `df`, which does not carry the fill.
array = XY.values
# Every column but the last is a feature; the last column ('class') is the label.
prex = array[:, :-1]

# normalize() rescales each sample (row) to unit L2 norm.
X = preprocessing.normalize(prex, norm='l2')
Y = array[:, -1]

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

knn = KNeighborsClassifier(n_neighbors=3, weights='distance')
knn.fit(x_train, y_train)
y_pred = knn.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
print('Accuracy: %f' % accuracy)
# Score the held-out predictions computed above. The previous call compared
# y_train with `y_predict`, a variable this cell never defines (it only exists
# if an earlier cell happened to leave it in the kernel, for different data).
print(precision_recall_fscore_support(y_test, y_pred, average='macro'))

In [None]:
# Prints a fixed greeting; uses no variables from the cells above.
print("Hello")