In [1]:
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.multiclass import unique_labels
from sklearn.metrics import euclidean_distances, accuracy_score, classification_report
from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
class KNNClassifier(BaseEstimator, ClassifierMixin):
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    ``fit`` only validates and stores the training data. All distance work
    happens in ``predict``, which builds the full
    ``(n_queries, n_training_samples)`` distance matrix in memory.

    Parameters
    ----------
    K : int, default=3
        Number of nearest training samples that vote on each prediction.
        Must be a positive integer. If ``K`` is larger than the number of
        training samples, every training sample votes.

    Attributes
    ----------
    classes_ : ndarray
        Distinct labels seen during ``fit``.
    X_ : ndarray or CSR sparse matrix
        Validated training features, as returned by ``check_X_y``.
    y_ : ndarray
        Validated training labels, aligned row-for-row with ``X_``.
    """

    def __init__(self, K=3):
        # Stored untouched; validation is deferred to fit/predict so that
        # get_params / set_params / clone keep working on any value.
        self.K = K

    def _check_K(self):
        """Raise ``ValueError`` unless ``self.K`` is a positive integer."""
        if not isinstance(self.K, (int, np.integer)) or self.K < 1:
            raise ValueError(
                f"K must be a positive integer, got {self.K!r}."
            )

    def fit(self, X, y):
        """Validate and memorise the training set.

        Parameters
        ----------
        X : array-like or sparse matrix of shape (n_samples, n_features)
            Training features. Sparse input is kept sparse (CSR).
        y : array-like of shape (n_samples,)
            Training labels.

        Returns
        -------
        self : KNNClassifier
            The fitted estimator.

        Raises
        ------
        ValueError
            If ``K`` is not a positive integer, or if ``check_X_y`` rejects
            the input.
        """
        self._check_K()
        # accept_sparse lets callers pass e.g. a TF-IDF matrix directly
        # instead of densifying it first; dense input behaves as before.
        X, y = check_X_y(X, y, accept_sparse="csr")
        self.classes_ = unique_labels(y)
        self.X_ = X
        self.y_ = y
        return self

    def predict(self, X):
        """Predict a label for every row of ``X`` by majority vote.

        Parameters
        ----------
        X : array-like or sparse matrix of shape (n_queries, n_features)
            Samples to classify.

        Returns
        -------
        ndarray of shape (n_queries,)
            Predicted labels, with the same dtype as the stored ``y_``.

        Raises
        ------
        ValueError
            If ``K`` is not a positive integer (it may have been changed
            after ``fit``), or if ``check_array`` rejects the input.
        """
        check_is_fitted(self)
        self._check_K()
        X = check_array(X, accept_sparse="csr")

        # Row i holds the distance from query i to every training sample.
        distances = euclidean_distances(X, self.X_)
        # Training-row indices of the K closest samples, nearest first.
        # Slicing clamps silently when K exceeds the number of samples.
        nearest_idx = np.argsort(distances, axis=1)[:, :self.K]

        neighbor_labels = self.y_[nearest_idx]
        # Counter.most_common keeps equal counts in first-seen order, so a
        # tied vote goes to the tied label whose first occurrence is nearest.
        winners = [
            Counter(row_labels).most_common(1)[0][0]
            for row_labels in neighbor_labels
        ]
        # Return an ndarray (not a list) as scikit-learn estimators do,
        # preserving the label dtype seen at fit time.
        return np.asarray(winners, dtype=self.y_.dtype)

In [3]:
# Load the corpus; the 'text' column holds the documents and 'class' their labels.
base = pd.read_csv('re8.csv')
X, y = base['text'], base['class']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Fit the TF-IDF vectorizer on the training documents only, then reuse the
# fitted vectorizer to transform the test documents.
# NOTE(review): X_train is rebound here from the raw text to its TF-IDF
# matrix, so re-running this cell without first re-running the split cell
# feeds the matrix back into fit_transform. Consider a separate name
# (mirroring X_test_transformed) and updating the later cells to match.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(X_train)
X_test_transformed = vectorizer.transform(X_test)

In [5]:
# Instantiate the custom k-NN classifier with 6 voting neighbours.
clf = KNNClassifier(K=6)

In [6]:
# Fit on the TF-IDF training matrix; .toarray() converts it to a dense
# array first, which materialises the full documents x vocabulary matrix.
clf.fit(X_train.toarray(), y_train)

In [7]:
# Classify the held-out documents (densified the same way as the training data).
predictions = clf.predict(X_test_transformed.toarray())

# Overall fraction of test documents labelled correctly.
accuracy = accuracy_score(y_test, predictions)
print('Accuracy:', accuracy)

# Per-class precision / recall / F1 breakdown.
cp = classification_report(y_test, predictions)
print(cp)

Accuracy: 0.9003257328990228
              precision    recall  f1-score   support

         acq       0.97      0.82      0.89       497
       crude       0.88      0.91      0.89        74
        earn       0.90      0.98      0.94       754
       grain       1.00      0.43      0.60         7
    interest       0.75      0.79      0.77        57
       money       0.69      0.82      0.75        44
        ship       0.91      0.62      0.74        32
       trade       0.82      0.91      0.86        70

    accuracy                           0.90      1535
   macro avg       0.87      0.79      0.81      1535
weighted avg       0.91      0.90      0.90      1535

