In [288]:
import time

import matplotlib.pyplot as plt
import pandas as pd

from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor

In [289]:
# Read the labelled training set and the feature-only test set from the
# current working directory.
data_train = pd.read_csv('train_nov28_task1.csv')
data_test = pd.read_csv('test_nov28_task1_only_features.csv')

# Fit the encoder on the 'class' column and overwrite that column with its
# encoded values in one step. `le` stays at notebook level so predictions
# can later be mapped back with `le.inverse_transform`.
le = LabelEncoder()
data_train['class'] = le.fit_transform(data_train['class'])

In [290]:
def summarize_classification(y_test, y_pred):
    """Print a short classification report for one set of predictions.

    Reports the number of evaluated samples, the count and fraction of
    correct predictions, and the weighted-average precision and recall.

    Args:
        y_test: True labels.
        y_pred: Predicted labels, aligned element-wise with ``y_test``.

    Returns:
        None. Everything is written to standard output.
    """
    weighted = {'average': 'weighted'}

    # Compute every metric up front so nothing is printed if one of them fails.
    hit_ratio = accuracy_score(y_test, y_pred, normalize=True)
    hit_count = accuracy_score(y_test, y_pred, normalize=False)
    weighted_precision = precision_score(y_test, y_pred, **weighted)
    weighted_recall = recall_score(y_test, y_pred, **weighted)

    report_rows = (
        ("Test data count: ", len(y_test)),
        ("accuracy_count: ", hit_count),
        ("accuracy_score: ", hit_ratio),
        ("precision score: ", weighted_precision),
        ("recall score: ", weighted_recall),
    )
    for label, value in report_rows:
        print(label, value)


def Predict_model(mode, x_train, y_train, x_test=None, y_test=None):
    """Fit the classifier selected by ``mode``, evaluate it and print a report.

    The report consists of a separator, the mode name, the metrics printed by
    ``summarize_classification``, the predictions mapped back to their
    original labels, and the elapsed wall-clock time.

    Args:
        mode: Name of the model to train. Supported values are 'Logistic',
            'NaiveBayse', 'DecisionTree', 'KNN' and 'SVM'.
        x_train: Training features.
        y_train: Training labels. They are expected to be the codes produced
            by the notebook-level encoder ``le``, because predictions are
            passed to ``le.inverse_transform``.
        x_test: Evaluation features. When omitted, the notebook-level
            ``x_test`` variable is used (the original behaviour).
        y_test: Evaluation labels. When omitted, the notebook-level
            ``y_test`` variable is used (the original behaviour).

    Returns:
        None. Everything is written to standard output.

    Raises:
        ValueError: If ``mode`` is not one of the supported names.
    """
    # Factories rather than instances, so only the requested estimator is
    # built. 'DecisionTree' maps to a *classifier*: the target is a class
    # label, and a regressor can emit non-integer values that are not valid
    # class codes.
    builders = {
        'Logistic': lambda: LogisticRegression(max_iter=10000),
        'NaiveBayse': GaussianNB,
        'DecisionTree': DecisionTreeClassifier,
        'KNN': lambda: KNeighborsClassifier(n_neighbors=3),
        'SVM': svm.SVC,
    }
    if mode not in builders:
        # Fail with a clear message instead of an unbound-variable error
        # further down.
        raise ValueError(
            f"Unknown mode {mode!r}; expected one of {sorted(builders)}"
        )

    # Backward compatibility: earlier callers relied on the evaluation split
    # living in notebook-level variables instead of passing it in.
    if x_test is None:
        x_test = globals()['x_test']
    if y_test is None:
        y_test = globals()['y_test']

    # perf_counter is a monotonic clock intended for measuring durations.
    start = time.perf_counter()

    print("================")
    print(f"{mode}")

    model = builders[mode]().fit(x_train, y_train)
    y_pred = model.predict(x_test)

    summarize_classification(y_test, y_pred)

    # Cast defensively to integer codes, then map them back to the original
    # class labels with the notebook-level encoder.
    y_pred = le.inverse_transform(y_pred.astype(int))

    print(f'{y_pred=}')

    end = time.perf_counter()
    print(f'執行時間: {end - start} 秒\n')



In [294]:
# Features are every column except the target. Selecting by name rather than
# by position keeps the target out of the features regardless of column order.
X = data_train.drop(columns='class')
Y = data_train['class']

# Hold out 20% of the rows for evaluation. A fixed random_state makes the
# split, and every metric computed from it, reproducible across kernel
# restarts.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

x_test.shape

(720, 5)

In [292]:
# Train and evaluate each supported model on the same training split, in a
# fixed order so the printed reports are easy to compare.
for model_name in ('Logistic', 'NaiveBayse', 'DecisionTree', 'KNN', 'SVM'):
    Predict_model(model_name, x_train, y_train)

Logistic
Test data count:  720
accuracy_count:  539
accuracy_score:  0.7486111111111111
precision score:  0.7483576062027708
recall score:  0.7486111111111111
y_pred=array(['A', 'B', 'C', 'A', 'B', 'A', 'C', 'A', 'C', 'A', 'C', 'C', 'C',
       'C', 'A', 'C', 'B', 'A', 'A', 'C', 'A', 'C', 'A', 'B', 'A', 'B',
       'B', 'A', 'C', 'B', 'A', 'B', 'C', 'A', 'A', 'C', 'A', 'A', 'C',
       'B', 'C', 'B', 'C', 'A', 'B', 'A', 'C', 'A', 'B', 'A', 'B', 'A',
       'B', 'B', 'B', 'C', 'B', 'A', 'C', 'B', 'C', 'B', 'A', 'B', 'A',
       'C', 'C', 'C', 'B', 'B', 'C', 'A', 'C', 'B', 'A', 'C', 'C', 'C',
       'A', 'A', 'A', 'B', 'B', 'B', 'B', 'A', 'B', 'B', 'B', 'C', 'A',
       'A', 'A', 'A', 'B', 'C', 'C', 'A', 'B', 'A', 'B', 'C', 'A', 'A',
       'C', 'C', 'C', 'A', 'C', 'C', 'A', 'C', 'A', 'A', 'B', 'B', 'B',
       'C', 'A', 'C', 'A', 'B', 'A', 'A', 'B', 'B', 'B', 'B', 'C', 'C',
       'C', 'C', 'B', 'B', 'C', 'C', 'C', 'B', 'C', 'A', 'A', 'B', 'B',
       'A', 'A', 'B', 'A', 'C', 'C', 'A', 

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


Test data count:  720
accuracy_count:  657
accuracy_score:  0.9125
precision score:  0.9125845692327779
recall score:  0.9125
y_pred=array(['A', 'B', 'C', 'A', 'B', 'A', 'C', 'A', 'B', 'A', 'C', 'B', 'C',
       'C', 'B', 'B', 'C', 'A', 'A', 'C', 'C', 'C', 'A', 'B', 'C', 'B',
       'B', 'A', 'C', 'C', 'B', 'A', 'A', 'A', 'A', 'C', 'A', 'A', 'B',
       'C', 'C', 'B', 'C', 'A', 'B', 'A', 'A', 'A', 'C', 'A', 'A', 'A',
       'B', 'B', 'B', 'C', 'B', 'B', 'C', 'A', 'C', 'B', 'A', 'B', 'A',
       'C', 'C', 'C', 'B', 'B', 'B', 'A', 'C', 'A', 'A', 'C', 'C', 'C',
       'A', 'A', 'A', 'C', 'B', 'B', 'B', 'A', 'C', 'C', 'B', 'C', 'C',
       'A', 'A', 'A', 'A', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'C',
       'C', 'A', 'C', 'A', 'C', 'C', 'A', 'C', 'B', 'A', 'B', 'B', 'C',
       'C', 'A', 'C', 'A', 'B', 'C', 'A', 'C', 'B', 'B', 'B', 'C', 'C',
       'C', 'C', 'B', 'B', 'C', 'B', 'C', 'B', 'B', 'A', 'C', 'B', 'B',
       'B', 'A', 'A', 'A', 'C', 'A', 'A', 'B', 'A', 'A', 'A', 'B', 'C',
   