<a href="https://colab.research.google.com/github/frankie711/UTS_ML2019_Main/blob/master/ASS2_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#-*-coding:utf-8-*-
#!/usr/bin/python3 

import sys
import re
import time

# Settings consumed by the data-splitting and model-selection steps.
config = {
    "data": {
        # fraction of the samples that goes into the training split
        "split_rate": 0.8,
    },
    "model": {
        # classifier selector: 1 = naive Bayes, 2 = decision tree, 3 = SVM
        "model_type": 3,
    },
}

def load_data():
    """
    Load the scikit-learn handwritten-digits dataset.

    Args:
        None

    Returns:
        digits : the Bunch object returned by sklearn.datasets.load_digits()

    The returned Bunch exposes (among others) these fields:
        data (ndarray)   : [1797, 64]  flattened 8x8 images
        target (ndarray) : [1797]      class label of each sample
        target_names     : [0 1 2 3 4 5 6 7 8 9]
        images (ndarray) : [1797, 8, 8]
        DESCR            : textual description of the dataset
    """
    # Imported lazily so sklearn is only required when data is actually loaded.
    from sklearn.datasets import load_digits

    return load_digits()

def split_train_test(digits, split_rate=None):
    """
    Split the dataset into a leading training part and a trailing test part.

    The split is positional (no shuffling): the first
    int(n_samples * split_rate) samples form the training set and the
    remaining samples form the test set.

    Args:
        digits : dataset object with `data` and `target` arrays
        split_rate : fraction of samples used for training, in [0, 1].
            When None (default) the value of config["data"]["split_rate"]
            is used.

    Returns:
        x_train, y_train, x_test, y_test

    Raises:
        ValueError: if the split rate is outside [0, 1]
    """
    if split_rate is None:
        split_rate = config["data"]["split_rate"]

    # A negative rate would silently turn into a negative slice index and
    # produce a wrong split, so reject out-of-range values explicitly.
    if not 0 <= split_rate <= 1:
        raise ValueError("split_rate must be in [0, 1], got %r" % (split_rate,))

    trainset_size = int(digits.data.shape[0] * split_rate)
    x_train = digits.data[:trainset_size]
    y_train = digits.target[:trainset_size]
    x_test = digits.data[trainset_size:]
    y_test = digits.target[trainset_size:]
    return x_train, y_train, x_test, y_test

def model_train(x_train, y_train, model_type = 1):
    """
    Build the classifier selected by `model_type` and fit it on the training data.

    Several model families are supported so that they can be compared.

    Args:
        x_train : training samples
        y_train : training labels
        model_type :
            1. Multinomial naive Bayes (default)
            2. Decision tree
            3. SVM (SVC with gamma=0.001, C=100)

    Returns:
        the fitted classifier

    Raises:
        ValueError: if model_type is not 1, 2 or 3
    """
    # Each estimator is imported lazily so only the selected one is loaded.
    if model_type == 1:
        from sklearn.naive_bayes import MultinomialNB
        classifier = MultinomialNB()
    elif model_type == 2:
        from sklearn import tree
        classifier = tree.DecisionTreeClassifier()
    elif model_type == 3:
        from sklearn import svm
        classifier = svm.SVC(gamma = 0.001, C = 100)
    else:
        # Without this branch an unknown selector would reach fit() with
        # `classifier` unbound and fail with an UnboundLocalError.
        raise ValueError(
            "unsupported model_type %r (expected 1, 2 or 3)" % (model_type,)
        )

    # train model
    classifier.fit(x_train, y_train)
    return classifier

def model_predict(classifier, x_test):
    """
    Run the given model on the test samples.

    Args:
        classifier : model object exposing a predict() method
        x_test : test dataset

    Returns:
        whatever classifier.predict(x_test) returns (the predicted labels)
    """
    return classifier.predict(x_test)


def model_eva(y_test, y_predict, digits):
    """
    Evaluate predictions against the true labels.

    Prints the per-class classification report, then accuracy and the
    macro- and micro-averaged precision / recall / F1 scores.

    Args:
        y_test : true labels
        y_predict : predicted labels
        digits : dataset object; its target_names label the report rows

    Returns:
        dict mapping metric name to score, with keys
        "accuracy", "precision_macro", "recall_macro", "f1_macro",
        "precision_micro", "recall_micro", "f1_micro"
    """
    from sklearn import metrics

    # Per-class report; classification_report expects string display names.
    target_names = [str(name) for name in digits.target_names]
    print (metrics.classification_report(y_test, y_predict, target_names = target_names))

    # Summary scores, collected in print order (dicts keep insertion order).
    scores = {"accuracy": metrics.accuracy_score(y_test, y_predict)}
    for average in ("macro", "micro"):
        scores["precision_" + average] = metrics.precision_score(y_test, y_predict, average=average)
        scores["recall_" + average] = metrics.recall_score(y_test, y_predict, average=average)
        scores["f1_" + average] = metrics.f1_score(y_test, y_predict, average=average)

    for name, value in scores.items():
        print ("%20s: %.4f" % (name, value))

    return scores
    

if __name__ == '__main__':
    # 1) load the digits dataset
    digits = load_data()

    # 2) cut it into training and test parts
    x_train, y_train, x_test, y_test = split_train_test(digits)

    # 3) fit the classifier chosen in the configuration
    classifier = model_train(
        x_train,
        y_train,
        model_type=config["model"]["model_type"],
    )

    # 4) predict labels for the held-out samples
    y_predict = model_predict(classifier, x_test)

    # 5) print the evaluation metrics
    model_eva(y_test, y_predict, digits)


              precision    recall  f1-score   support

           0       1.00      0.97      0.99        35
           1       0.97      1.00      0.99        36
           2       1.00      1.00      1.00        35
           3       1.00      0.84      0.91        37
           4       0.97      0.92      0.94        37
           5       0.93      1.00      0.96        37
           6       1.00      1.00      1.00        37
           7       1.00      1.00      1.00        36
           8       0.86      0.97      0.91        33
           9       0.92      0.95      0.93        37

    accuracy                           0.96       360
   macro avg       0.97      0.96      0.96       360
weighted avg       0.97      0.96      0.96       360

     precision_macro: 0.9655
       recalll_macro: 0.9644
            f1_macro: 0.9637
     precision_micro: 0.9639
        recall_micro: 0.9639
            f1_micro: 0.9639
