In [1]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import os
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix

In [2]:
def create_path(*args):
    """Return a path built from the current working directory and ``args``.

    Each positional argument is appended, in order, as a further path
    component using ``os.path.join``. With no arguments the current
    working directory itself is returned.
    """
    return os.path.join(os.getcwd(), *args)

In [3]:
class modeling():
    """Load train/test features and labels for one question and evaluate classifiers.

    ``__init__`` reads four ``.npy`` files (located via ``create_path``) and
    stores them as ``X_train``, ``X_test``, ``Y_train`` and ``Y_test``.
    Every evaluation method fits a scikit-learn classifier on the training
    split, predicts the test split and prints a classification report.
    The evaluation methods return ``None``; results are only printed.
    """

    def __init__(self, quest_num, mat_type, dim = 100):
        """Load the label and feature arrays for one question.

        Args:
            quest_num: Question identifier interpolated into the file names.
            mat_type: Feature representation to load, ``'tfidf'`` or ``'w2v'``.
            dim: Dimensionality interpolated into the feature file names.

        Raises:
            ValueError: If ``mat_type`` is neither ``'tfidf'`` nor ``'w2v'``.
        """
        # Resolve the feature location before any file I/O so that an
        # unsupported mat_type fails immediately instead of leaving the
        # instance without X_train / X_test.
        if mat_type == 'tfidf':
            folder = 'tfidf_matrices'
            template = 'TFIDF_{split}_Question{quest_num}_{dim}dim.npy'
        elif mat_type == 'w2v':
            folder = 'w2v_matrices'
            template = 'Question{quest_num}{split}_{dim}dimensions.npy'
        else:
            raise ValueError(
                "Unsupported mat_type {!r}; expected 'tfidf' or 'w2v'".format(mat_type))

        self.Y_test = np.load(create_path('labels', '{}_labels_question{}.npy'.format('test', quest_num)))
        self.Y_train = np.load(create_path('labels', '{}_labels_question{}.npy'.format('train', quest_num)))
        self.X_test = np.load(create_path(
            folder, template.format(split='test', quest_num=quest_num, dim=dim)))
        self.X_train = np.load(create_path(
            folder, template.format(split='train', quest_num=quest_num, dim=dim)))

    def _fit_and_report(self, label, clf):
        """Print ``label``, fit ``clf`` on the training split and print the test report."""
        # The label is printed without a newline so it prefixes the report header.
        print(label, end = ' ')
        clf.fit(self.X_train, self.Y_train)
        predicted = clf.predict(self.X_test)
        print(classification_report(self.Y_test, predicted))

    def multinomialnb(self):
        """Evaluate a multinomial naive Bayes classifier with default settings.

        NOTE(review): scikit-learn documents MultinomialNB for non-negative
        features; confirm the loaded matrices satisfy that before relying on
        this method with ``mat_type='w2v'``.
        """
        self._fit_and_report("NaiveBayes", MultinomialNB())

    def mlpc(self, hls = 50, max_it = 500):
        """Evaluate a multi-layer perceptron classifier.

        Args:
            hls: Passed to ``MLPClassifier`` as ``hidden_layer_sizes``.
            max_it: Passed to ``MLPClassifier`` as ``max_iter``.
        """
        self._fit_and_report("MLP", MLPClassifier(hidden_layer_sizes=hls, max_iter=max_it))

    def decisiontrees(self, max_depth = 5):
        """Evaluate a decision tree classifier.

        Args:
            max_depth: Maximum depth of the tree.
        """
        # max_depth must be passed by keyword: given positionally it is not
        # bound to ``max_depth`` (it lands on ``criterion`` or is rejected,
        # depending on the scikit-learn version).
        self._fit_and_report("tree", tree.DecisionTreeClassifier(max_depth=max_depth))

    def randomforest(self, n_est = 100, max_d = 20):
        """Evaluate a random forest classifier with a fixed ``random_state`` of 13.

        Args:
            n_est: Passed to ``RandomForestClassifier`` as ``n_estimators``.
            max_d: Passed to ``RandomForestClassifier`` as ``max_depth``.
        """
        self._fit_and_report(
            "RandomForest",
            RandomForestClassifier(n_estimators=n_est, max_depth=max_d, random_state=13))

In [7]:
# Load question 1 using the 'w2v' feature matrices with dim=200.
data = modeling(quest_num=1, mat_type='w2v', dim=200)

In [8]:
# Fit the random forest with its default arguments and print the
# classification report for the test split.
data.randomforest()

RandomForest               precision    recall  f1-score   support

         000       0.00      0.00      0.00         5
         001       0.00      0.00      0.00         2
         010       0.00      0.00      0.00         7
         011       0.50      0.10      0.17        10
         100       0.69      0.88      0.77        42
         101       0.21      0.88      0.34         8
         110       0.91      0.73      0.81        84
         111       0.79      0.83      0.81        46

   micro avg       0.71      0.71      0.71       204
   macro avg       0.39      0.43      0.36       204
weighted avg       0.73      0.71      0.70       204



  'precision', 'predicted', average, warn_for)
