### Imports

In [1]:
# coding: utf-8
import string
import re
import pandas as pd
import numpy as np
import nltk
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import tree
from nltk.stem import RSLPStemmer
from graphviz import Source
from collections import Counter
from IPython.display import display
from IPython.display import SVG
# nltk.download('stopwords')
# nltk.download('rslp')
# conda install graphviz

In [2]:
# %%
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

In [3]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /home/victor/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [4]:
# method to remove stopwords
def RemoveStopWords(sent):
    stp = nltk.corpus.stopwords.words('portuguese')
    listW = ""
    for wrd in sent.split(" "):
        if wrd not in stp:
            listW += str(wrd)+" "
    return listW

In [5]:
def stemming(sent):
    stp = nltk.corpus.stopwords.words('portuguese')
    stemmer = nltk.stem.RSLPStemmer()
    listW = []
    for (desc, value) in sent:
        aux = [str(stemmer.stem(p))
                       for p in desc.split() if p not in stp]
        listW.append((aux, value))
    return listW

In [6]:
def tokenize(sent):
    sent = sent.lower()
    sent = nltk.tokenize.word_tokenize(sent, language='portuguese')
    return sent

In [7]:
def stemmingSimple(sent):
    stp = nltk.corpus.stopwords.words('portuguese')
    stemmer = RSLPStemmer()
    listW = []
    for wrd in sent:
        for p in wrd.split():
            if p not in stp:
                wrd = stemmer.stem(wrd.lower())
                listW.append(wrd)
    return listW

In [8]:
def addAllWords(sent):
    words = []
    for (desc, value) in sent:
        words.extend(desc)
    return words

In [9]:
def freqWords(sent):
    wrds = nltk.FreqDist(sent)
    return wrds

In [10]:
def uniqueWords(sent):
    freq = sent.keys()
    return freq

In [11]:
def extractWords(sent):
    doc = set(sent)
    characteristics = {}
    for wdr in u_words_train_Tespc:
        characteristics['%s' % wdr] = (wdr in doc)
    return characteristics

In [12]:
def models(X_train, Y_train, X_test, Y_test):
    from sklearn.linear_model import LogisticRegression
    log = LogisticRegression(random_state = 0)
    log.fit(X_train, Y_train)
    log_pred = log.predict(X_test)
    
    from sklearn.neighbors import KNeighborsClassifier
    knn = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
    knn.fit(X_train, Y_train)
    knn_pred = knn.predict(X_test)
    
    from sklearn.svm import SVC
    svc_lin = SVC(kernel='linear', random_state = 0)
    svc_lin.fit(X_train, Y_train)
    svc_lin_pred = svc_lin.predict(X_test)
    
    from sklearn.svm import SVC
    svc_rbf = SVC(kernel='rbf', random_state = 0)
    svc_rbf.fit(X_train, Y_train)
    svc_rbf_pred = svc_rbf.predict(X_test)
    
    from sklearn.naive_bayes import GaussianNB
    gauss = GaussianNB()
    gauss.fit(X_train, Y_train)
    gauss_pred = gauss.predict(X_test)
    
    from sklearn.tree import DecisionTreeClassifier
    tree = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
    tree.fit (X_train, Y_train)
    tree_pred = tree.predict(X_test)
    
    from sklearn.ensemble import RandomForestClassifier
    forest = RandomForestClassifier(n_estimators=10, criterion = 'entropy', random_state = 0)
    forest.fit(X_train, Y_train)
    forest_pred = forest.predict(X_test)
    
    from sklearn.linear_model import Perceptron
    pcp = Perceptron(random_state = 0)
    pcp.fit(X_train, Y_train)
    pcp_pred = pcp.predict(X_test)
    
    from sklearn.naive_bayes import BernoulliNB, ComplementNB, MultinomialNB
    mnb = MultinomialNB(alpha=.01)
    mnb.fit(X_train, Y_train)
    mnb_pred = mnb.predict(X_test)
    
    bnb = BernoulliNB(alpha=.01)
    bnb.fit(X_train, Y_train)
    bnb_pred = bnb.predict(X_test)
    
    cnb = ComplementNB(alpha=.1)
    cnb.fit(X_train, Y_train)
    cnb_pred = cnb.predict(X_test)
    
    print('[0]Logistic Regression Training Accuracy: ', log.score(X_train, Y_train))
    print('[0]Logistic Regression Training Accuracy: ', accuracy_score(Y_test, log_pred))
    print(classification_report(Y_test, log_pred))
    
    print('[1]KNeighborns Training Accuracy: ', knn.score(X_train, Y_train))
    print('[1]KNeighborns Training Accuracy: ', accuracy_score(Y_test, knn_pred))
    print(classification_report(Y_test, knn_pred))
    
    print('[2]SVC Linear Training Accuracy: ', svc_lin.score(X_train, Y_train))
    print('[2]SVC Linear Training Accuracy: ', accuracy_score(Y_test, svc_lin_pred))
    print(classification_report(Y_test, svc_lin_pred))
    
    print('[3]SVC RBF Training Accuracy: ', svc_rbf.score(X_train, Y_train))
    print('[3]SVC RBF Training Accuracy: ', accuracy_score(Y_test, svc_rbf_pred))
    print(classification_report(Y_test, svc_rbf_pred))
    
    print('[4]Gaussian NB Training Accuracy: ', gauss.score(X_train, Y_train))
    print('[4]Gaussian NB Training Accuracy: ', accuracy_score(Y_test, gauss_pred))
    print(classification_report(Y_test, gauss_pred))
    
    print('[5]Decision Tree Training Accuracy: ', tree.score(X_train, Y_train))
    print('[5]Decision Tree Training Accuracy: ', accuracy_score(Y_test, tree_pred))
    print(classification_report(Y_test, tree_pred))
    
    print('[6]Random Forest Training Accuracy: ', forest.score(X_train, Y_train))
    print('[6]Random Forest Training Accuracy: ', accuracy_score(Y_test, forest_pred))
    print(classification_report(Y_test, forest_pred))
    
    print('[7] Perceptron Training Accuracy: ', pcp.score(X_train, Y_train))
    print("[7] Perceptron Testing accuracy: ", accuracy_score(Y_test, pcp_pred))
    print(classification_report(Y_test, pcp_pred))
    
    print('[8] MultinomialNB Training Accuracy: ', mnb.score(X_train, Y_train))
    print("[8] MultinomialNB Testing accuracy: ", accuracy_score(Y_test, mnb_pred))
    print(classification_report(Y_test, pcp_pred))
    
    print('[9] BernoulliNB Training Accuracy: ', bnb.score(X_train, Y_train))
    print("[9] BernoulliNB Testing accuracy: ", accuracy_score(Y_test, bnb_pred))
    print(classification_report(Y_test, pcp_pred))

    print('[10] ComplementNB Training Accuracy: ', cnb.score(X_train, Y_train))
    print("[10] ComplementNB Testing accuracy: ", accuracy_score(Y_test, cnb_pred))
    print(classification_report(Y_test, pcp_pred))
    
    return log, knn, svc_lin, svc_rbf, gauss, tree, forest, pcp, mnb, bnb, cnb

In [13]:
def makeAcuracy(tree,x_test,y_test):
    predictions = clf.predict(x_test)
    erro = 0.0
    for x in range(len(predictions)):
        if predictions[x] != y_test[x]:
            erro += 1.
    acuracy = (1-(erro/len(predictions)))
    return acuracy

In [14]:
# ocomon_train_2.0 had your values analyseds individualy and it was seted the class manually
df_Tespc_train = pd.read_csv('CSV/ocomon_train_2.0.csv', usecols=['DESCRIB', 'ESPC'])
df_Tespc_test = pd.read_csv('CSV/ocomon_test_2.0.csv', usecols=['DESCRIB', 'ESPC'])

df_Tequip_train = pd.read_csv('CSV/ocomon_train_2.0.csv', usecols=['DESCRIB', 'EQUIP'])
df_Tequip_test = pd.read_csv('CSV/ocomon_test_2.0.csv', usecols=['DESCRIB', 'EQUIP'])

In [15]:
df_Tespc_train.count()

DESCRIB    2999
ESPC       1275
dtype: int64

In [22]:
df_Tespc_train.loc[1]

DESCRIB    computador nao liga  sujeira e mau na memoria ...
ESPC                                                     ram
Name: 1, dtype: object

In [16]:
df_Tequip_train.count()

DESCRIB    2999
EQUIP      2878
dtype: int64

In [17]:
# here was declareted a array to store all descriptions from dataframe in column "DESCRIB"
describ_Tespc_train = []
describ_Tequip_train = []

describ_Tespc_test = []
describ_Tequip_test = []

In [18]:
# store a class from description
classifier_Tespc_train = []
classifier_Tequip_train = []

classifier_Tespc_test = []
classifier_Tequip_test = []

In [19]:
for line in df_Tespc_train['DESCRIB']:
    regex = r"(null|(a)a+|(e)e+|(i)i+|(o)o+|(u)u+|(b)b+|(c)c+|(d)d+|(e)e+|(f)f+|(g)g+|(h)h+|(j)j+|(k)k+|(l)l+|(m)m+|(n)n+|(p)p+|(q)q+|(r)rr+|(s)ss+|(t)t+|(u)u+|(v)v+|(x)x+|(z)z+)"
    subst = "\\2\\3\\4\\5\\6\\7\\8\\9\\10\\11\\12\\13\\14\\15\\16\\17\\18\\19\\20\\21\\22\\23\\24\\25\\26\\27"
    result = re.sub(regex, subst, line.lower(), 0, re.MULTILINE | re.IGNORECASE)
    result = RemoveStopWords(result)
    describ_Tespc_train.append(result)
    describ_Tequip_train.append(result)
    
for line in df_Tespc_test['DESCRIB']:
    regex = r"(null|(a)a+|(e)e+|(i)i+|(o)o+|(u)u+|(b)b+|(c)c+|(d)d+|(e)e+|(f)f+|(g)g+|(h)h+|(j)j+|(k)k+|(l)l+|(m)m+|(n)n+|(p)p+|(q)q+|(r)rr+|(s)ss+|(t)t+|(u)u+|(v)v+|(x)x+|(z)z+)"
    subst = "\\2\\3\\4\\5\\6\\7\\8\\9\\10\\11\\12\\13\\14\\15\\16\\17\\18\\19\\20\\21\\22\\23\\24\\25\\26\\27"
    result = re.sub(regex, subst, line.lower(), 0, re.MULTILINE | re.IGNORECASE)
    result = RemoveStopWords(result)
    describ_Tespc_test.append(result)
    describ_Tequip_test.append(result)

In [20]:
for line in df_Tespc_train['ESPC']:
    classifier_Tespc_train.append(line)
for line in df_Tequip_train['EQUIP']:
    classifier_Tequip_train.append(line)

In [21]:
for line in df_Tespc_test['ESPC']:
    classifier_Tespc_test.append(line)
for line in df_Tequip_test['EQUIP']:
    classifier_Tequip_test.append(line)

In [73]:
train_Tespc = []
train_Tequip = []

In [74]:
for i, line in enumerate(describ_Tespc_train):
    aux = []
    aux.append(line)
    aux.append(classifier_Tespc_train[i])
    train_Tespc.append(aux)

In [75]:
for i, line in enumerate(describ_Tequip_train):
    aux = []
    aux.append(line)
    aux.append(classifier_Tequip_train[i])
    train_Tequip.append(aux)

In [76]:
test_Tespc = []
test_Tequip = []

In [77]:
for i, line in enumerate(describ_Tespc_test):
    aux = []
    aux.append(line)
    aux.append(classifier_Tespc_test[i])
    test_Tespc.append(aux)
for i, line in enumerate(describ_Tequip_test):
    aux = []
    aux.append(line)
    aux.append(classifier_Tequip_test[i])
    test_Tequip.append(aux)

In [78]:
# applie stemming in each list
train_Tespc = stemming(train_Tespc)
train_Tequip = stemming(train_Tequip)

In [79]:
test_Tespc = stemming(test_Tespc)
test_Tequip = stemming(test_Tequip)

In [80]:
# create a list of all word finded to train set
words_train_Tespc = addAllWords(train_Tespc)
words_train_Tequip = addAllWords(train_Tequip)

In [81]:
# create a list of all word finded to test set
words_test_Tespc = addAllWords(test_Tespc)
words_test_Tequip = addAllWords(test_Tequip)

In [82]:
# list frequency of all words in the list of describ
f_words_train_Tespc = freqWords(words_train_Tespc)
f_words_train_Tequip = freqWords(words_train_Tequip)

In [83]:
# list frequency of all words in the list of describ
f_words_test_Tespc = freqWords(words_test_Tespc)
f_words_test_Tequip = freqWords(words_test_Tequip)

In [84]:
# dictionary of words distinct in the list of train set
u_words_train_Tespc = uniqueWords(f_words_train_Tespc)
u_words_train_Tequip = uniqueWords(f_words_train_Tequip)

In [85]:
# dictionary of words distinct in the list of test set
u_words_test_Tespc = uniqueWords(f_words_test_Tespc)
u_words_test_Tequip = uniqueWords(f_words_test_Tequip)

In [86]:
x_Tespc, y_Tespc = train_test_split(train_Tespc, test_size = 0.3, random_state = 100)
x_Tequip, y_Tequip = train_test_split(train_Tequip, test_size = 0.3, random_state = 100)

In [87]:
baseOfTrain_Tespc = nltk.classify.apply_features(extractWords,x_Tespc)
baseOfTest_Tespc = nltk.classify.apply_features(extractWords,y_Tespc)

baseOfTrain_Tequip = nltk.classify.apply_features(extractWords,x_Tequip)
baseOfTest_Tequip = nltk.classify.apply_features(extractWords,y_Tequip)

In [88]:
complet = nltk.classify.apply_features(extractWords,test_Tespc)

In [89]:
x_train_Tespc = []
y_train_Tespc = []

x_train_Tequip = []
y_train_Tequip = []

In [90]:
for element in baseOfTrain_Tespc:
    aux = []
    for key, value in element[0].items():
        aux.append(value)
    x_train_Tespc.append(aux)
    y_train_Tespc.append(element[1])
    
for element in baseOfTrain_Tequip:
    aux = []
    for key, value in element[0].items():
        aux.append(value)
    x_train_Tequip.append(aux)
    y_train_Tequip.append(element[1])

# DECISION 

In [91]:
x_test_Tespc = []
y_test_Tespc = []

x_test_Tequip = []
y_test_Tequip = []

In [92]:
for element in baseOfTest_Tespc:
    aux = []
    for key, value in element[0].items():
        aux.append(value)
    x_test_Tespc.append(aux)
    y_test_Tespc.append(element[1])
for element in baseOfTest_Tequip:
    aux = []
    for key, value in element[0].items():
        aux.append(value)
    x_test_Tequip.append(aux)
    y_test_Tequip.append(element[1])

In [93]:
print('ESPC')
mod_Tespc = models(x_train_Tespc,y_train_Tespc, x_test_Tespc, y_test_Tespc)

ESPC




[0]Logistic Regression Training Accuracy:  0.9718913768461171
[0]Logistic Regression Training Accuracy:  0.8777777777777778
                precision    recall  f1-score   support

       bateria       1.00      0.44      0.62         9
          cabo       1.00      0.50      0.67         2
         fonte       0.94      0.89      0.91        36
            hd       1.00      0.67      0.80         6
    instalacao       0.83      0.91      0.87        80
           nan       0.89      0.95      0.92       515
         placa       0.64      0.47      0.55        19
 placa de rede       1.00      0.25      0.40         8
placa de video       0.00      0.00      0.00         2
   processador       0.00      0.00      0.00         1
           ram       0.50      0.67      0.57         3
      software       0.87      0.81      0.84       214
         troca       0.00      0.00      0.00         3
           usb       0.00      0.00      0.00         1
         video       0.00      0.00

  'precision', 'predicted', average, warn_for)


[1]KNeighborns Training Accuracy:  0.8508813720819438
[1]KNeighborns Training Accuracy:  0.8077777777777778
                precision    recall  f1-score   support

       bateria       0.00      0.00      0.00         9
          cabo       0.00      0.00      0.00         2
         fonte       0.83      0.81      0.82        36
            hd       0.00      0.00      0.00         6
    instalacao       0.53      0.91      0.67        80
           nan       0.86      0.92      0.89       515
         placa       0.82      0.47      0.60        19
 placa de rede       0.00      0.00      0.00         8
placa de video       0.00      0.00      0.00         2
   processador       0.00      0.00      0.00         1
           ram       0.29      0.67      0.40         3
      software       0.88      0.64      0.74       214
         troca       0.00      0.00      0.00         3
           usb       0.00      0.00      0.00         1
         video       0.00      0.00      0.00      

  'precision', 'predicted', average, warn_for)


[2]SVC Linear Training Accuracy:  0.9942829919009052
[2]SVC Linear Training Accuracy:  0.8844444444444445
                precision    recall  f1-score   support

       bateria       1.00      0.89      0.94         9
          cabo       0.33      0.50      0.40         2
         fonte       0.92      0.92      0.92        36
            hd       0.75      1.00      0.86         6
    instalacao       0.79      0.89      0.84        80
           nan       0.91      0.94      0.92       515
         placa       0.93      0.74      0.82        19
 placa de rede       0.73      1.00      0.84         8
placa de video       0.00      0.00      0.00         2
   processador       0.00      0.00      0.00         1
           ram       0.40      0.67      0.50         3
      software       0.89      0.78      0.83       214
         troca       1.00      0.33      0.50         3
           usb       0.00      0.00      0.00         1
         video       0.00      0.00      0.00        

  'precision', 'predicted', average, warn_for)


[3]SVC RBF Training Accuracy:  0.5759885659838018
[3]SVC RBF Training Accuracy:  0.5722222222222222
                precision    recall  f1-score   support

       bateria       0.00      0.00      0.00         9
          cabo       0.00      0.00      0.00         2
         fonte       0.00      0.00      0.00        36
            hd       0.00      0.00      0.00         6
    instalacao       0.00      0.00      0.00        80
           nan       0.57      1.00      0.73       515
         placa       0.00      0.00      0.00        19
 placa de rede       0.00      0.00      0.00         8
placa de video       0.00      0.00      0.00         2
   processador       0.00      0.00      0.00         1
           ram       0.00      0.00      0.00         3
      software       0.00      0.00      0.00       214
         troca       0.00      0.00      0.00         3
           usb       0.00      0.00      0.00         1
         video       0.00      0.00      0.00         1

  

  'precision', 'predicted', average, warn_for)


[4]Gaussian NB Training Accuracy:  0.8604097189137685
[4]Gaussian NB Training Accuracy:  0.65
                precision    recall  f1-score   support

       bateria       1.00      0.56      0.71         9
          cabo       0.04      0.50      0.08         2
         fonte       0.38      0.14      0.20        36
            hd       0.10      0.50      0.17         6
    instalacao       0.34      0.45      0.39        80
           nan       0.82      0.79      0.80       515
         placa       0.44      0.42      0.43        19
 placa de rede       0.00      0.00      0.00         8
placa de video       0.00      0.00      0.00         2
   processador       0.00      0.00      0.00         1
           ram       0.10      0.33      0.15         3
      software       0.64      0.55      0.59       214
         troca       0.00      0.00      0.00         3
           usb       0.00      0.00      0.00         1
         video       0.00      0.00      0.00         1

      ac

  'precision', 'predicted', average, warn_for)


[5]Decision Tree Training Accuracy:  0.9990471653168176
[5]Decision Tree Training Accuracy:  0.8511111111111112
                precision    recall  f1-score   support

       bateria       0.89      0.89      0.89         9
          cabo       0.25      0.50      0.33         2
         fonte       0.92      0.92      0.92        36
            hd       0.56      0.83      0.67         6
    instalacao       0.74      0.78      0.76        80
           nan       0.88      0.92      0.90       515
         placa       0.92      0.58      0.71        19
 placa de rede       0.67      1.00      0.80         8
placa de video       1.00      0.50      0.67         2
   processador       0.00      0.00      0.00         1
           ram       0.40      0.67      0.50         3
      software       0.85      0.74      0.80       214
         troca       0.50      0.33      0.40         3
           usb       0.00      0.00      0.00         1
         video       0.00      0.00      0.00  

  'precision', 'predicted', average, warn_for)


[6]Random Forest Training Accuracy:  0.993806574559314
[6]Random Forest Training Accuracy:  0.85
                precision    recall  f1-score   support

       bateria       1.00      0.56      0.71         9
          cabo       1.00      0.50      0.67         2
         fonte       0.96      0.72      0.83        36
            hd       0.62      0.83      0.71         6
    instalacao       0.67      0.89      0.76        80
           nan       0.88      0.94      0.91       515
         placa       0.67      0.53      0.59        19
 placa de rede       1.00      0.12      0.22         8
placa de video       0.00      0.00      0.00         2
   processador       0.00      0.00      0.00         1
           ram       0.33      0.67      0.44         3
      software       0.88      0.74      0.80       214
         troca       0.00      0.00      0.00         3
           usb       0.00      0.00      0.00         1
         video       0.00      0.00      0.00         1

     

  'precision', 'predicted', average, warn_for)


[7] Perceptron Training Accuracy:  0.9952358265840877
[7] Perceptron Testing accuracy:  0.8644444444444445
                precision    recall  f1-score   support

       bateria       1.00      0.78      0.88         9
          cabo       0.33      0.50      0.40         2
           dvd       0.00      0.00      0.00         0
         fonte       0.94      0.83      0.88        36
            hd       0.86      1.00      0.92         6
    instalacao       0.86      0.80      0.83        80
       monitor       0.00      0.00      0.00         0
           nan       0.92      0.92      0.92       515
         placa       0.72      0.68      0.70        19
 placa de rede       0.83      0.62      0.71         8
placa de video       0.00      0.00      0.00         2
   processador       0.00      0.00      0.00         1
           ram       0.50      0.33      0.40         3
      software       0.85      0.82      0.83       214
         troca       0.25      0.33      0.29       

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


[8] MultinomialNB Training Accuracy:  0.9232968080038113
[8] MultinomialNB Testing accuracy:  0.7911111111111111
                precision    recall  f1-score   support

       bateria       1.00      0.78      0.88         9
          cabo       0.33      0.50      0.40         2
           dvd       0.00      0.00      0.00         0
         fonte       0.94      0.83      0.88        36
            hd       0.86      1.00      0.92         6
    instalacao       0.86      0.80      0.83        80
       monitor       0.00      0.00      0.00         0
           nan       0.92      0.92      0.92       515
         placa       0.72      0.68      0.70        19
 placa de rede       0.83      0.62      0.71         8
placa de video       0.00      0.00      0.00         2
   processador       0.00      0.00      0.00         1
           ram       0.50      0.33      0.40         3
      software       0.85      0.82      0.83       214
         troca       0.25      0.33      0.29 

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


[9] BernoulliNB Training Accuracy:  0.9156741305383516
[9] BernoulliNB Testing accuracy:  0.7944444444444444
                precision    recall  f1-score   support

       bateria       1.00      0.78      0.88         9
          cabo       0.33      0.50      0.40         2
           dvd       0.00      0.00      0.00         0
         fonte       0.94      0.83      0.88        36
            hd       0.86      1.00      0.92         6
    instalacao       0.86      0.80      0.83        80
       monitor       0.00      0.00      0.00         0
           nan       0.92      0.92      0.92       515
         placa       0.72      0.68      0.70        19
 placa de rede       0.83      0.62      0.71         8
placa de video       0.00      0.00      0.00         2
   processador       0.00      0.00      0.00         1
           ram       0.50      0.33      0.40         3
      software       0.85      0.82      0.83       214
         troca       0.25      0.33      0.29     

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


[10] ComplementNB Training Accuracy:  0.8999523582658409
[10] ComplementNB Testing accuracy:  0.8177777777777778
                precision    recall  f1-score   support

       bateria       1.00      0.78      0.88         9
          cabo       0.33      0.50      0.40         2
           dvd       0.00      0.00      0.00         0
         fonte       0.94      0.83      0.88        36
            hd       0.86      1.00      0.92         6
    instalacao       0.86      0.80      0.83        80
       monitor       0.00      0.00      0.00         0
           nan       0.92      0.92      0.92       515
         placa       0.72      0.68      0.70        19
 placa de rede       0.83      0.62      0.71         8
placa de video       0.00      0.00      0.00         2
   processador       0.00      0.00      0.00         1
           ram       0.50      0.33      0.40         3
      software       0.85      0.82      0.83       214
         troca       0.25      0.33      0.29 

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [94]:
print('EQUIP')
mod_Tequip = models(x_train_Tequip,y_train_Tequip, x_test_Tequip, y_test_Tequip)

EQUIP




[0]Logistic Regression Training Accuracy:  0.9866603144354454
[0]Logistic Regression Training Accuracy:  0.9077777777777778
               precision    recall  f1-score   support

   computador       0.91      0.89      0.90       321
     datashow       1.00      0.82      0.90        11
       e-mail       0.97      0.99      0.98       172
estabilizador       0.00      0.00      0.00         3
   impressora       0.89      0.99      0.94       122
     internet       0.86      0.93      0.89       187
      monitor       1.00      0.67      0.80         3
          nan       0.91      0.67      0.77        48
      nobreak       1.00      0.92      0.96        25
     notebook       0.00      0.00      0.00         8

     accuracy                           0.91       900
    macro avg       0.75      0.69      0.71       900
 weighted avg       0.90      0.91      0.90       900



  'precision', 'predicted', average, warn_for)


[1]KNeighborns Training Accuracy:  0.8947117675083373
[1]KNeighborns Training Accuracy:  0.8411111111111111
               precision    recall  f1-score   support

   computador       0.86      0.85      0.86       321
     datashow       0.90      0.82      0.86        11
       e-mail       0.97      0.97      0.97       172
estabilizador       0.00      0.00      0.00         3
   impressora       0.67      0.98      0.80       122
     internet       0.84      0.82      0.83       187
      monitor       0.00      0.00      0.00         3
          nan       0.80      0.50      0.62        48
      nobreak       1.00      0.48      0.65        25
     notebook       0.00      0.00      0.00         8

     accuracy                           0.84       900
    macro avg       0.60      0.54      0.56       900
 weighted avg       0.84      0.84      0.83       900



  'precision', 'predicted', average, warn_for)


[2]SVC Linear Training Accuracy:  0.9980943306336351
[2]SVC Linear Training Accuracy:  0.9177777777777778
               precision    recall  f1-score   support

   computador       0.92      0.88      0.90       321
     datashow       1.00      0.82      0.90        11
       e-mail       0.97      0.99      0.98       172
estabilizador       1.00      0.33      0.50         3
   impressora       0.91      0.98      0.94       122
     internet       0.88      0.93      0.90       187
      monitor       1.00      0.33      0.50         3
          nan       0.83      0.83      0.83        48
      nobreak       0.96      1.00      0.98        25
     notebook       0.75      0.38      0.50         8

     accuracy                           0.92       900
    macro avg       0.92      0.75      0.79       900
 weighted avg       0.92      0.92      0.92       900

[3]SVC RBF Training Accuracy:  0.4540257265364459
[3]SVC RBF Training Accuracy:  0.4577777777777778
               precis

  'precision', 'predicted', average, warn_for)


[4]Gaussian NB Training Accuracy:  0.8966174368747022
[4]Gaussian NB Training Accuracy:  0.6811111111111111
               precision    recall  f1-score   support

   computador       0.71      0.69      0.70       321
     datashow       0.57      0.73      0.64        11
       e-mail       0.98      0.88      0.93       172
estabilizador       0.00      0.00      0.00         3
   impressora       0.43      0.59      0.49       122
     internet       0.74      0.69      0.71       187
      monitor       1.00      0.67      0.80         3
          nan       1.00      0.21      0.34        48
      nobreak       0.49      0.80      0.61        25
     notebook       0.00      0.00      0.00         8

     accuracy                           0.68       900
    macro avg       0.59      0.53      0.52       900
 weighted avg       0.73      0.68      0.69       900

[5]Decision Tree Training Accuracy:  1.0
[5]Decision Tree Training Accuracy:  0.8788888888888889
               precisi

  'precision', 'predicted', average, warn_for)


[7] Perceptron Training Accuracy:  0.9995235826584088
[7] Perceptron Testing accuracy:  0.8955555555555555
               precision    recall  f1-score   support

   computador       0.90      0.89      0.90       321
     datashow       0.56      0.82      0.67        11
       e-mail       0.97      0.99      0.98       172
estabilizador       0.50      0.33      0.40         3
   impressora       0.89      0.97      0.93       122
     internet       0.89      0.86      0.87       187
      monitor       0.33      0.33      0.33         3
          nan       0.80      0.67      0.73        48
      nobreak       1.00      1.00      1.00        25
     notebook       0.60      0.38      0.46         8

     accuracy                           0.90       900
    macro avg       0.74      0.72      0.73       900
 weighted avg       0.89      0.90      0.89       900

[8] MultinomialNB Training Accuracy:  0.9728442115292997
[8] MultinomialNB Testing accuracy:  0.8555555555555555
       

In [95]:
cpl = []
for element in complet:
    aux = []
    for key, value in element[0].items():
        aux.append(value)
    cpl.append(aux)

In [96]:
y_pred_cpl_Tespc = mod_Tespc[2].predict(cpl)
y_pred_cpl_Tequip = mod_Tequip[2].predict(cpl)

In [97]:
df2 = pd.DataFrame({'ESPC': y_pred_cpl_Tespc})
df2.to_csv('prediction_Tespc_ocomon.csv', index=False)

df3 = pd.DataFrame({'EQUIP': y_pred_cpl_Tequip})
df3.to_csv('prediction_Tequip_ocomon.csv', index=False)

In [98]:
new_df = pd.read_csv('ocomon_log.csv')
new_df.tail(3)

Unnamed: 0,DESCRIB,PROB_ORIG,PROB,ESPC,EQUIP,I_NAME,R_NAME,C_NAME,EQUIP_ID,HAS_VALUE,...,HOUR_OPEN,MIN_OPEN,DATA_OPEN,CLOSE_YEAR_4,CLOSE_MONTH_4,CLOSE_DAY_4,HOUR_CLOSE,MIN_CLOSE,DATA_CLOSE,STI
15799,liga e desliga reinicia sozinho local prospeqi...,,software,s.o,computador,teresina,joao xavier,bolsista geral 2018,237490,NP,...,16,31,16/9/2019,-1,-1,-1,-1,-1,99/99/9999,0
15800,windows corrompidocoordenadoria de avaliacao e...,,software,s.o,computador,teresina,raphaela da mota silva,bolsista geral 2018,256479,NP,...,17,35,17/9/2019,-1,-1,-1,-1,-1,99/99/9999,0
15801,computador nao liga aberto por felipe a null ...,,hardware,hardware,computador,teresina,lucas lopes,bolsista geral 2018,161603,NP,...,8,54,18/9/2019,-1,-1,-1,-1,-1,99/99/9999,sti_central


In [99]:
hvpre2 = df2.ESPC.tolist()
hvpre3 = df3.EQUIP.tolist()

In [100]:
new_df['ESPC'] = hvpre2
new_df['EQUIP'] = hvpre3

In [101]:
new_df.to_csv('ocomon_FINAL.csv', index=False)

In [102]:
new_df

Unnamed: 0,DESCRIB,PROB_ORIG,PROB,ESPC,EQUIP,I_NAME,R_NAME,C_NAME,EQUIP_ID,HAS_VALUE,...,HOUR_OPEN,MIN_OPEN,DATA_OPEN,CLOSE_YEAR_4,CLOSE_MONTH_4,CLOSE_DAY_4,HOUR_CLOSE,MIN_CLOSE,DATA_CLOSE,STI
0,foi solicitado a manutencao de pontos de reded...,,N.A,,internet,teresina,jaclason machado veras,julia sousa santos,-999999,,...,9,15,18/4/2012,1,2,4,9,0,26/4/2012,sti_cca
1,computador nao liga sujeira e mau na memoria ...,,hardware,ram,computador,teresina,secretaria do mestrado de piliticas publicas,luis carlos dos santos farias,140981,P,...,8,13,23/4/2014,2,2,4,9,4,23/4/2014,sti_cchl
2,meu computador esta ligando o monitor tb mas a...,,hardware,monitor,monitor,teresina,gabinete do reitor 5511,constanca dolores lopes monteiro,-999999,P,...,15,43,24/10/2011,1,4,4,15,59,24/10/2011,sti_central
3,desabilite o envio de email como o servico de ...,utilize esta descricao de problema para proble...,N.A,,,teresina,guilherme,guilherme avelino,-999999,,...,13,20,24/10/2011,1,4,4,17,6,26/10/2011,sti_central
4,instalacao de impressoras e remocao de virus ...,,hardware,software,computador,teresina,,bolsista nti,-999999,NP,...,12,13,24/10/2011,1,4,4,9,11,26/10/2011,sti_central
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15797,computador lentidao preuni cp foi realizado ba...,,software,software,computador,teresina,erica cecilia,bolsista geral 2018,188134,NP,...,15,23,13/9/2019,4,3,3,8,21,17/9/2019,0
15798,computador lento e nao liga formatar apenas se...,,hardware,software,computador,teresina,igor cunha,bolsista geral 2018,175491,NP,...,16,21,13/9/2019,4,3,2,17,47,13/9/2019,0
15799,liga e desliga reinicia sozinho local prospeqi...,,software,software,computador,teresina,joao xavier,bolsista geral 2018,237490,NP,...,16,31,16/9/2019,-1,-1,-1,-1,-1,99/99/9999,0
15800,windows corrompidocoordenadoria de avaliacao e...,,software,software,computador,teresina,raphaela da mota silva,bolsista geral 2018,256479,NP,...,17,35,17/9/2019,-1,-1,-1,-1,-1,99/99/9999,0
