# Voting

In [30]:
import numpy as np
from nltk.corpus import stopwords
from sklearn.datasets import load_files
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, StackingClassifier
from sklearn.metrics import accuracy_score

import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\eduardo.guimaraes\AppData\Roaming\nltk_data..
[nltk_data]     .
[nltk_data]   Package stopwords is already up-to-date!


True

In [31]:
# English stop words as a plain list.
# The corpus reader is reached through `nltk.corpus.stopwords` instead of the
# imported name `stopwords`, because this cell rebinds that name: reading from
# the rebound name would make a second run of this cell fail with
# AttributeError. A list (not a set) is kept because scikit-learn's
# `stop_words` parameter is validated as 'english', a list, or None.
stopwords = list(nltk.corpus.stopwords.words("english"))

# Load the corpus from the local `dados` directory; bytes that are not valid
# UTF-8 are replaced rather than raising.
news = load_files('dados', encoding='utf-8', decode_error='replace')

In [32]:
# Split the loaded bunch into raw documents (x) and their labels (y).
x, y = news.data, news.target

In [33]:
# Hold out 30% of the documents for evaluation; the fixed seed keeps the
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=93,
)

In [34]:
# TF-IDF features restricted to the 1000 most frequent terms, with no row
# normalisation (norm=None).
# `stop_words` is coerced to a list: scikit-learn validates this parameter as
# 'english', a list, or None, so a set is rejected by recent releases.
vectorizer = TfidfVectorizer(
    norm=None,
    stop_words=list(stopwords),
    max_features=1000,
    decode_error='ignore',
)

In [35]:
# Fit the vectorizer on the training documents only and transform them.
x_train_vec = vectorizer.fit_transform(x_train)

# Transform the held-out documents with the already-fitted vectorizer
# (no refitting on test data).
x_test_vec = vectorizer.transform(x_test)

In [36]:
# Base estimators combined by the voting ensemble below.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases;
# confirm against the installed version before upgrading.
logic = LogisticRegression(
    multi_class='multinomial',
    solver='lbfgs',
    random_state=42,
    max_iter=1000,
)
random_forest = RandomForestClassifier(
    n_estimators=1000,
    max_depth=100,
    random_state=42,
)
multinomial = MultinomialNB()

In [37]:
# Soft-voting ensemble over the three base estimators: the predicted class is
# derived from the estimators' class probabilities rather than a majority
# vote on their labels.
voting_model = VotingClassifier(
    estimators=[('lg', logic), ('rf', random_forest), ('nb', multinomial)],
    voting='soft',
)

print("\nModelo de Votacao:\n")
print(voting_model)

# Train on the TF-IDF training matrix, then evaluate on the held-out split.
voting_model.fit(x_train_vec, y_train)
predict = voting_model.predict(x_test_vec)
accuracy = accuracy_score(y_test, predict)

print(f"\nAccuracy: {accuracy}\n")


Modelo de Votacao:

VotingClassifier(estimators=[('lg',
                              LogisticRegression(max_iter=1000,
                                                 multi_class='multinomial',
                                                 random_state=42)),
                             ('rf',
                              RandomForestClassifier(max_depth=100,
                                                     n_estimators=1000,
                                                     random_state=42)),
                             ('nb', MultinomialNB())],
                 voting='soft')

Accuracy: 0.9880239520958084



In [38]:
# Stacking ensemble: a random forest and a multinomial naive Bayes model feed
# a logistic-regression final estimator.
base_model = [
    ('rf', RandomForestClassifier(n_estimators=1000, random_state=42)),
    ('nb', MultinomialNB()),
]

stacking_model = StackingClassifier(
    estimators=base_model,
    final_estimator=LogisticRegression(
        multi_class='multinomial',
        random_state=42,
        max_iter=1000,
    ),
)
print("Stacking Model: ", stacking_model)

# Fit on the sparse TF-IDF matrix directly. The previous `.todense()` call
# produced an `np.matrix`, which recent scikit-learn releases reject with a
# TypeError; it also densified the data needlessly and did not match the
# sparse matrix passed to `score`.
stacking_model.fit(x_train_vec, y_train)
accuracy = stacking_model.score(x_test_vec, y_test)

print(f"\nAccuracy: {accuracy}\n")

Stacking Model:  StackingClassifier(estimators=[('rf',
                                RandomForestClassifier(n_estimators=1000,
                                                       random_state=42)),
                               ('nb', MultinomialNB())],
                   final_estimator=LogisticRegression(max_iter=1000,
                                                      multi_class='multinomial',
                                                      random_state=42))





Accuracy: 0.9820359281437125

