In [23]:
# Load the pickled training and test datasets.
import pickle


def _read_pickle(path):
    """Return the object deserialized from the pickle file at ``path``."""
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


train_bunch = _read_pickle('../preprocess/train/train_bag.pickle')
test_bunch = _read_pickle('../preprocess/test/test_bag.pickle')

In [24]:
# NOTE(review): the names are inverted relative to the usual convention.
# X_train holds the class labels and y_train holds the TF-IDF weight matrix;
# the later cells rely on this and call fit(y_train, X_train), so the names
# are kept as they are.
X_train, y_train = train_bunch.label, train_bunch.tfidf_weight_matrices

In [25]:
# Standalone linear SVM baseline.
from sklearn.svm import LinearSVC

# fit() returns the estimator itself, so construction and training are chained.
# Argument order follows this notebook's naming: y_train is the feature
# matrix and X_train the labels.
SVM = LinearSVC(C=1, tol=1e-5).fit(y_train, X_train)

svm_test_features = test_bunch.tfidf_weight_matrices
svm_predicted = SVM.predict(svm_test_features)

In [26]:
# Standalone multinomial naive Bayes baseline.
from sklearn.naive_bayes import MultinomialNB

# fit() returns the estimator itself, so construction and training are chained.
# Argument order follows this notebook's naming: y_train is the feature
# matrix and X_train the labels.
Bayes = MultinomialNB(alpha=0.001).fit(y_train, X_train)

bayes_test_features = test_bunch.tfidf_weight_matrices
bayes_predicted = Bayes.predict(bayes_test_features)

In [27]:
# Standalone decision tree baseline.
from sklearn.tree import DecisionTreeClassifier

# A fixed random_state makes the fitted tree (and therefore the report below)
# reproducible across kernel restarts; without it the features considered at
# each split are permuted with an unseeded generator.
Tree = DecisionTreeClassifier(random_state=0)

# Argument order follows this notebook's naming: y_train is the feature
# matrix and X_train the labels.
Tree.fit(y_train, X_train)
tree_predicted = Tree.predict(test_bunch.tfidf_weight_matrices)

In [28]:
from sklearn.metrics import classification_report

# Print one per-class precision/recall/F1 report for each baseline model,
# scored against the test labels.
for banner, predicted in (
    ("----------------SVM----------------", svm_predicted),
    ("----------------Bayes----------------", bayes_predicted),
    ("----------------Tree----------------", tree_predicted),
):
    print(banner)
    print(classification_report(test_bunch.label, predicted))

----------------SVM----------------
              precision    recall  f1-score   support

          体育       1.00      1.00      1.00      1000
          娱乐       0.97      0.98      0.98      1000
          教育       0.92      0.87      0.90      1000
          时尚       0.94      0.96      0.95      1000
          时政       0.96      0.93      0.95      1000
          游戏       0.97      0.97      0.97      1000
          社会       0.93      0.90      0.91      1000
          科技       0.91      0.92      0.91      1000
          股票       0.95      0.97      0.96      1000
          财经       0.94      0.98      0.96      1000

    accuracy                           0.95     10000
   macro avg       0.95      0.95      0.95     10000
weighted avg       0.95      0.95      0.95     10000

----------------Bayes----------------
              precision    recall  f1-score   support

          体育       0.98      0.98      0.98      1000
          娱乐       0.91      0.98      0.94      1000
    

In [36]:
# AdaBoost ensembles, one per base classifier type.
from sklearn.ensemble import AdaBoostClassifier
import time


def _fit_timed(tag, model, features, labels):
    """Fit ``model`` in place and return the training time in seconds.

    Prints a start line and the elapsed time, both prefixed with ``tag``.
    """
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    began = time.perf_counter()
    print("Start " + tag + " Training")
    model.fit(features, labels)
    elapsed = time.perf_counter() - began
    print(tag + " Training Time: " + str(elapsed))
    return elapsed


# Every AdaBoost model is configured with 50 weak classifiers.
# LinearSVC is boosted with the discrete SAMME variant; the other two use
# SAMME.R.
# NOTE(review): recent scikit-learn releases deprecate 'SAMME.R' — confirm
# against the installed version before upgrading.
SVC_adaboost = AdaBoostClassifier(LinearSVC(C=1, tol=1e-5), n_estimators=50, algorithm='SAMME')
Bayes_adaboost = AdaBoostClassifier(MultinomialNB(alpha=0.001), n_estimators=50, algorithm='SAMME.R')
# The seed is set on the ensemble so the boosted trees are reproducible.
Tree_adaboost = AdaBoostClassifier(DecisionTreeClassifier(), n_estimators=50, algorithm='SAMME.R',
                                   random_state=0)

# Argument order follows this notebook's naming: y_train is the feature
# matrix and X_train the labels.
SVC_train_time = _fit_timed("SVM", SVC_adaboost, y_train, X_train)
Bayes_train_time = _fit_timed("Bayes", Bayes_adaboost, y_train, X_train)
Tree_train_time = _fit_timed("Tree", Tree_adaboost, y_train, X_train)

SVC_adaboost_predicted = SVC_adaboost.predict(test_bunch.tfidf_weight_matrices)
Bayes_adaboost_predicted = Bayes_adaboost.predict(test_bunch.tfidf_weight_matrices)
Tree_adaboost_predicted = Tree_adaboost.predict(test_bunch.tfidf_weight_matrices)

# Print one classification report per boosted model.
for banner, predicted in (
    ("----------------SVM----------------", SVC_adaboost_predicted),
    ("----------------Bayes----------------", Bayes_adaboost_predicted),
    ("----------------Tree----------------", Tree_adaboost_predicted),
):
    print(banner)
    print(classification_report(test_bunch.label, predicted))

Start SVM Training
SVM Training Time: 88.95758128166199
Start Bayes Training
Bayes Training Time: 5.5096070766448975
Start Tree Training
Tree Training Time: 582.570562839508
----------------SVM----------------
              precision    recall  f1-score   support

          体育       0.99      0.98      0.99      1000
          娱乐       0.93      0.97      0.95      1000
          教育       0.84      0.80      0.82      1000
          时尚       0.80      0.96      0.87      1000
          时政       0.96      0.85      0.90      1000
          游戏       0.82      0.96      0.88      1000
          社会       0.88      0.86      0.87      1000
          科技       0.86      0.57      0.69      1000
          股票       0.94      0.98      0.96      1000
          财经       0.91      0.98      0.94      1000

    accuracy                           0.89     10000
   macro avg       0.89      0.89      0.89     10000
weighted avg       0.89      0.89      0.89     10000

----------------Bayes----------

In [37]:
# 混合型Adaboost
import matplotlib
import matplotlib.pyplot as plt
from numpy import *

AttributeError: 'list' object has no attribute 'shape'