forked from yanshengli/sina_weibo_crawler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
saveClassifer.py
37 lines (35 loc) · 1.58 KB
/
saveClassifer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#-*-coding=utf8-*-
import sklearn
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.svm import SVC, NuSVC, LinearSVC
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pickle
import evalueClassier as ec
import extractFeatures as ef
import evalueClassier as ec
#把分类器存储下来
pos_review = pickle.load(open('/Users/genghaiyang/ghy_works/projects/weibo_crawler/textmining/sentiML/pos_neg_review/pos_review.pkl','r'))
neg_review = pickle.load(open('/Users/genghaiyang/ghy_works/projects/weibo_crawler/textmining/sentiML/pos_neg_review/neg_review.pkl','r'))
neg_review = neg_review*3
pos = pos_review[:50]
neg = neg_review[:50]
word_scores = ef.create_word_scores(pos,neg,'pos','neg')
best_words = ef.find_best_words(word_scores, 1000)
posFeatures = []
for p in pos:
pos_selected = ef.best_word_features(p,best_words)
posFeatures.append(ef.tagFeatures(pos_selected,'pos'))
negFeatures = []
for n in neg:
neg_selected = ef.best_word_features(n,best_words)
negFeatures.append(ef.tagFeatures(neg_selected,'neg'))
# divide Features into train devtest and test sets.
trainSet = posFeatures[:50]+negFeatures[:50]
devtestSet = posFeatures[40:50]+negFeatures[40:50]
testSet = posFeatures[40:50]+negFeatures[40:50]
#Train and save classifier
NuSVC_classifier = SklearnClassifier(NuSVC(probability=True))
NuSVC_classifier.train(trainSet)
pickle.dump(NuSVC_classifier, open('/Users/genghaiyang/ghy_works/projects/weibo_crawler/textmining/sentiML/NuSVC_classifier.pkl','w'))