In [1]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn import tree
from sklearn import svm
from sklearn import datasets
import matplotlib.pyplot as plt
from matplotlib.pyplot import MultipleLocator
import pandas as pd
import numpy as np
import time
from tqdm import trange
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from imblearn.over_sampling import SVMSMOTE
from imblearn.combine import SMOTETomek
from imblearn.combine import SMOTEENN
from collections import Counter

导入数据

In [2]:
table = pd.read_csv(r'D:\repos\py\temp\online_shoppers_intention.csv')
y = table.iloc[:, -1]
table = table.drop(columns=['Revenue'])
X = pd.get_dummies(table)
mm = MinMaxScaler(feature_range=(0, 1))
X = mm.fit_transform(X)

train_data, test_data, train_label, test_label = train_test_split(X, y, test_size=0.3, random_state=3)
smote_tomek = SMOTETomek(random_state=3)
train_data, train_label = smote_tomek.fit_resample(train_data, train_label)
test_label = np.array(test_label).ravel()
train_label = np.array(train_label).ravel()

定义分类器

In [3]:
class Classifier:
    def __init__(self):
        self.train_data = train_data
        self.train_label = train_label
        self.test_data = test_data
        self.test_label = test_label

In [4]:
class NaiveBayes(Classifier):
    def __init__(self):
        super(NaiveBayes, self).__init__()
        self.clf = GaussianNB()
        self.train()
        self.score()

    def train(self):
        start = time.time()
        self.clf.fit(train_data, train_label.ravel())
        end = time.time()
        print('naive bayes:\ntraining time:', end - start, 's')

    def score(self):
        accu = cross_val_score(self.clf, self.test_data, self.test_label, cv=10).mean()
        predict = self.clf.predict(test_data)
        f1 = f1_score(test_label, predict)
        print('accuracy:{:.10f}\tf1-score:{:.10f}\n'.format(accu, f1))

In [5]:
class KNN(Classifier):
    def __init__(self, n=5):
        super(KNN, self).__init__()
        self.clf = KNeighborsClassifier(n_neighbors=n)
        self.train()
        self.score()

    def train(self):
        start = time.time()
        self.clf.fit(train_data, train_label.ravel())
        end = time.time()
        print('knn:\ntraining time:', end - start, 's')

    def score(self):
        accu = cross_val_score(self.clf, self.test_data, self.test_label, cv=10).mean()
        predict = self.clf.predict(test_data)
        f1 = f1_score(test_label, predict)
        print('accuracy:{:.10f}\tf1-score:{:.10f}\n'.format(accu, f1))

In [6]:
class DecisionTree(Classifier):
    def __init__(self):
        super(DecisionTree, self).__init__()
        self.clf = tree.DecisionTreeClassifier(criterion="entropy")
        self.train()
        self.score()

    def train(self):
        start = time.time()
        self.clf.fit(train_data, train_label.ravel())
        end = time.time()
        print('decision tree:\ntraining time:', end - start, 's')

    def score(self):
        accu = cross_val_score(self.clf, self.test_data, self.test_label, cv=10).mean()
        predict = self.clf.predict(test_data)
        f1 = f1_score(test_label, predict)
        print('accuracy:{:.10f}\tf1-score:{:.10f}\n'.format(accu, f1))

In [7]:
class SVM(Classifier):
    def __init__(self):
        super(SVM, self).__init__()
        self.clf = svm.SVC(C=2, kernel='rbf', decision_function_shape='ovr')
        self.train()
        self.score()

    def train(self):
        start = time.time()
        self.clf.fit(train_data, train_label.ravel())
        end = time.time()
        print('svm:\ntraining time:', end - start, 's')

    def score(self):
        accu = cross_val_score(self.clf, self.test_data, self.test_label, cv=10).mean()
        predict = self.clf.predict(test_data)
        f1 = f1_score(test_label, predict)
        print('accuracy:{:.10f}\tf1-score:{:.10f}\n'.format(accu, f1))

In [8]:
naive_bayes = NaiveBayes()
knn = KNN()
decision_tree = DecisionTree()
svm = SVM()

naive bayes:
training time: 0.007980108261108398 s
accuracy:0.3768563686	f1-score:0.3640845070

knn:
training time: 0.0 s
accuracy:0.8429282941	f1-score:0.3990877993

decision tree:
training time: 0.12865567207336426 s
accuracy:0.8680773456	f1-score:0.5730612245

svm:
training time: 5.378597974777222 s
accuracy:0.8751036402	f1-score:0.6098265896

