In [33]:
import joblib
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, \
    HistGradientBoostingClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import GaussianNB, BernoulliNB
# Import some data to play with
from sklearn.neighbors import RadiusNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC


class Ensemble:
    """Fit individual scikit-learn classifiers and a soft-voting ensemble of them.

    The training and held-out data are read from space-delimited text files
    in which the last column is the label and every other column is a
    feature.
    """

    def __init__(self):
        # Feature matrices and label vectors; all stay None until load_data().
        self.x_train = None
        self.x_test = None
        self.y_train = None
        self.y_test = None

    @staticmethod
    def _read_xy(path):
        """Read one space-delimited file and return ``(features, labels)``."""
        # ndmin=2 keeps a file that holds a single sample two-dimensional, so
        # the column slicing below works for it too (a 1-D result would make
        # the slicing fail).
        table = np.loadtxt(path, dtype=np.float32, delimiter=' ', ndmin=2)
        return table[:, :-1], table[:, -1]

    def load_data(self, text, train):
        """Load the held-out and training splits into the instance.

        Args:
            text: Path of the file stored as ``x_test`` / ``y_test``.
            train: Path of the file stored as ``x_train`` / ``y_train``.
        """
        self.x_test, self.y_test = Ensemble._read_xy(text)
        self.x_train, self.y_train = Ensemble._read_xy(train)

    @staticmethod
    def __Classifiers__(name=None):
        """Return a new, unfitted classifier for the given registry name.

        Args:
            name: One of 'Neighbors', 'Gaussian_Process', 'Gaussian_NB',
                'Bernoulli_NB', 'DecisionTree', 'Bagging', 'RandomForest',
                'AdaBoost', 'GradientBoosting', 'HistGradientBoosting', 'MLP'.

        Returns:
            The freshly constructed scikit-learn estimator.

        Raises:
            ValueError: If ``name`` is not a known classifier name. This
                replaces a silent ``None`` return that only failed later,
                when ``.fit`` was called on it.
        """
        # Seed for reproducibility (used by the estimators that accept one here).
        random_state = 100

        def make_bagging():
            # The keyword was renamed from ``base_estimator`` to ``estimator``
            # in scikit-learn 1.2 and the old spelling was removed later, so
            # try the current name first and fall back for old installations.
            try:
                return BaggingClassifier(estimator=SVC())
            except TypeError:
                return BaggingClassifier(base_estimator=SVC())

        # Factories are lazy so that only the requested estimator is built.
        factories = {
            'Neighbors': lambda: RadiusNeighborsClassifier(radius=1.0),
            'Gaussian_Process': lambda: GaussianProcessClassifier(
                kernel=1.0 * RBF(1.0), random_state=random_state),
            'Gaussian_NB': lambda: GaussianNB(),
            'Bernoulli_NB': lambda: BernoulliNB(),
            'DecisionTree': lambda: tree.DecisionTreeClassifier(),
            'Bagging': make_bagging,
            'RandomForest': lambda: RandomForestClassifier(n_estimators=10),
            'AdaBoost': lambda: AdaBoostClassifier(
                n_estimators=100, random_state=random_state),
            'GradientBoosting': lambda: GradientBoostingClassifier(
                n_estimators=100, learning_rate=1.0, max_depth=1,
                random_state=random_state),
            'HistGradientBoosting': lambda: HistGradientBoostingClassifier(),
            'MLP': lambda: MLPClassifier(random_state=random_state),
        }
        try:
            factory = factories[name]
        except KeyError:
            raise ValueError(
                f"Unknown classifier name {name!r}; expected one of {sorted(factories)}"
            ) from None
        return factory()

    def _fit(self, name):
        """Build the classifier called ``name``, fit it on the training split, return it."""
        clf = Ensemble.__Classifiers__(name=name)
        clf.fit(self.x_train, self.y_train)
        return clf

    # The numeric tags below are kept from the original code; presumably they
    # are section numbers of the scikit-learn user guide - TODO confirm.

    # 1.6.2
    def __Neighbors__(self):
        """Fit and return the radius-neighbours classifier."""
        return self._fit('Neighbors')

    # 1.7.2
    def __GPC__(self):
        """Fit and return the Gaussian-process classifier."""
        return self._fit('Gaussian_Process')

    # 1.9.1
    def __Gaussian_NB__(self):
        """Fit and return the Gaussian naive Bayes classifier."""
        return self._fit('Gaussian_NB')

    # 1.9.4
    def __Bernoulli_NB__(self):
        """Fit and return the Bernoulli naive Bayes classifier."""
        return self._fit('Bernoulli_NB')

    # 1.10.1
    def __DecisionTree__(self):
        """Fit and return the decision-tree classifier."""
        return self._fit('DecisionTree')

    # 1.11.1
    def __Bagging__(self):
        """Fit and return the bagging classifier (SVC base estimator)."""
        return self._fit('Bagging')

    # 1.11.2
    def __RandomForest__(self):
        """Fit and return the random-forest classifier."""
        return self._fit('RandomForest')

    # 1.11.3
    def __AdaBoost__(self):
        """Fit and return the AdaBoost classifier."""
        return self._fit('AdaBoost')

    # 1.11.4
    def __GradientBoosting__(self):
        """Fit and return the gradient-boosting classifier."""
        return self._fit('GradientBoosting')

    # 1.11.5
    def __HistGradientBoosting__(self):
        """Fit and return the histogram-based gradient-boosting classifier."""
        return self._fit('HistGradientBoosting')

    # 1.17.2
    def __MLPClassifier_1__(self):
        """Fit and return the multi-layer perceptron classifier."""
        return self._fit('MLP')

    def __VotingClassifier__(self):
        """Fit a soft-voting ensemble on the training split and return it.

        'Neighbors' is not a member of the ensemble; it was already disabled
        in the original member list.
        """
        member_names = [
            'Gaussian_Process', 'Gaussian_NB', 'Bernoulli_NB', 'DecisionTree',
            'Bagging', 'RandomForest', 'AdaBoost', 'GradientBoosting',
            'HistGradientBoosting', 'MLP',
        ]
        # Each member is registered under the same name used to build it.
        estimators = [(member, Ensemble.__Classifiers__(name=member))
                      for member in member_names]
        vc = VotingClassifier(estimators=estimators, voting='soft')
        # Fitting the voting classifier fits every member estimator.
        vc.fit(self.x_train, self.y_train)

        return vc

In [35]:
import os

# Build the data path from its components so the cell also runs on platforms
# whose separator is not a backslash; on Windows this yields the same
# '.\\test\\TAPE.txt' string as before.
# NOTE(review): the held-out path and the training path are the same file, so
# the "test" split is the data the model was trained on - confirm which file
# was meant to be the training set.
text = os.path.join('.', 'test', 'TAPE.txt')
train = os.path.join('.', 'test', 'TAPE.txt')
ensemble = Ensemble()
ensemble.load_data(text, train)
# Fits every member classifier and returns the fitted soft-voting ensemble.
model = ensemble.__VotingClassifier__()





[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 1. 0. 1. 1. 0. 1. 0. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Train accuracy: 0.9703947368421053
Test accuracy: 0.875
评价指标:             



                  label       准确率       偏移值
0             Neighbors  0.720273  0.032247
1      Gaussian_Process  0.648087  0.021192
2           Gaussian_NB  0.703880  0.050108
3          Bernoulli_NB  0.644754  0.021845
4          DecisionTree  0.736776  0.015777
5               Bagging  0.704044  0.021982
6          RandomForest  0.684317  0.035629
7              AdaBoost  0.664536  0.044228
8      GradientBoosting  0.703989  0.013711
9  HistGradientBoosting  0.713607  0.064673




In [36]:
import pickle

# Serialise the fitted ensemble and persist the bytes to model.pkl.
serialized_model = pickle.dumps(model)
with open("model.pkl", mode="wb") as model_file:
    model_file.write(serialized_model)


In [None]:
# Re-read the held-out file: the last column is the label, the rest are features.
# ndmin=2 keeps a file containing a single sample two-dimensional, so the
# shape lookup and column slicing below do not fail on it.
s = np.loadtxt(text, dtype=np.float32, delimiter=' ', ndmin=2)
end = s.shape[1] - 1
text_X = s[:, :end]
text_y = s[:, -1]
# read model
# NOTE: unpickling can execute arbitrary code embedded in the file; only load
# a model.pkl that was produced by a trusted source (e.g. the save cell).
with open('model.pkl', 'rb') as f:
    model1 = pickle.load(f)

# predict
result = model1.predict(text_X)
print(result)
