# In [None]:
import random
import time
import pandas as pd
import numpy as np
from functools import reduce
from statistics import mode
from statistics import mean
import operator
import collections
import sklearn_crfsuite
from sklearn_crfsuite import metrics

class ensemble:
    """Bagging-style ensemble of CRF sequence taggers.

    ``fit`` trains ``num`` independent ``sklearn_crfsuite.CRF`` models, each
    on a random sample (drawn with replacement) of the training sentences.
    ``ensemblePred`` averages the per-token label marginals returned by every
    trained model.

    A sentence is a sequence of ``(token, postag, label)`` triples; feature
    extraction only reads the first two fields of each triple.
    """

    def __init__(self, num):
        """Create an untrained ensemble.

        Args:
            num (int): number of CRF models that ``fit`` will train.
        """
        self.num = num
        self.classifiers = []

    @staticmethod
    def _neighbor_features(prefix, word, postag):
        """Build the feature dict describing a neighbouring token.

        ``prefix`` is ``'-1'`` for the previous token or ``'+1'`` for the
        next one; it is prepended to every feature name.
        """
        return {
            prefix + ':word.lower()': word.lower(),
            prefix + ':word.istitle()': word.istitle(),
            prefix + ':word.isupper()': word.isupper(),
            prefix + ':postag': postag,
            prefix + ':postag[:2]': postag[:2],
        }

    def word2features(self, sent, i):
        """Return the feature dict for the token at position ``i`` of ``sent``.

        Features cover the token itself (lower-cased form, 2/3-character
        suffixes, casing/digit flags, POS tag and its 2-character prefix) and
        the same kind of information for the previous and next tokens.
        ``'BOS'`` / ``'EOS'`` flags are set instead when the token is the
        first / last one of the sentence.

        Args:
            sent: sequence of tuples whose first two items are the word and
                its POS tag.
            i (int): index of the token inside ``sent``.

        Returns:
            dict: feature name -> feature value.
        """
        word = sent[i][0]
        postag = sent[i][1]

        features = {
            'bias': 1.0,
            'word.lower()': word.lower(),
            'word[-3:]': word[-3:],
            'word[-2:]': word[-2:],
            'word.isupper()': word.isupper(),
            'word.istitle()': word.istitle(),
            'word.isdigit()': word.isdigit(),
            'postag': postag,
            'postag[:2]': postag[:2],
        }

        if i > 0:
            features.update(
                self._neighbor_features('-1', sent[i - 1][0], sent[i - 1][1])
            )
        else:
            features['BOS'] = True

        if i < len(sent) - 1:
            features.update(
                self._neighbor_features('+1', sent[i + 1][0], sent[i + 1][1])
            )
        else:
            features['EOS'] = True

        return features

    def sent2features(self, sent):
        """Return the list of feature dicts, one per token of ``sent``."""
        return [self.word2features(sent, i) for i in range(len(sent))]

    def sent2labels(self, sent):
        """Return the labels (third field) of every triple in ``sent``."""
        return [label for _token, _postag, label in sent]

    @staticmethod
    def sent2tokens(sent):
        """Return the tokens (first field) of every triple in ``sent``.

        Declared static so it can be called both on the class (as before)
        and on an instance (which previously raised ``TypeError`` because
        the method had no ``self`` parameter).
        """
        return [token for token, _postag, _label in sent]

    def fit(self, data, c1):
        """Train ``self.num`` CRF models, each on a random sample of ``data``.

        Every model is trained on ``int(len(data) * 0.8)`` sentences drawn
        with replacement (``random.choices``). Previously trained models are
        discarded. Progress is printed to stdout.

        Args:
            data (list): training sentences, each a sequence of
                ``(token, postag, label)`` triples.
            c1 (float): value forwarded as the ``c1`` argument of
                ``sklearn_crfsuite.CRF`` (``c2`` is fixed to 0.1 and
                ``max_iterations`` to 100).

        Returns:
            None. The trained models are stored in ``self.classifiers``.

        Raises:
            ValueError: if ``data`` is empty.
        """
        if len(data) == 0:
            raise ValueError("fit() requires at least one training sentence")

        samplesize = int(len(data) * 0.8)
        self.classifiers = []
        for n in range(self.num):
            sample = random.choices(data, k=samplesize)
            print(len(sample))
            X = [self.sent2features(s) for s in sample]
            y = [self.sent2labels(s) for s in sample]
            crf = sklearn_crfsuite.CRF(c1=c1, c2=0.1, max_iterations=100)
            print('Training Classifier: ', end="")
            print(n, end=" ")
            crf.fit(X, y)
            self.classifiers.append(crf)
            print('Done')

    @staticmethod
    def most_frequent(List):
        """Return the most frequent element of ``List``.

        Declared static so it can be called both on the class (as before)
        and on an instance (which previously raised ``TypeError``).
        Elements must be hashable; ``max`` raises ``ValueError`` on an empty
        list.
        """
        return max(set(List), key=List.count)

    def mean(self, a, b):
        """Return ``a + b``.

        NOTE(review): despite its name this returns the sum, not the
        average. Behaviour is left unchanged because callers may rely on
        it -- confirm the intent before renaming or fixing.
        """
        return (a + b)

    def sumdict(self, dcts):
        """Average the values of the keys shared by every dict in ``dcts``.

        Keys missing from at least one dict are dropped. The result follows
        the key order of the first dict.

        Args:
            dcts: sequence of dicts with numeric values.

        Returns:
            dict: key -> mean of that key's values over all dicts; an empty
            dict when ``dcts`` is empty.
        """
        if not dcts:
            return {}
        first, rest = dcts[0], dcts[1:]
        count = len(dcts)
        return {
            key: sum(d[key] for d in dcts) / count
            for key in first
            if all(key in d for d in rest)
        }

    def addProb(self, iter):
        """Average per-token marginals of one sentence across models.

        Args:
            iter: one entry per model, each a sequence with one dict
                (label -> value) per token of the same sentence. The name
                shadows the builtin but is kept for caller compatibility.

        Returns:
            list: one averaged dict per token (see ``sumdict``).
        """
        return [self.sumdict(token_dicts) for token_dicts in zip(*iter)]

    def generatePred(self, predictors, testData, batchsize, prob, label):
        """Yield averaged predictions for ``testData``, one batch at a time.

        Args:
            predictors: objects exposing ``predict_marginals(X)``.
            testData: sentences to tag (sliceable sequence).
            batchsize (int): number of sentences processed per batch.
            prob: when truthy, yield the averaged dicts for every token;
                otherwise yield only the averaged value stored under
                ``label``.
            label: key looked up in each averaged dict when ``prob`` is
                falsy (``None`` is produced when the key is absent).

        Yields:
            list: one entry per sentence of the batch; each entry is a list
            with one item per token.
        """
        for offset in range(0, len(testData), batchsize):
            batch = testData[offset:offset + batchsize]
            testX = [self.sent2features(s) for s in batch]
            predictions = [clf.predict_marginals(testX) for clf in predictors]
            # zip(*predictions) regroups the outputs sentence by sentence so
            # that each sentence can be averaged across models.
            averaged = [self.addProb(per_model) for per_model in zip(*predictions)]
            if prob:
                yield averaged
            else:
                # Only built when requested; it is not needed for prob output.
                yield [
                    [token_probs.get(label) for token_probs in sentence]
                    for sentence in averaged
                ]

    # TODO: make faster
    def ensemblePred(self, testData, label, prob=True, batchsize=1000):
        """Predict with every trained model and average the marginals.

        Prints one ``.`` per processed batch as a progress indicator.

        Args:
            testData: sentences to tag.
            label: label whose averaged value is returned for every token
                when ``prob`` is falsy.
            prob: when truthy (default), return the averaged dicts for every
                token instead of a single value per token.
            batchsize (int): number of sentences sent to the models at once
                (defaults to 1000, the previously hard-coded value).

        Returns:
            list: one entry per sentence, each a list with one item per
            token (a dict when ``prob`` is truthy, otherwise the value
            stored under ``label``). Empty when ``self.classifiers`` is
            empty.
        """
        predY = []
        for batch_result in self.generatePred(
            self.classifiers, testData, batchsize, prob, label
        ):
            predY.extend(batch_result)
            print(".", end="")

        return predY