In [None]:
import random
from collections import Counter
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import (
    classification_report,
    precision_score,
    recall_score,
    f1_score,
    accuracy_score
)
from sklearn.pipeline import Pipeline
from tqdm import tqdm
from trainer.classifier_trainer import ClassifierTrainer

In [None]:
# Load the three feature tables. The files have no header row and are
# whitespace-delimited. `delim_whitespace=True` is deprecated since pandas 2.2;
# `sep=r'\s+'` is the documented equivalent and works on older versions too.
df_fac = pd.read_csv('./dataset/mfeat-fac.csv', header=None, sep=r'\s+')
df_fou = pd.read_csv('./dataset/mfeat-fou.csv', header=None, sep=r'\s+')
df_zer = pd.read_csv('./dataset/mfeat-zer.csv', header=None, sep=r'\s+')

In [None]:
# Sanity check: print the (rows, columns) shape of df_fac, then show its first
# rows as the cell output.
print(df_fac.shape)
df_fac.head()

In [None]:
# Sanity check: print the (rows, columns) shape of df_fou, then show its first
# rows as the cell output.
print(df_fou.shape)
df_fou.head()

In [None]:
# Sanity check: print the (rows, columns) shape of df_zer, then show its first
# rows as the cell output.
print(df_zer.shape)
df_zer.head()

In [None]:
def create_labels(n_classes=10, samples_per_class=200):
    """Build block-ordered integer class labels.

    The result contains ``samples_per_class`` copies of 0, followed by
    ``samples_per_class`` copies of 1, and so on up to ``n_classes - 1``.
    The defaults reproduce the 10 x 200 = 2000 labels this notebook assigns
    to each feature table.

    Args:
        n_classes: Number of distinct labels (0 .. n_classes - 1).
        samples_per_class: How many consecutive rows share each label.

    Returns:
        A list of ``n_classes * samples_per_class`` Python ints.
    """
    return [
        label
        for label in range(n_classes)
        for _ in range(samples_per_class)
    ]

In [None]:
# Attach the same block-ordered label column to every feature table.
labels = create_labels()
for frame in (df_fac, df_fou, df_zer):
    frame["label"] = labels

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin, ClassifierMixin

class ParzenMulticlass(ClassifierMixin, BaseEstimator):
  """Multiclass classifier scoring each class with a Gaussian of a scaled L1 distance.

  For a query row ``x`` and a fitted class holding ``n`` training rows with
  ``p`` features, the per-class summary is::

      s = sum_i sum_j (|x_j - x_ij| / h) / (n * h**p)

  and the class score is ``exp(-0.5 * s**2) / sqrt(2 * pi)``. ``predict``
  returns the label of the class with the highest score.

  NOTE(review): the Gaussian is applied once to the pooled distance, not to
  each training sample separately as a classical Parzen-window density
  estimate would do -- confirm this is intended.
  NOTE(review): every class is fitted on the same feature columns, so the
  ``h``-dependent factor of ``s`` is identical across classes; it rescales all
  summaries equally and does not change which class has the smallest ``|s|``.

  Parameters:
    n_class: number of class labels; labels are expected to be 0 .. n_class-1.
    bandwidth: the ``h`` used by ``prod_score_samples`` and ``predict``.
    **params: accepted and ignored.

  Attributes set by ``fit``:
    classes_: labels of the classes that had at least one training row.
    x_class: training rows of each such class, aligned with ``classes_``.
    means: per-feature mean of each such class, aligned with ``classes_``.
  """

  def __init__(self, n_class, bandwidth, **params):
    super().__init__()
    self.n_class = n_class
    self.bandwidth = bandwidth
    # Placeholders until fit() runs; fit() replaces them rather than appending.
    self.means = []
    self.x_class = []

  def fit(self, X: np.array, y=None):
    """Store the training rows and the feature means of every non-empty class.

    Fitted state is rebuilt from scratch on each call, so refitting the same
    instance never mixes in data from an earlier fit.

    Returns:
      self

    Raises:
      ValueError: if ``y`` is None.
    """
    if y is None:
      raise ValueError("ParzenMulticlass.fit requires the class labels y")
    X = np.asarray(X)
    y = np.asarray(y)

    means, x_class, classes = [], [], []
    for c in range(self.n_class):
      _X = X[y == c]
      if _X.shape[0] > 0:
        means.append(_X.mean(0))
        x_class.append(_X)
        # Remember the label so list position and label cannot drift apart
        # when a class has no rows in this training set.
        classes.append(c)

    self.means = means
    self.x_class = x_class
    self.classes_ = np.array(classes, dtype=int)
    return self

  def windows_parzen(self, X, mean, h):
    """Gaussian of the L1 distance between each row of ``X`` and ``mean``.

    The row-wise sum of ``|X - mean|`` is divided by ``mean.shape[0] * h``,
    and the result ``s`` is mapped to ``exp(-0.5 * s**2) / (2*pi)**2``.
    """
    summaries_parzen = np.sum(np.abs(X - mean), 1)/(mean.shape[0]*h)

    #kernel gaussian
    return np.exp(-.5*summaries_parzen**2)/((2*np.pi)**2)

  def prod_class(self, c, h):
    """Return a function mapping one query row to its summary ``s`` for a class.

    ``c`` is a position in ``self.x_class`` (not necessarily the class label).
    """
    samples = self.x_class[c]
    n, p = samples.shape
    scale = n*(h**p)

    def prod(X, ):
      dif = np.abs(X - samples)/h
      return np.sum(dif)/scale
    return prod

  def _class_summaries(self, X, h):
    """Return the (n_rows, n_fitted_classes) matrix of summaries ``s``."""
    if not self.x_class:
      raise RuntimeError("ParzenMulticlass must be fitted before scoring")
    X = np.asarray(X)
    per_class = [
      np.apply_along_axis(self.prod_class(i, h), axis=1, arr=X)
      for i in range(len(self.x_class))
    ]
    return np.array(per_class).T

  def prod_windows_parzen(self, X, h):
    """Return class scores ``exp(-0.5 * s**2) / sqrt(2*pi)`` for every row of ``X``.

    Columns are aligned with the fitted classes (``classes_``).
    """
    summaries_parzen = self._class_summaries(X, h)

    return np.exp(-.5*(summaries_parzen**2))/((2*np.pi)**(1/2))

  def prod_score_samples(self, X):
    """Class scores for ``X`` using ``self.bandwidth``."""
    return self.prod_windows_parzen(X, self.bandwidth)

  def predict(self, X):
    """Return the label of the highest-scoring class for every row of ``X``.

    The score ``exp(-0.5 * s**2)`` decreases as ``|s|`` grows, so the best
    class is the one with the smallest ``|s|``. Ranking on ``|s|`` directly
    avoids float64 underflow: the exponential is exactly 0.0 once ``|s|``
    exceeds roughly 38.6, which would make all classes tie and ``argmax``
    always pick the first one.
    """
    summaries = self._class_summaries(X, self.bandwidth)
    return self.classes_[np.abs(summaries).argmin(1)]

  def sigma(self, X):
    """Rule-of-thumb bandwidth: ``1.06 * mean(std per feature) * n**(-1/5)``.

    ``n`` is the number of rows of ``X``; the per-feature standard deviations
    are averaged into a single value before scaling.
    """
    s = np.std(X, 0)
    m_s = np.mean(s)
    best_h = 1.06*m_s/(X.shape[0]**(1/5))
    return best_h

In [None]:
datasets = [df_fac, df_fou, df_zer]

# Bandwidth candidates 0.99, 1.00, ..., 1.10 (12 values).
# Built from an integer range because np.arange with a float step has a
# rounding-dependent length: (1.1 - 0.99) / 0.01 rounds to just above 11, so
# np.arange(.99, 1.1, .01) yields 12 points and effectively includes the stop
# value. The integer form keeps that grid but makes it explicit and exact.
bandwidth_grid = np.arange(99, 111) / 100

trainer = ClassifierTrainer(datasets,
                            model_create=ParzenMulticlass(n_class=10, bandwidth=1.),
                            param_grid={
                              "clf__bandwidth": bandwidth_grid,
                            })
trainer.train_classifiers_with_random_states(n_iterations=30)