In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline  
import seaborn as sns

In [2]:
from bayessians.discriminant import QDAClassifier, LDAClassifier
from bayessians.naive_bayes import GaussianNaiveBayesClassifier, CustomNaiveBayesClassifier
from bayessians.validator import ModelValidator
from bayessians.archimedean import FrankCopulaClassifier, ClaytonCopulaClassifier, GumbelCopulaClassifier
from bayessians.elliptical import StudentCopulaClassifier, GaussianCopulaClassifier
from bayessians.distribution_estimator import DistributionEstimator
from bayessians.data_splitter import DatasetSplitter

In [3]:
def txt_to_csv(filePath):
    """Load a whitespace-delimited numeric text file into a DataFrame.

    Every non-blank line is one sample. All columns but the last are parsed
    as floats and named ``X1`` ... ``Xn``; the last column is cast to ``int``
    and stored as ``Y``.

    Parameters
    ----------
    filePath : str or path-like
        Path of the text file to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``X1`` ... ``Xn`` followed by ``Y``, one row per non-blank
        input line.

    Raises
    ------
    ValueError
        If the file contains no data rows, if rows differ in their number of
        columns, or if a token cannot be parsed as a float.
    """
    rows = []
    with open(filePath, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            tokens = line.split()
            if not tokens:
                # Blank lines (e.g. a trailing newline) would otherwise add an
                # empty row and make the matrix ragged.
                continue
            row = [float(token) for token in tokens]
            if rows and len(row) != len(rows[0]):
                raise ValueError(
                    'line {} of {!r} has {} columns, expected {}'.format(
                        line_no, filePath, len(row), len(rows[0])))
            rows.append(row)
    if not rows:
        raise ValueError('no data rows found in {!r}'.format(filePath))
    # Transpose so that m[r] is the r-th column of the file.
    m = np.array(rows).T
    n_features = m.shape[0] - 1
    data = {'X{}'.format(r + 1): m[r] for r in range(n_features)}
    data['Y'] = m[n_features].astype(int)
    return pd.DataFrame.from_dict(data)

In [4]:
# Make NumPy raise on every floating-point error category instead of warning;
# the displayed value is the previous error-handling configuration.
np.seterr(all='raise')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

In [5]:
# Iris: the first four columns are the features; the 'species' column is
# turned into integer category codes to serve as the label.
iris = pd.read_csv('data/iris.csv')
X_iris = iris.iloc[:, :4]
y_iris = iris['species'].astype('category').cat.codes
iris_data = X_iris, y_iris

In [6]:
# Glass: only the listed columns are used as features; 'Type' is the label.
glass = pd.read_csv('data/glass.csv')
glass_feature_subset = ['Na', 'Mg', 'Al', 'Si', 'Ca']
X_glass = glass.loc[:, glass_feature_subset]
y_glass = glass['Type']
glass_data = X_glass, y_glass

In [7]:
# Pathbased: parsed from the text file by txt_to_csv; the first two columns
# are the features and 'Y' is converted to integer category codes.
pathbased = txt_to_csv('data/pathbased.txt')
X_pathbased = pathbased.iloc[:, :2]
y_pathbased = pathbased['Y'].astype('category').cat.codes
pathbased_data = X_pathbased, y_pathbased

In [8]:
# Wine: the first 11 columns (by position) are the features; 'quality' is the label.
wine = pd.read_csv('data/wine.csv')
# NOTE(review): `wine_features` is not referenced by the lines below -- X_wine
# is selected by position, not by this list. Confirm which selection is intended.
wine_features = ['fix_acidity', 'vol_acidity', 'residual_sugar', 'free_sulfur_dioxide', 'sulphates', 'alcohol']
X_wine = wine.iloc[:, :11]
y_wine = wine['quality']
wine_data = X_wine, y_wine

In [None]:
# Set the matplotlib default font size for the figures produced below.
plt.rcParams['font.size'] = 16

In [None]:
# Gaussian copula classifier on the wine data with dist='gamma', kernel='triangle'.
ModelValidator(
    GaussianCopulaClassifier,
    wine_data,
    dist='gamma',
    kernel='triangle',
).estimate_model_quality()

In [None]:
# Student copula classifier on the wine data with dist='fit', kernel='rectangle'.
ModelValidator(
    StudentCopulaClassifier,
    wine_data,
    dist='fit',
    kernel='rectangle',
).estimate_model_quality()

In [None]:
# Student copula classifier on the glass data with dist='kernel', kernel='rectangle'.
ModelValidator(
    StudentCopulaClassifier,
    glass_data,
    dist='kernel',
    kernel='rectangle',
).estimate_model_quality()

In [None]:
# Student copula classifier on the wine data with dist='kernel', kernel='triangle'.
ModelValidator(
    StudentCopulaClassifier,
    wine_data,
    dist='kernel',
    kernel='triangle',
).estimate_model_quality()

In [None]:
# Gaussian naive Bayes classifier on the wine data.
# NOTE(review): confirm that GaussianNaiveBayesClassifier actually uses the
# dist/kernel keyword arguments passed here.
ModelValidator(
    GaussianNaiveBayesClassifier,
    wine_data,
    dist='kernel',
    kernel='rectangle',
).estimate_model_quality()

In [None]:
# Student copula classifier on the glass data with dist='kernel', kernel='cosinus'.
ModelValidator(
    StudentCopulaClassifier,
    glass_data,
    dist='kernel',
    kernel='cosinus',
).estimate_model_quality()

In [None]:
# Gaussian copula classifier on the glass data with dist='kernel', h='silverman';
# no kernel argument is passed in this run.
ModelValidator(
    GaussianCopulaClassifier,
    glass_data,
    dist='kernel',
    h='silverman',
).estimate_model_quality()

In [None]:
# Gaussian copula classifier on the wine data with h='silverman', kernel='quartic';
# no dist argument is passed in this run.
ModelValidator(
    GaussianCopulaClassifier,
    wine_data,
    h='silverman',
    kernel='quartic',
).estimate_model_quality()

In [9]:
# Gaussian copula classifier on the glass data with dist='genextreme', h='silverman'.
ModelValidator(
    GaussianCopulaClassifier,
    glass_data,
    dist='genextreme',
    h='silverman',
).estimate_model_quality()

(0.6214765100671141, 0.021891954940000125, 0.56, 0.060451946782733836)

In [None]:
# Build a DatasetSplitter from the pathbased features and labels.
ds = DatasetSplitter(
    X_pathbased,
    y_pathbased,
)

In [None]:
from scipy.stats import shapiro

# Take the split at index 2, print its label, then display the result of
# applying scipy's shapiro to its values, rounded to two decimals.
split = ds.splitted_data[2]
print(split.label)
np.round(split.values.apply(shapiro), decimals=2)

In [None]:
# Gaussian copula classifier on the glass data with h='skott'.
# NOTE(review): 'skott' may be a misspelling of 'scott' -- confirm against the
# values ModelValidator accepts for h.
ModelValidator(
    GaussianCopulaClassifier,
    glass_data,
    h='skott',
).estimate_model_quality()

In [None]:
# Gaussian copula classifier on the glass data with a numeric h=1.5.
ModelValidator(
    GaussianCopulaClassifier,
    glass_data,
    h=1.5,
).estimate_model_quality()