# CLASSIFYING WINE DATASET

## General Imports and Settings

In [1]:
import numpy as np 
import scipy as sp 
from scipy import stats
from sklearn import datasets
from sklearn.model_selection import cross_val_predict, cross_val_score, RepeatedStratifiedKFold
from sklearn.metrics import confusion_matrix, plot_confusion_matrix
from datetime import datetime
import matplotlib.pyplot as plt

In [2]:
num_folds = 10
num_rep = 3
exec_time = datetime.today().strftime('%Y-%m-%d %H:%M:%S')

### Leitura do dataset

In [3]:
wine_ds = datasets.load_wine()

wine_x = wine_ds.data
wine_y = wine_ds.target

## ZERO R

### Configuração do classificador

In [4]:
from sklearn.dummy import DummyClassifier

zr = DummyClassifier(strategy="stratified")
rskf = RepeatedStratifiedKFold(n_splits = num_folds, n_repeats = num_rep, random_state = 36851234)

### Classificação e obtenção dos resultados

In [8]:
zr_scores = cross_val_score(zr, wine_x, wine_y, scoring='accuracy', cv=rskf)
#print(score)

zr_score_mean = zr_scores.mean()
zr_score_std = zr_scores.std()
zr_score_low, zr_score_upp = stats.norm.interval(0.95, loc=zr_score_mean, scale=zr_score_std/np.sqrt(len(zr_scores)))

print("Zero R (mean, std, low, upp): \n", zr_score_mean, zr_score_std, zr_score_low, zr_score_upp)

Zero R (mean, std, low, upp): 
 0.2961873638344226 0.10964197042878235 0.2569532095550652 0.33542151811378007


## K NEAREST NEIGHBOR

### Configuração do classificador

In [6]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier()
rskf = RepeatedStratifiedKFold(n_splits = num_folds, n_repeats = num_rep, random_state = 36851234)

### Classificação e obtenção dos resultados

In [9]:
knn_scores = cross_val_score(knn, wine_x, wine_y, scoring='accuracy', cv=rskf)
#print(score)

knn_score_mean = knn_scores.mean()
knn_score_std = knn_scores.std()
knn_score_low, knn_score_upp = stats.norm.interval(0.95, loc=knn_score_mean, scale=knn_score_std/np.sqrt(len(knn_scores)))

print("KNN (mean, std, low, upp): \n",knn_score_mean, knn_score_std, knn_score_low, knn_score_upp)

KNN (mean, std, low, upp): 
 0.6974945533769062 0.10891617525307232 0.6585201167931325 0.73646898996068


## Results

In [11]:
with open("../results/results.md", "w") as rf: #opening result file
    print("# Results Obtained", file=rf)
    print("## Configuration\n", file=rf)
    print("Num of folds: ", num_folds, "\n", file=rf)
    print("Num of repetitions: ", num_rep, "\n", file=rf)
    print("Test run on: ", exec_time, file=rf)
    
    print("## Zero R\n", file=rf)
    print("Mean Value: ", str(zr_score_mean), "\n", file=rf)
    print("Standart Variation: ", str(zr_score_std), "\n", file=rf)
    print("Lower limit: ", str(zr_score_low), "\n", file=rf)
    print("Upper limit: ", str(zr_score_upp), "\n", file=rf)

    print("## K NEAREST NEIGHBORS\n", file=rf)
    print("Mean Value: ", str(knn_score_mean), "\n", file=rf)
    print("Standart Variation: ", str(knn_score_std), "\n", file=rf)
    print("Lower limit: ", str(knn_score_low), "\n", file=rf)
    print("Upper limit: ", str(knn_score_upp), "\n", file=rf)