### 1. Implemente diferentes funções em Python, usando o NumPy, para calcular:
### a) Acurácia
### b) Precisão
### c) Recall
### d) F1-Measure
### e) MAE
### f) RMSE

Observações:

    Cada item acima deve ter uma função própria para calculá-lo.
    Todas as funções recebem como parâmetros de entrada y_true e y_pred
    As funções para cálculo da Precisão, Recall e F1-Measure devem retornar um único valor já com a métrica calculada baseada na média ponderada das classes.
    As funções podem gerar e usar a matriz de confusão usando o scikit learn, mas não podem usar as métricas já implementadas por ele.

In [272]:
import numpy as np

y_pred = [0, 2, 1, 3]
y_true = [0, 1, 2, 3]

### a) acurácia

In [273]:
def accuracy(y_true, y_pred):
    a = 1/(len(y_true))
    x=0
    for elem in y_true:
        if(y_pred[:elem] == y_true[:elem]):
            x = 1*(x+1)
    return a*x

accuracy(y_true, y_pred)

0.5

### b) precisão

In [274]:
from sklearn.metrics import confusion_matrix

def precision(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    
    for i in cm:
        p = np.mean(np.diag(cm)/np.sum(cm[:,0]))
        #p = np.mean(cm[0,0]/np.sum(cm[:,0]))
        
    return p

print(precision(y_true, y_pred))

0.5


### c) recall

In [275]:
def recall(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    
    for i in cm:
        p = np.mean(np.diag(cm)/np.sum(cm[0,:]))
        
    return p

print(recall(y_true, y_pred))

0.5


### d) F1-Measure

In [276]:
def f1(y_true, y_pred):
    pre = precision(y_true, y_pred)
    rec = recall(y_true, y_pred)
    p = np.mean(2*(pre*rec)/(pre+rec))
    
    return p

f1(y_true, y_pred)

0.5

### e) MAE

In [277]:
import math

def mae(y_true, y_pred):
    a = 1/(len(y_true))
    x=0
    
    for elem in y_true:
        x = x + math.fabs((y_true[elem] - y_pred[elem]))
    return a*x  

mae(y_true, y_pred)

0.5

### f) RMSE

In [278]:
def rmse(y_true, y_pred):
    a = 1/(len(y_true))
    x=0
    
    for elem in y_true:
        x = (y_true[elem] - y_pred[elem])**2 + x
    return a*x

rmse(y_true, y_pred)

0.5

### testes

In [279]:
# acurácia
from sklearn.metrics import accuracy_score
accuracy_score(y_true, y_pred)

0.5

In [280]:
#MAE
from sklearn.metrics import mean_absolute_error
mean_absolute_error(y_true, y_pred)

0.5

In [281]:
#RMSE
from sklearn.metrics import mean_squared_error
mean_squared_error(y_true, y_pred)

0.5

In [282]:
#precisão
from sklearn.metrics import precision_score
precision_score(y_true, y_pred, average='micro')

0.5

In [283]:
#recall
from sklearn.metrics import recall_score
recall_score(y_true, y_pred, average='micro')

0.5

In [284]:
#f1-measure
from sklearn.metrics import f1_score
f1(y_true, y_pred)

0.5

### 2. Calcule Acurácia, Precisão, Recall e F1-Measure para sua solução da questão 2 da Lista 04. Caso não tenha feito a questão 2 da Lista 04 terá que fazê-la agora.

In [285]:
import pandas as pd
df = pd.read_csv('winequality/winequality-white.csv', delimiter=';')

df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [286]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

X = df.drop(['quality'],axis=1) 
y = df['quality']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

In [287]:
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X_train,y_train)
pred = neigh.predict(X_test)

#y_train.shape 
#pred.shape

k3_precision = precision(y_test, pred)
k3_recall = recall(y_test, pred)
k3_f1 = f1(y_test, pred)

print('Precisão: {:.2f}%.'.format(k3_precision))
print('Recall: {:.2f}%.'.format(k3_recall))
print('F1-Measure: {:.2f}%.'.format(k3_f1))

### OBS: para valores de k>3 dava divisão por 0 no calculo de precisão, recall e f1. Portanto, considerado apenas k=3

Precisão: 16.07%.
Recall: 16.07%.
F1-Measure: 16.07%.


### 3. Calcule MAE e RMSE para sua solução da questão 3.3 da Lista 05. Caso não tenha feito a questão 3.3 da Lista 05 terá que fazê-la agora.

In [288]:
df = pd.read_csv('winequality/winequality-white.csv', delimiter=';')
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [289]:
X = df.drop(['quality'],axis=1) 
y = df['quality']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

In [290]:
from sklearn.neighbors import KNeighborsRegressor

model = KNeighborsRegressor(n_neighbors=5)
model.fit(X_train, y_train)

predict = model.predict(X_test)

y_true = y_test.to_numpy()

k5_mae = mae(y_true, predict)

k5_rmse = rmse(y_true, predict)

In [291]:
print('MAE: {:.2f}%'.format(k5_mae*100))

MAE: 57.86%


In [292]:
print('RMSE: {:.2f}%'.format(k5_rmse*100))

RMSE: 58.52%
