# Maestría en Ciencia de Datos e Inteligencia Artificial
#### 8. Machine Learning and Deep Learning
#### Docente: Msc. Renzo Claure Aracena.

In [None]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from sklearn.model_selection import train_test_split
# Load the cancer dataset: fields are ';'-separated and numbers use ',' as the decimal mark.
cancer = pd.read_csv('cancer.csv', sep=';', decimal=',')

In [None]:
# Feature matrix: every column except the label ('Tipo') and the row identifier ('ID').
X = cancer.drop(columns=['Tipo', 'ID'])
# Binary target: 'M' -> 1 (positive class), 'B' -> 0.
# `map` yields a numeric column directly; `Series.replace` depended on silent
# downcasting of the object column, which pandas has deprecated.
y = cancer['Tipo'].map({'M': 1, 'B': 0})

# Seeded split so results are reproducible and match the split that the later
# cells recreate with random_state=0.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [None]:
# Score de Clasificacion
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

# Train a logistic-regression classifier on the training split
# (max_iter raised to 500 iterations for the solver).
lr = LogisticRegression(max_iter=500)
lr.fit(X_train, y_train)

# Predict the test split and tabulate true vs. predicted labels.
lr_pred = lr.predict(X_test)
confusion = confusion_matrix(y_test, lr_pred)
confusion


In [None]:
# Mean accuracy of the fitted logistic regression on the test split.
lr.score(X_test, y_test)

In [None]:
# Recreate the seeded split and refit the model on it. Without the refit, `lr`
# would still be the model trained on the earlier split, whose training rows
# can overlap this test set and inflate every score computed below.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
lr.fit(X_train, y_train)

# Raw decision scores and hard predictions for the test split.
y_scores_lr = lr.decision_function(X_test)
lr_pred = lr.predict(X_test)

# First 20 test rows as (true label, predicted label, decision score).
y_score_list = list(zip(y_test.iloc[:20], lr_pred[:20], y_scores_lr[:20]))

y_score_list

In [None]:
# Classification probabilities
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
lr.fit(X_train, y_train)

# Predict with the model that was just refitted, so the predicted labels and
# the probabilities listed side by side come from the same fitted estimator.
lr_pred = lr.predict(X_test)
y_proba_lr = lr.predict_proba(X_test)

# First 20 test rows as (true label, predicted label, P(class 1)).
y_proba_list = list(zip(y_test.iloc[:20], lr_pred[:20], y_proba_lr[:20, 1]))


y_proba_list

In [None]:
# Mean accuracy of the current `lr` model on the current test split.
lr.score(X_test, y_test)

In [None]:
# Curva Precision Recall
from sklearn.metrics import precision_recall_curve

# Score the test split with the currently fitted model. Reusing `y_scores_lr`
# from an earlier cell would plot scores computed before `lr` was last refitted.
y_scores_lr = lr.decision_function(X_test)
precision, recall, umbral = precision_recall_curve(y_test, y_scores_lr)

# Operating point whose threshold is closest to 0, i.e. the decision boundary
# that `lr.predict` applies to the decision function.
cercano_cero = np.argmin(np.abs(umbral))
cercano_cero_p = precision[cercano_cero]
cercano_cero_r = recall[cercano_cero]

plt.figure()
plt.xlim([0.0, 1.01])
plt.ylim([0.0, 1.01])
plt.plot(precision, recall, label='Precision-Recall Curva')
# Red ring marks the threshold-closest-to-zero operating point.
plt.plot(cercano_cero_p, cercano_cero_r, 'o', markersize = 12, fillstyle = 'none', c='r', mew=3)
plt.xlabel('Precision', fontsize=16)
plt.ylabel('Recall', fontsize=16)
# The curve's label is only rendered once a legend is drawn.
plt.legend(loc='lower left', fontsize=13)
plt.show()

In [None]:
#Curva ROC
from sklearn.metrics import roc_curve, auc

# Seeded train/test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Refit the logistic regression and rank the test rows by decision score.
lr.fit(X_train, y_train)
y_score_rl = lr.decision_function(X_test)

# ROC points and the area under the curve.
fpr_rl, tpr_rl, _ = roc_curve(y_test, y_score_rl)
roc_auc_rl = auc(fpr_rl, tpr_rl)

plt.figure(figsize=(15, 10))
plt.xlim(-0.01, 1.00)
plt.ylim(-0.01, 1.01)
plt.plot(
    fpr_rl,
    tpr_rl,
    lw=3,
    label='Regresion Log ROC (area = {:0.2f})'.format(roc_auc_rl),
)
plt.xlabel('Falsos Positivos R.', fontsize=16)
plt.ylabel('Verdaderos Positivos R.', fontsize=16)
plt.title('ROC curva Cancer', fontsize=16)
plt.legend(loc='lower right', fontsize=13)
# Dashed diagonal: reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], color='red', lw=3, linestyle='--')
plt.show()

In [None]:
from matplotlib import cm
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
# Curva ROC para comparar más de un modelo
%matplotlib inline

# Seeded train/test split shared by every model compared below.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

plt.figure(figsize=(15, 10))
plt.xlim(-0.01, 1)
plt.ylim(-0.01, 1)

# Fit one SVC per gamma value, report its accuracy and AUC, and overlay its ROC curve.
for gi in (0.001, 0.1, 0.2, 0.5, 1):
    svm = SVC(gamma=gi)
    svm.fit(X_train, y_train)

    y_score = svm.decision_function(X_test)
    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_auc = auc(fpr, tpr)
    accuracy = svm.score(X_test, y_test)

    print('Gamma: {:.3f}  Accuracy: {:.3f} Auc: {:.3f}'.format(gi, accuracy, roc_auc))
    plt.plot(
        fpr,
        tpr,
        lw=2,
        alpha=0.5,
        label='SVM (gamma = {:0.3f}, area = {:0.3f})'.format(gi, roc_auc),
    )

plt.xlabel('Ratio Falsos Positivos', fontsize=16)
plt.ylabel('Ratio Verdaderos Positivos  (Recall)', fontsize=16)
# Thin dashed diagonal: reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], color='k', lw=0.5, linestyle='--')
plt.legend(loc="lower right", fontsize=11)
plt.title('ROC :(Cancer)', fontsize=16)

plt.show()
    
    

### Evaluación de modelos de clasificación multinivel

#### Evaluación pseudodicotómica

In [None]:
# Load the product-purchase dataset (';'-separated, ',' as decimal mark) and preview it.
base = pd.read_csv('COMPRA DE PRODUCTOS.csv', sep=';', decimal=',')
base.head()

In [None]:
# Reload the purchases table so this cell can run on its own.
base = pd.read_csv('COMPRA DE PRODUCTOS.csv', sep=';', decimal=',')

# Features: everything except the identifier, the target and 'CON_LINEA_CREDITO'.
X = base.drop(columns=['ID', 'GRUPO_PRODUCTO', 'CON_LINEA_CREDITO'])
# Multiclass target.
y = base['GRUPO_PRODUCTO']

# Number of rows per class.
y.value_counts()

In [None]:
X_train_mc, X_test_mc, y_train_mc, y_test_mc = train_test_split(X, y, random_state=0)

# Fit one SVM per kernel and draw its confusion matrix as a heatmap.
# The RBF kernel goes last so that `svm`, `svm_predicted_mc`, `confusion_mc`
# and `df_cm` hold the RBF results for the cells that follow.
for kernel, titulo, cmap in (
    ('linear', 'SVM Kernel Lineal \nAccuracy:{0:.3f}', None),
    ('rbf', 'SVM Kernel RBF \nAccuracy:{0:.3f}', 'BuPu'),
):
    svm = SVC(kernel=kernel).fit(X_train_mc, y_train_mc)
    svm_predicted_mc = svm.predict(X_test_mc)

    # Rows and columns follow the classes the model was trained on, rather than a
    # hard-coded range(0, 4), so the axes show the real labels and the frame
    # shape always matches the matrix.
    confusion_mc = confusion_matrix(y_test_mc, svm_predicted_mc, labels=svm.classes_)
    df_cm = pd.DataFrame(confusion_mc, index=svm.classes_, columns=svm.classes_)

    plt.figure(figsize=(10, 8))
    # fmt='.0f' prints whole counts; seaborn's default format would abbreviate
    # larger counts in scientific notation.
    sns.heatmap(df_cm, annot=True, fmt='.0f', cmap=cmap)
    plt.title(titulo.format(accuracy_score(y_test_mc, svm_predicted_mc)))
    plt.ylabel('Real')
    plt.xlabel('Prediccion')

In [None]:
# Re-draw the most recent confusion matrix with integer annotations.
# `df_cm` and `svm_predicted_mc` were last assigned by the RBF-kernel model,
# so the title names that kernel.
plt.figure(figsize=(10,8))
sns.heatmap(df_cm, annot=True, fmt='.0f', cmap="BuPu")
plt.title('SVM Kernel RBF \nAccuracy:{0:.3f}'.format(accuracy_score(y_test_mc, 
                                                                    svm_predicted_mc)))
plt.ylabel('Real')
plt.xlabel('Prediccion')

In [None]:
from sklearn.metrics import classification_report
# Per-class text report for the most recent multiclass predictions.
print(classification_report(y_test_mc, svm_predicted_mc))


#### Macro y Micro precisión, recall y F1

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Print precision, recall and F1 for the latest multiclass predictions,
# each under micro and macro averaging.
for template, metric, average in (
    ('Micro precision: {:.2f}', precision_score, 'micro'),
    ('Macro precision: {:.2f}', precision_score, 'macro'),
    ('Micro recall: {:.2f}', recall_score, 'micro'),
    ('Macro recall: {:.2f}', recall_score, 'macro'),
    ('Micro F1: {:.2f}', f1_score, 'micro'),
    ('Macro F1: {:.2f}', f1_score, 'macro'),
):
    print(template.format(metric(y_test_mc, svm_predicted_mc, average=average)))

**Realicen el mismo ejercicio con árboles de decisión**