# Bibliotecas

In [1]:
# Bibliotecas a utilizar 
import numpy                   as     np                 # Uso de álgebra lineal, funciones vectoriales
import pandas                  as     pd                 # Trabajar con DataFrames
import matplotlib.pyplot       as     plt                # Para realizar gráficas
import seaborn                 as     sns                # Otras funciones para graficar

from sklearn.linear_model          import LogisticRegression            # Model Logistico
from sklearn.naive_bayes           import GaussianNB                    # Modelo NaiveBayes
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis    # Modelo LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis # Modelo QDA

from sklearn.metrics               import accuracy_score     # Función para calcular la precisión de clasificación
from sklearn.metrics               import confusion_matrix   # Función para calcular una matriz de confusión
from sklearn.model_selection       import KFold              # Función para realizar K-Fold Cross Validation
from sklearn.model_selection       import cross_val_score    # Función para realizar K-Fold-Cross Validation 

import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides convergence/deprecation warnings raised by the models below.
warnings.filterwarnings(action="ignore")
# Apply seaborn's default theme to all subsequent matplotlib figures.
sns.set()

# Análisis Descriptivo

In [2]:
# Load the dataset used throughout the notebook.
# Alternative dataset with 3 classes: datos = pd.read_csv("iris.csv")
datos = pd.read_csv("rock.csv")   # dataset with 12 classes
# Preview 10 random rows; the fixed seed keeps the displayed sample identical
# on every Restart & Run All.
datos.sample(10, random_state=0)

In [3]:
# Number of non-null "area" values per "type_desc" class (i.e. class sizes).
datos["area"].groupby(datos["type_desc"]).count()

In [4]:
# Pairwise plot grid of `datos`, coloured by the "type_desc" column.
sns.pairplot(datos, hue="type_desc", height=2, aspect=2)
# Render the figure. The previous bare plt.plot() drew nothing, touched the
# current axes, and echoed an empty list as the cell output.
plt.show()

In [5]:
# Split the frame by column position: the first four columns are the features,
# the fifth is the target.
# NOTE(review): assumes column 4 is the class label ("type_desc") - confirm against the CSV layout.
matriz_diseño = datos.iloc[:, :4]
y_real = datos.iloc[:, 4]
matriz_diseño.head()

# Análisis de modelos individuales

# Análisis de sobreajuste 

In [6]:
# Logistic regression (L2 penalty, C=1.0) scored by 10-fold cross-validated accuracy.
modelo = LogisticRegression(
    C=1.0,
    penalty="l2",
    fit_intercept=True,
    class_weight=None,
)
# Shuffled folds with a fixed seed, so every model is evaluated on the same splits.
k_fold = KFold(n_splits=10, shuffle=True, random_state=0)
score = cross_val_score(
    modelo,
    matriz_diseño,
    y_real,
    scoring="accuracy",
    cv=k_fold,
    n_jobs=1,
)
# Report the per-fold accuracies, their range, and their mean as percentage strings.
print("Precisiones:         ", [str(np.round(100 * acc, 1)) + "%" for acc in score])
print("Precisión Min y Max: ", [str(np.round(100 * acc, 1)) + "%" for acc in (score.min(), score.max())])
print("Precisión puntual:   ", [str(np.round(100 * acc, 1)) + "%" for acc in (score.mean(),)])

In [7]:
# Gaussian Naive Bayes scored by 10-fold cross-validated accuracy.
modelo = GaussianNB(
    priors=None,
    var_smoothing=1e-09,
)
# Shuffled folds with a fixed seed, so every model is evaluated on the same splits.
k_fold = KFold(n_splits=10, shuffle=True, random_state=0)
score = cross_val_score(
    modelo,
    matriz_diseño,
    y_real,
    scoring="accuracy",
    cv=k_fold,
    n_jobs=1,
)
# Report the per-fold accuracies, their range, and their mean as percentage strings.
print("Precisiones:         ", [str(np.round(100 * acc, 1)) + "%" for acc in score])
print("Precisión Min y Max: ", [str(np.round(100 * acc, 1)) + "%" for acc in (score.min(), score.max())])
print("Precisión puntual:   ", [str(np.round(100 * acc, 1)) + "%" for acc in (score.mean(),)])

In [8]:
# Linear Discriminant Analysis scored by 10-fold cross-validated accuracy.
modelo = LinearDiscriminantAnalysis(
    n_components=None,
    tol=0.0001,
)
# Shuffled folds with a fixed seed, so every model is evaluated on the same splits.
k_fold = KFold(n_splits=10, shuffle=True, random_state=0)
score = cross_val_score(
    modelo,
    matriz_diseño,
    y_real,
    scoring="accuracy",
    cv=k_fold,
    n_jobs=1,
)
# Report the per-fold accuracies, their range, and their mean as percentage strings.
print("Precisiones:         ", [str(np.round(100 * acc, 1)) + "%" for acc in score])
print("Precisión Min y Max: ", [str(np.round(100 * acc, 1)) + "%" for acc in (score.min(), score.max())])
print("Precisión puntual:   ", [str(np.round(100 * acc, 1)) + "%" for acc in (score.mean(),)])

In [9]:
# Quadratic Discriminant Analysis scored by 10-fold cross-validated accuracy.
# The legacy `store_covariances` keyword was dropped: it was removed from
# scikit-learn and passing it raises TypeError on current releases. Its old
# default was None, so omitting it does not change the fitted model.
modelo     = QuadraticDiscriminantAnalysis(priors=None, reg_param=0.0, store_covariance=False, tol=0.0001)
# Shuffled folds with a fixed seed, so every model is evaluated on the same splits.
k_fold     = KFold(n_splits=10, shuffle=True, random_state=0)
score      = cross_val_score( modelo, matriz_diseño, y_real, cv=k_fold, n_jobs=1, scoring="accuracy")
# Report the per-fold accuracies, their range, and their mean as percentage strings.
print( "Precisiones:         ", [ str( np.round(100*x,1) ) + "%" for x in score] )
print( "Precisión Min y Max: ", [ str( np.round(100*x,1) ) + "%" for x in [np.min(score),np.max(score)] ] )
print( "Precisión puntual:   ", [ str( np.round(100*x,1) ) + "%" for x in [ np.mean(score) ] ]  )

# Modelo final

In [10]:
# Final model: Gaussian Naive Bayes, with the same hyper-parameters as the
# cross-validated GaussianNB cell.
modelo_final = GaussianNB(
    priors=None,
    var_smoothing=1e-09,
)
modelo_final

In [11]:
# Fit on the full design matrix, then predict the same rows (in-sample predictions).
# fit() returns the estimator itself, so the two calls can be chained.
y_estimada = modelo_final.fit(X=matriz_diseño, y=y_real).predict(X=matriz_diseño)
# Preview the first ten predicted labels.
y_estimada[:10]

In [12]:
# Accuracy of the final model as a percentage with two decimals.
# y_estimada was predicted on the same rows the model was fitted on, so this is
# an in-sample score, not a held-out estimate.
print("Precisión: ", str(np.round(100 * accuracy_score(y_real, y_estimada), 2)) + "%")

In [13]:
# Confusion matrix of true vs. predicted labels (rows: true class, columns: predicted class).
confusion_matrix(y_real, y_estimada)