In [46]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn import metrics, model_selection
from sklearn.metrics import f1_score, recall_score, precision_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.utils.multiclass import unique_labels

from sklearn.dummy import DummyClassifier
from sklearn.svm import SVC  # support vector machine classifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB  # naive bayes
from sklearn.neighbors import KNeighborsClassifier

from pydotplus import graph_from_dot_data
from sklearn.tree import export_graphviz

def run_classifier(clf, X, y, num_tests=100):
    metrics = {'f1-score': [], 'precision': [], 'recall': []}
        
    for _ in range(num_tests):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.33, random_state=37, stratify=y)
        
        clf.fit(X_train, y_train)
        predictions = clf.predict(X_test)
        y_train_pred = clf.predict(X_train)
        
        metrics['y_pred'] = predictions
        metrics['y_prob'] = clf.predict_proba(X_test)[:,1]
        metrics['f1-score'].append(f1_score(y_test, predictions)) 
        metrics['recall'].append(recall_score(y_test, predictions))
        metrics['precision'].append(precision_score(y_test, predictions))
    
    return metrics

SismosEgresosCompleto = 'F:\\Documentos\\Informática\\Maestria - 01 Primer Semestre\\cc5206 - Introducción a la Minería de Datos\\Proyecto - hito 2\\r_sismos_chile\\src\\hito3\\SismosEgresosClasificador.csv'
sismosEgresos5 = 'F:\\Documentos\\Informática\\Maestria - 01 Primer Semestre\\cc5206 - Introducción a la Minería de Datos\\Proyecto - hito 2\\r_sismos_chile\\src\\hito3\\sismosEgr5_7d.csv'
sismosEgresos6 = 'F:\\Documentos\\Informática\\Maestria - 01 Primer Semestre\\cc5206 - Introducción a la Minería de Datos\\Proyecto - hito 2\\r_sismos_chile\\src\\hito3\\sismosEgr6_7d.csv'
sismosEgresos7 = 'F:\\Documentos\\Informática\\Maestria - 01 Primer Semestre\\cc5206 - Introducción a la Minería de Datos\\Proyecto - hito 2\\r_sismos_chile\\src\\hito3\\sismosEgr7_7d.csv'

data = pd.read_csv(SismosEgresosCompleto)  # abrimos el archivo csv y lo cargamos en data.
data5 = pd.read_csv(sismosEgresos5)
data6 = pd.read_csv(sismosEgresos6)
data7 = pd.read_csv(sismosEgresos7)

names = list(data)
for name in names:
    if "Unnamed" in name:
        data.pop(name)
#print(data.columns)

print("librerias y datos cargados exitosamente!")

librerias y datos cargados exitosamente!


In [47]:
X = data[['Latitude', 'Longitude', 'Depth', 'Magnitude', 'Region_Arica_Parinacota', 'Region_Tarapaca', 
          'Region_Antofagasta', 'Region_Atacama', 'Region_Coquimbo', 'Region_Valparaiso', 'Region_Metropolitana_Santiago', 
          'Region_Libertador_General_OHiggins', 'Region_Maule', 'Region_Nuble', 'Region_Biobio', 'Region_Araucania', 
          'Region_Rios', 'Region_Lagos', 'Region_Aysen_General_Carlos_Ibanez', 'Region_Magallanes_Antartica']] 
y = data['class']


X_5 = data5[['Latitude', 'Longitude', 'Depth', 'Magnitude', 'Region_Arica_Parinacota', 'Region_Tarapaca', 
          'Region_Antofagasta', 'Region_Atacama', 'Region_Coquimbo', 'Region_Valparaiso', 'Region_Metropolitana_Santiago', 
          'Region_Libertador_General_OHiggins', 'Region_Maule', 'Region_Nuble', 'Region_Biobio', 'Region_Araucania', 
          'Region_Rios', 'Region_Lagos', 'Region_Aysen_General_Carlos_Ibanez', 'Region_Magallanes_Antartica']] 
y_5 = data5['class']


X_6 = data6[['Latitude', 'Longitude', 'Depth', 'Magnitude', 'Region_Arica_Parinacota', 'Region_Tarapaca', 
          'Region_Antofagasta', 'Region_Atacama', 'Region_Coquimbo', 'Region_Valparaiso', 'Region_Metropolitana_Santiago', 
          'Region_Libertador_General_OHiggins', 'Region_Maule', 'Region_Nuble', 'Region_Biobio', 'Region_Araucania', 
          'Region_Rios', 'Region_Lagos', 'Region_Aysen_General_Carlos_Ibanez', 'Region_Magallanes_Antartica']] 
y_6 = data6['class']


X_7 = data7[['Latitude', 'Longitude', 'Depth', 'Magnitude', 'Region_Arica_Parinacota', 'Region_Tarapaca', 
          'Region_Antofagasta', 'Region_Atacama', 'Region_Coquimbo', 'Region_Valparaiso', 'Region_Metropolitana_Santiago', 
          'Region_Libertador_General_OHiggins', 'Region_Maule', 'Region_Nuble', 'Region_Biobio', 'Region_Araucania', 
          'Region_Rios', 'Region_Lagos', 'Region_Aysen_General_Carlos_Ibanez', 'Region_Magallanes_Antartica']] 
y_7 = data7['class']

col_names = ['Latitude', 'Longitude', 'Depth', 'Magnitude', 'Region_Arica_Parinacota', 'Region_Tarapaca',
             'Region_Antofagasta', 'Region_Atacama', 'Region_Coquimbo', 'Region_Valparaiso', 'Region_Metropolitana_Santiago',
             'Region_Libertador_General_OHiggins', 'Region_Maule', 'Region_Nuble', 'Region_Biobio', 'Region_Araucania',
             'Region_Rios', 'Region_Lagos', 'Region_Aysen_General_Carlos_Ibanez', 'Region_Magallanes_Antartica']

## ---------------------------------------- Datos sin region ---------------------------------------- 

XnoReg = data[['Latitude', 'Longitude', 'Depth', 'Magnitude']] 
ynoReg = data['class']


X_5noReg = data5[['Latitude', 'Longitude', 'Depth', 'Magnitude']] 
y_5noReg = data5['class']


X_6noReg = data6[['Latitude', 'Longitude', 'Depth', 'Magnitude']] 
y_6noReg = data6['class']


X_7noReg = data7[['Latitude', 'Longitude', 'Depth', 'Magnitude']] 
y_7noReg = data7['class']

col_namesnoReg = ['Latitude', 'Longitude', 'Depth', 'Magnitude']

print("separacion de datos de clases completa?")

separacion de datos de clases completa?


In [53]:
# ====================== Clasificadores con region distintas alturas ======================
c0 = ("SismosEgresos completos altura 3", DecisionTreeClassifier(max_depth = 3), X, y, col_names)
c1 = ("SismosEgresos completos altura 4", DecisionTreeClassifier(max_depth = 4), X, y, col_names)
c2 = ("SismosEgresos completos altura 5", DecisionTreeClassifier(max_depth = 5), X, y, col_names)
c3 = ("SismosEgresos completos altura 6", DecisionTreeClassifier(max_depth = 6), X, y, col_names)

c4 = ("SismosEgresos mag 5 altura 4", DecisionTreeClassifier(max_depth = 4), X_5, y_5, col_names)
c5 = ("SismosEgresos mag 5 altura 5", DecisionTreeClassifier(max_depth = 5), X_5, y_5, col_names)
c6 = ("SismosEgresos mag 5 altura 6", DecisionTreeClassifier(max_depth = 6), X_5, y_5, col_names)

c7 = ("SismosEgresos mag 6 altura 4", DecisionTreeClassifier(max_depth = 4), X_6, y_6, col_names)
c8 = ("SismosEgresos mag 6 altura 5", DecisionTreeClassifier(max_depth = 5), X_6, y_6, col_names)
c9 = ("SismosEgresos mag 6 altura 6", DecisionTreeClassifier(max_depth = 6), X_6, y_6, col_names)

c10 = ("SismosEgresos mag 7 altura 3", DecisionTreeClassifier(max_depth = 3), X_7, y_7, col_names)
c11 = ("SismosEgresos mag 7 altura 4", DecisionTreeClassifier(max_depth = 4), X_7, y_7, col_names)
c12 = ("SismosEgresos mag 7 altura 5", DecisionTreeClassifier(max_depth = 5), X_7, y_7, col_names)

# ====================== Clasificadores sin region distintas alturas ======================

d0 = ("SismosEgresos completos sin region altura 3", DecisionTreeClassifier(max_depth = 3), XnoReg, ynoReg, col_namesnoReg)
d1 = ("SismosEgresos completos sin region altura 4", DecisionTreeClassifier(max_depth = 4), XnoReg, ynoReg, col_namesnoReg)
d2 = ("SismosEgresos completos sin region altura 5", DecisionTreeClassifier(max_depth = 5), XnoReg, ynoReg, col_namesnoReg)
d3 = ("SismosEgresos completos sin region altura 6", DecisionTreeClassifier(max_depth = 6), XnoReg, ynoReg, col_namesnoReg)

d4 = ("SismosEgresos mag 5 sin region altura 4", DecisionTreeClassifier(max_depth = 4), X_5noReg, y_5noReg, col_namesnoReg)
d5 = ("SismosEgresos mag 5 sin region altura 5", DecisionTreeClassifier(max_depth = 5), X_5noReg, y_5noReg, col_namesnoReg)
d6 = ("SismosEgresos mag 5 sin region altura 6", DecisionTreeClassifier(max_depth = 6), X_5noReg, y_5noReg, col_namesnoReg)

d7 = ("SismosEgresos mag 6 sin region altura 4", DecisionTreeClassifier(max_depth = 4), X_6noReg, y_6noReg, col_namesnoReg)
d8 = ("SismosEgresos mag 6 sin region altura 5", DecisionTreeClassifier(max_depth = 5), X_6noReg, y_6noReg, col_namesnoReg)
d9 = ("SismosEgresos mag 6 sin region altura 6", DecisionTreeClassifier(max_depth = 6), X_6noReg, y_6noReg, col_namesnoReg)

d10 = ("SismosEgresos mag 7 sin region altura 3", DecisionTreeClassifier(max_depth = 3), X_7noReg, y_7noReg, col_namesnoReg)
d11 = ("SismosEgresos mag 7 sin region altura 4", DecisionTreeClassifier(max_depth = 4), X_7noReg, y_7noReg, col_namesnoReg)
d12 = ("SismosEgresos mag 7 sin region altura 5", DecisionTreeClassifier(max_depth = 5), X_7noReg, y_7noReg, col_namesnoReg)

classifiers = [c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,
               d0, d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11, d12]

print("clasificadores creados!")

clasificadores creados!


In [54]:
results = {}
for title, clf, dataX, dataY, names in classifiers:
    metrics = run_classifier(clf, dataX, dataY)   # hay que implementarla en el bloque anterior.
    results[name] = metrics
    print("----------------")
    print("Resultados para clasificador: ", title) 
    print("Precision promedio:",np.array(metrics['precision']).mean())
    print("Recall promedio:",np.array(metrics['recall']).mean())
    print("F1-score promedio:",np.array(metrics['f1-score']).mean())
    print("----------------\n\n")
    
    dot_data = export_graphviz(clf, feature_names = names)
    graph = graph_from_dot_data(dot_data)
    graph.write_png(title + '.png')
print("generación completa")

----------------
Resultados para clasificador:  SismosEgresos completos altura 3
Precision promedio: 0.5203619909502263
Recall promedio: 0.09770603228547149
F1-score promedio: 0.16452074391988553
----------------


----------------
Resultados para clasificador:  SismosEgresos completos altura 4
Precision promedio: 0.503968253968254
Recall promedio: 0.10790144435004251
F1-score promedio: 0.1777466759972009
----------------


----------------
Resultados para clasificador:  SismosEgresos completos altura 5
Precision promedio: 0.5016835016835017
Recall promedio: 0.12659303313508924
F1-score promedio: 0.2021709633649932
----------------


----------------
Resultados para clasificador:  SismosEgresos completos altura 6
Precision promedio: 0.5211926630215207
Recall promedio: 0.12074766355140185
F1-score promedio: 0.19606965666657783
----------------


----------------
Resultados para clasificador:  SismosEgresos mag 5 altura 4
Precision promedio: 0.24350000000000002
Recall promedio: 0.048
F1-