## Imports

In [104]:
import math
import numpy as np
import pandas
from catboost import CatBoostClassifier, Pool, metrics, cv
from sklearn.metrics import confusion_matrix, accuracy_score

# Preparando el dataset de entrenamiento

In [105]:
data_train = pandas.read_csv('data/perrosTrainAllComplete.csv')

data_train

Unnamed: 0,Mascota,Edad,Tamaño,Sexo,Patron de pelaje,Color de pelaje 1,Color de pelaje 2,Color de pelaje 3,Largo de pelaje,Color de ojos,Largo de hocico,Largo de cola,Largo de orejas,Tipo de orejas
0,1,Adulto,Mediano,Macho,Bicolor,Blanco,Negro,,Corto,Marron oscuro,Corto,Corta,Cortas,Caidas
1,2,Cachorro,Mediano,Hembra,Bicolor,Blanco,Marron,,Corto,Marron oscuro,Largo,Largo,Cortas,Caidas
2,3,Adulto,Mediano,Macho,Liso,Negro,,,Corto,Marron oscuro,Largo,Largo,Largas,Paradas
3,4,Adulto,Mediano,Hembra,Liso,Dorado,,,Corto,Marron oscuro,Largo,Largo,Cortas,Paradas
4,5,Cachorro,Chico,Hembra,Liso,Negro,,,Corto,Marron oscuro,Corto,Corta,Cortas,Caidas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
315,1910,Adulto,Chico,Macho,Bicolor,Dorado,Blanco,,Corto,Marron oscuro,Mediano,Mediana,Medianas,Caidas
316,1919,Cachorro,Mediano,Macho,Tricolor,Negro,Blanco,Marron,Largo,Marron oscuro,Corto,Corta,Medianas,Caidas
317,1940,Cachorro,Mediano,Macho,Liso,Marron,,,Corto,Marron claro,Largo,Mediana,Cortas,Caidas
318,1950,Adulto,Chico,Hembra,Bicolor,Negro,Marron,,Largo,Marron oscuro,Mediano,Corta,Largas,Caidas


### Obtenemos la cantidad de valores vacios

In [106]:
null_value_stats = data_train.isnull().sum(axis=0)
null_value_stats[null_value_stats != 0]

Color de pelaje 2    129
Color de pelaje 3    291
dtype: int64

### Lleno los nulos con strings vacios y quito la columna "Mascota"

In [107]:
data_train.fillna("NaN", inplace=True)
prepared_data_train = data_train.drop('Mascota', axis=1)
prepared_data_train

Unnamed: 0,Edad,Tamaño,Sexo,Patron de pelaje,Color de pelaje 1,Color de pelaje 2,Color de pelaje 3,Largo de pelaje,Color de ojos,Largo de hocico,Largo de cola,Largo de orejas,Tipo de orejas
0,Adulto,Mediano,Macho,Bicolor,Blanco,Negro,,Corto,Marron oscuro,Corto,Corta,Cortas,Caidas
1,Cachorro,Mediano,Hembra,Bicolor,Blanco,Marron,,Corto,Marron oscuro,Largo,Largo,Cortas,Caidas
2,Adulto,Mediano,Macho,Liso,Negro,,,Corto,Marron oscuro,Largo,Largo,Largas,Paradas
3,Adulto,Mediano,Hembra,Liso,Dorado,,,Corto,Marron oscuro,Largo,Largo,Cortas,Paradas
4,Cachorro,Chico,Hembra,Liso,Negro,,,Corto,Marron oscuro,Corto,Corta,Cortas,Caidas
...,...,...,...,...,...,...,...,...,...,...,...,...,...
315,Adulto,Chico,Macho,Bicolor,Dorado,Blanco,,Corto,Marron oscuro,Mediano,Mediana,Medianas,Caidas
316,Cachorro,Mediano,Macho,Tricolor,Negro,Blanco,Marron,Largo,Marron oscuro,Corto,Corta,Medianas,Caidas
317,Cachorro,Mediano,Macho,Liso,Marron,,,Corto,Marron claro,Largo,Mediana,Cortas,Caidas
318,Adulto,Chico,Hembra,Bicolor,Negro,Marron,,Largo,Marron oscuro,Mediano,Corta,Largas,Caidas


### Imprimo los tipos de cada columna

In [108]:
print(prepared_data_train.dtypes)


Edad                 object
Tamaño               object
Sexo                 object
Patron de pelaje     object
Color de pelaje 1    object
Color de pelaje 2    object
Color de pelaje 3    object
Largo de pelaje      object
Color de ojos        object
Largo de hocico      object
Largo de cola        object
Largo de orejas      object
Tipo de orejas       object
dtype: object


### Obtengo la lista de etiquetas de las categorias

In [109]:
dataset_labels = prepared_data_train.columns.to_list()
dataset_labels

['Edad',
 'Tamaño',
 'Sexo',
 'Patron de pelaje',
 'Color de pelaje 1',
 'Color de pelaje 2',
 'Color de pelaje 3',
 'Largo de pelaje',
 'Color de ojos',
 'Largo de hocico',
 'Largo de cola',
 'Largo de orejas',
 'Tipo de orejas']

In [110]:
mascotas_train_ids = data_train.Mascota
print(mascotas_train_ids)

0         1
1         2
2         3
3         4
4         5
       ... 
315    1910
316    1919
317    1940
318    1950
319    1954
Name: Mascota, Length: 320, dtype: int64


Obtengo las categorias en base a las que no son numericas, en este caso todas son categorias

In [111]:
categorical_features_indices = np.where(prepared_data_train.dtypes != np.float)[0]
categorical_features_indices

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

# Preparando el dataset de testeo

In [112]:
dataset_test = pandas.read_csv('./data/perrosTestComplete.csv')
dataset_test.head()

Unnamed: 0,Mascota,Edad,Tamaño,Sexo,Patron de pelaje,Color de pelaje 1,Color de pelaje 2,Color de pelaje 3,Largo de pelaje,Color de ojos,Largo de hocico,Largo de cola,Largo de orejas,Tipo de orejas
0,666,Adulto,Grande,Macho,Bicolor,Negro,Blanco,,Corto,Marron oscuro,Largo,Corto,Mediano,Caidas


In [113]:
null_value_stats = dataset_test.isnull().sum(axis=0)
null_value_stats[null_value_stats != 0]

Color de pelaje 3    1
dtype: int64

In [114]:
dataset_test.fillna("NaN", inplace=True)
prepared_dataset_test = dataset_test.drop('Mascota', axis=1)
prepared_dataset_test.head()

Unnamed: 0,Edad,Tamaño,Sexo,Patron de pelaje,Color de pelaje 1,Color de pelaje 2,Color de pelaje 3,Largo de pelaje,Color de ojos,Largo de hocico,Largo de cola,Largo de orejas,Tipo de orejas
0,Adulto,Grande,Macho,Bicolor,Negro,Blanco,,Corto,Marron oscuro,Largo,Corto,Mediano,Caidas


In [115]:
print(prepared_dataset_test.dtypes)

Edad                 object
Tamaño               object
Sexo                 object
Patron de pelaje     object
Color de pelaje 1    object
Color de pelaje 2    object
Color de pelaje 3    object
Largo de pelaje      object
Color de ojos        object
Largo de hocico      object
Largo de cola        object
Largo de orejas      object
Tipo de orejas       object
dtype: object


In [116]:
mascotas_test_ids = dataset_test.Mascota
mascotas_test_ids.head()

0    666
Name: Mascota, dtype: int64

# Creando y entrenando el modelo

### Creacion del modelo

In [117]:
train_dataset = Pool(data=prepared_data_train,
                     label=mascotas_train_ids,
                     cat_features=categorical_features_indices)


In [67]:
train_dataset = Pool(data=prepared_data_train,
                     label=mascotas_train_ids,
                     cat_features=categorical_features_indices)

# Initialize CatBoostClassifier
''' model = CatBoostClassifier(iterations=100,
                           depth=3,
                           loss_function="MultiLogloss") '''

modelSinPesos = CatBoostClassifier(iterations=50,
                            learning_rate=0.25,
                            depth=3,
                            auto_class_weights="Balanced",
                            loss_function="MultiClass")

modelConPesos = CatBoostClassifier(iterations=50,
                            learning_rate=0.25,
                            depth=3,
                            auto_class_weights="Balanced",
                            feature_weights=[0.05,
                                            0.15,
                                            0.03,
                                            0.15,
                                            0.15,
                                            0.15,
                                            0.10,
                                            0.05,
                                            0.05,
                                            0.03,
                                            0.03,
                                            0.03,
                                            0.03],
                            loss_function="MultiClass")



In [71]:
modelMayorPeso = CatBoostClassifier(iterations=50,
                                   learning_rate=0.25,
                                   depth=3,
                                   auto_class_weights="Balanced",
                                   feature_weights=[0.05,
                                                    0.15,
                                                    0.03,
                                                    0.15,
                                                    0.50,
                                                    0.15,
                                                    0.10,
                                                    0.05,
                                                    0.05,
                                                    0.03,
                                                    0.03,
                                                    0.03,
                                                    0.03],
                                   loss_function="MultiClass")

modelMayorPeso.fit(train_dataset)


0:	learn: 5.7588491	total: 8.95s	remaining: 7m 18s
1:	learn: 5.7227960	total: 17.7s	remaining: 7m 5s
2:	learn: 5.7215113	total: 22.4s	remaining: 5m 51s
3:	learn: 5.7099712	total: 30.4s	remaining: 5m 49s
4:	learn: 5.6636929	total: 39.1s	remaining: 5m 51s
5:	learn: 5.6143973	total: 47.7s	remaining: 5m 49s
6:	learn: 5.6121399	total: 52.7s	remaining: 5m 23s
7:	learn: 5.5635351	total: 1m 1s	remaining: 5m 23s
8:	learn: 5.5213396	total: 1m 10s	remaining: 5m 20s
9:	learn: 5.5152030	total: 1m 14s	remaining: 4m 59s
10:	learn: 5.4797116	total: 1m 23s	remaining: 4m 57s
11:	learn: 5.4720420	total: 1m 28s	remaining: 4m 40s
12:	learn: 5.4193894	total: 1m 37s	remaining: 4m 36s
13:	learn: 5.4192012	total: 1m 39s	remaining: 4m 14s
14:	learn: 5.4072322	total: 1m 47s	remaining: 4m 10s
15:	learn: 5.3570995	total: 1m 55s	remaining: 4m 6s
16:	learn: 5.3241291	total: 2m 4s	remaining: 4m 1s
17:	learn: 5.3238933	total: 2m 6s	remaining: 3m 45s
18:	learn: 5.2800097	total: 2m 14s	remaining: 3m 40s
19:	learn: 5.249

<catboost.core.CatBoostClassifier at 0x7ff937115d00>

In [None]:
modelMayoresPesos = CatBoostClassifier(iterations=50,
                                    learning_rate=0.25,
                                    depth=3,
                                    auto_class_weights="Balanced",
                                    feature_weights=[0.20,
                                                     0.50,
                                                     0.10,
                                                     0.50,
                                                     0.50,
                                                     0.50,
                                                     0.10,
                                                     0.20,
                                                     0.20,
                                                     0.10,
                                                     0.10,
                                                     0.10,
                                                     0.10],
                                    loss_function="MultiClass")

modelMayoresPesos.fit(train_dataset)


In [None]:
modelMayoresPesos2 = CatBoostClassifier(iterations=50,
                                       learning_rate=0.25,
                                       depth=3,
                                       auto_class_weights="Balanced",
                                       feature_weights=[0.10,
                                                        0.30,
                                                        0.10,
                                                        0.30,
                                                        0.30,
                                                        0.30,
                                                        0.05,
                                                        0.10,
                                                        0.10,
                                                        0.05,
                                                        0.05,
                                                        0.05,
                                                        0.05],
                                       loss_function="MultiClass")

modelMayoresPesos2.fit(train_dataset)


In [122]:
modelConPesosSinBalanced = CatBoostClassifier(iterations=250,
                                   learning_rate=0.25,
                                   depth=3,
                                   feature_weights=[0.05,
                                                    0.20,
                                                    0.03,
                                                    0.10,
                                                    0.20,
                                                    0.10,
                                                    0.08,
                                                    0.05,
                                                    0.05,
                                                    0.03,
                                                    0.03,
                                                    0.03,
                                                    0.03],
                                   loss_function="MultiClass")

modelConPesosSinBalanced.fit(train_dataset)

Custom logger is already specified. Specify more than one logger at same time is not thread safe.

0:	learn: 5.7356039	total: 25.3s	remaining: 1h 44m 54s
1:	learn: 5.7321722	total: 36.1s	remaining: 1h 14m 37s
2:	learn: 5.7061687	total: 1m	remaining: 1h 22m 20s
3:	learn: 5.6655650	total: 1m 28s	remaining: 1h 30m 13s
4:	learn: 5.6455415	total: 1m 51s	remaining: 1h 31m 26s
5:	learn: 5.6283051	total: 2m 12s	remaining: 1h 29m 36s
6:	learn: 5.5727865	total: 2m 36s	remaining: 1h 30m 16s
7:	learn: 5.5354985	total: 2m 58s	remaining: 1h 30m 7s
8:	learn: 5.5068839	total: 3m 15s	remaining: 1h 27m 22s
9:	learn: 5.4696602	total: 3m 37s	remaining: 1h 26m 51s
10:	learn: 5.4693801	total: 3m 39s	remaining: 1h 19m 28s
11:	learn: 5.4468430	total: 3m 53s	remaining: 1h 17m 18s
12:	learn: 5.4465850	total: 3m 55s	remaining: 1h 11m 36s
13:	learn: 5.4402282	total: 4m 7s	remaining: 1h 9m 27s
14:	learn: 5.3798591	total: 4m 21s	remaining: 1h 8m 9s
15:	learn: 5.3426130	total: 4m 34s	remaining: 1h 7m 1s
16:	learn: 5.3056462	total: 4m 49s	remaining: 1h 6m 2s
17:	learn: 5.3026713	total: 4m 54s	remaining: 1h 3m 19s


<catboost.core.CatBoostClassifier at 0x7ff936fd5f40>

In [68]:
modelSinPesos.fit(train_dataset)
modelConPesos.fit(train_dataset)


Custom logger is already specified. Specify more than one logger at same time is not thread safe.

0:	learn: 5.7334822	total: 9.28s	remaining: 7m 34s
1:	learn: 5.7021220	total: 18.1s	remaining: 7m 14s
2:	learn: 5.6531288	total: 27s	remaining: 7m 3s
3:	learn: 5.6033219	total: 35.9s	remaining: 6m 53s
4:	learn: 5.5875500	total: 44.9s	remaining: 6m 43s
5:	learn: 5.5874762	total: 47.1s	remaining: 5m 45s
6:	learn: 5.5717649	total: 55.9s	remaining: 5m 43s
7:	learn: 5.5676877	total: 1m 4s	remaining: 5m 39s
8:	learn: 5.5208652	total: 1m 13s	remaining: 5m 36s
9:	learn: 5.4712513	total: 1m 22s	remaining: 5m 30s
10:	learn: 5.4272571	total: 1m 31s	remaining: 5m 24s
11:	learn: 5.3794519	total: 1m 40s	remaining: 5m 17s
12:	learn: 5.3226654	total: 1m 48s	remaining: 5m 10s
13:	learn: 5.3173159	total: 1m 57s	remaining: 5m 2s
14:	learn: 5.3016383	total: 2m 6s	remaining: 4m 55s
15:	learn: 5.2789431	total: 2m 15s	remaining: 4m 48s
16:	learn: 5.2339508	total: 2m 24s	remaining: 4m 40s
17:	learn: 5.2110549	total: 2m 33s	remaining: 4m 32s
18:	learn: 5.1736722	total: 2m 42s	remaining: 4m 24s
19:	learn: 5.160

<catboost.core.CatBoostClassifier at 0x7ff937115eb0>

### Cargando un model

In [28]:
# model2 = CatBoostClassifier().load_model("models/model1.cbm", format='cbm')
# model3 = CatBoostClassifier().load_model(
#     "models/modelDepth3.cbm", format='cbm')
# model4 = CatBoostClassifier().load_model(
#     "models/modelDepth3Ite100.cbm", format='cbm')

model_depth3 = CatBoostClassifier().load_model(
    "models/modelDepth3.cbm", format='cbm')

model_100it_D10 = CatBoostClassifier().load_model(
    "models/modelDepth10Ite100.cbm", format='cbm')

model_100it_lr05_D8 = CatBoostClassifier().load_model(
    "models/modelIt100Lr05D8.cbm", format='cbm')

model_2000it_lr015_D3 = CatBoostClassifier().load_model(
    "models/modelIt2000Lr015D3.cbm", format='cbm')

model_Depth6Ite1000 = CatBoostClassifier().load_model(
    "models/modelDepth6Ite1000.cbm", format='cbm')

model_3000_D3lr025 = CatBoostClassifier().load_model(
    "models/modelIt3000D3Lr025.cbm", format='cbm')

model_D4_ite2000 = CatBoostClassifier().load_model(
    "models/modelDepth4Ite2000.cbm", format='cbm')

model_D4_ite2000_bala = CatBoostClassifier().load_model(
    "models/modelDepth4Ite2000Bala.cbm", format='cbm')

model_D4_ite2000_bala_sqrt = CatBoostClassifier().load_model(
    "models/modelDepth4Ite2000BalaSqrt.cbm", format='cbm')


# **Entrenamiento**

In [126]:
''' model.fit(train_dataset)

model.save_model("models/modelIt3000D3Lr025.cbm",
                    format="cbm",
                    export_parameters=None,
                    pool=None) '''

modelConPesosSinBalanced.save_model("models/modelIt250D3_pesos.cbm",
                    format="cbm",
                    export_parameters=None,
                    pool=None)

''' model.save_model("models/modelIt2000Lr015D3.json",
                    format="json",
                    export_parameters=None,
                    pool=None) '''


' model.save_model("models/modelIt2000Lr015D3.json",\n                    format="json",\n                    export_parameters=None,\n                    pool=None) '

# Guardando el model

In [117]:
model.save_model("models/modelDepth3Ite100.cbm",
                    format="cbm",
                    export_parameters=None,
                    pool=None)

model.save_model("models/modelDepth3Ite100.json",
                    format="json",
                    export_parameters=None,
                    pool=None)


CatBoostError: There is no trained model to use save_model(). Use fit() to train model. Then use this method.

# Generar predicciones

In [None]:
eval_dataset = Pool(data=prepared_dataset_test,
                    label=mascotas_test_ids)


In [32]:
# Get predicted classes
# preds_class = model.predict(eval_dataset)
# preds_class2 = model_depth3.predict(eval_dataset)
# preds_class3 = model3.predict(eval_dataset)
# preds_class4 = model4.predict(eval_dataset)
# preds_class_100i_10d = model_100it_10d.predict(eval_dataset)
# preds_class_2000i_3d = model_2000it_3d.predict(eval_dataset)
# preds_class_multi_100 = model.predict(eval_dataset)

# Get predicted probabilities for each class
# preds_proba = model.predict_proba(eval_dataset)
preds_proba_model_depth3 = model_depth3.predict_proba(eval_dataset)

preds_proba_model_100it_D10 = model_100it_D10.predict_proba(eval_dataset)

preds_proba_model_100it_lr05_D8 = model_100it_lr05_D8.predict_proba(eval_dataset)

preds_proba_model_2000it_lr015_D3 = model_2000it_lr015_D3.predict_proba(
    eval_dataset)

preds_proba_modelDepth6Ite1000 = model_Depth6Ite1000.predict_proba(
    eval_dataset)

preds_proba_model_3000_D3lr025 = model_3000_D3lr025.predict_proba(
    eval_dataset)

preds_proba_model_D4_ite2000 = model_D4_ite2000.predict_proba(
    eval_dataset)

preds_proba_model_D4_ite2000_bala = model_D4_ite2000_bala.predict_proba(
    eval_dataset)

preds_proba_model_D4_ite2000_bala_sqrt = model_D4_ite2000_bala_sqrt.predict_proba(
    eval_dataset)


# Importancia de las caracteristicas

In [125]:
# model_depth3.get_feature_importance(prettified=True)
# model_100it_D10.get_feature_importance(prettified=True)
# model_100it_lr05_D8.get_feature_importance(prettified=True)
# model_2000it_lr015_D3.get_feature_importance(prettified=True)
# model_MultiClassOneVsAll_100it_D3.get_feature_importance(prettified=True)
# model_Depth6Ite1000.get_feature_importance(prettified=True)
# print(model_3000_D3lr025.get_feature_importance(prettified=True))
# # model_D4_ite2000.get_feature_importance(prettified=True)
# print("\n")
# print(model_D4_ite2000_bala.get_feature_importance(prettified=True))
# print(model_D4_ite2000_bala_sqrt.get_feature_importance(prettified=True))
# print("SIN PESOS")
# print(modelSinPesos.get_feature_importance(prettified=True))
# print("CON PESOS\n")
# print(modelConPesos.get_feature_importance(prettified=True))
# print("CON MAS PESOS\n")
# print(modelMayorPeso.get_feature_importance(prettified=True))
# print("CON MAS PESOS CALIBRADO 1\n")
# print(modelMayoresPesos.get_feature_importance(prettified=True))
print("CON MAS PESOS CALIBRADO SIN BALANCED\n")
print(modelConPesosSinBalanced.get_feature_importance(prettified=True))

CON MAS PESOS CALIBRADO SIN BALANCED

           Feature Id  Importances
0   Color de pelaje 1    49.455862
1              Tamaño    45.170790
2       Color de ojos     1.409420
3     Largo de hocico     1.241698
4       Largo de cola     1.221465
5                Sexo     1.045537
6   Color de pelaje 3     0.455227
7                Edad     0.000000
8    Patron de pelaje     0.000000
9   Color de pelaje 2     0.000000
10    Largo de pelaje     0.000000
11    Largo de orejas     0.000000
12     Tipo de orejas     0.000000


Custom logger is already specified. Specify more than one logger at same time is not thread safe.

In [119]:
print(modelConPesosSinBalanced.get_feature_importance(prettified=True))

           Feature Id  Importances
0    Patron de pelaje    74.102389
1     Largo de pelaje    11.396624
2     Largo de hocico     5.504249
3     Largo de orejas     4.983779
4       Color de ojos     4.012959
5                Edad     0.000000
6              Tamaño     0.000000
7                Sexo     0.000000
8   Color de pelaje 1     0.000000
9   Color de pelaje 2     0.000000
10  Color de pelaje 3     0.000000
11      Largo de cola     0.000000
12     Tipo de orejas     0.000000


In [101]:
''' print("1: ",preds_class)
print("2: ",preds_class2)
print("3: ",preds_class3)
print("4: ",preds_class4)
print("5: ",preds_class_100i_10d) '''

' print("1: ",preds_class)\nprint("2: ",preds_class2)\nprint("3: ",preds_class3)\nprint("4: ",preds_class4)\nprint("5: ",preds_class_100i_10d) '

In [245]:
print(len(preds_proba_model_depth3))

5


## Probabilidades
Un array por cada mascota de testeo

In [36]:
# print(preds_proba[0])
# print(preds_proba2[0])
# print(preds_proba3[0])
# print(preds_proba4[0])
# print(preds_proba_2000i_3d[0])

[6.26140509e-04 1.69331875e-04 4.03383182e-04 1.14481702e-04
 2.38157270e-03 6.06536351e-04 9.27039312e-05 1.57059303e-04
 1.04281290e-04 1.15267865e-04 2.89913448e-04 4.79095100e-04
 3.03035637e-04 1.58926783e-04 5.16031867e-05 1.25123122e-04
 4.51938462e-03 3.42525064e-04 3.30321159e-04 1.05464628e-04
 2.02569538e-04 6.22343097e-05 8.70825916e-04 2.48598575e-02
 3.47441732e-04 1.64427712e-02 1.78428490e-03 7.33432121e-05
 1.27363457e-03 5.05813639e-03 2.91240231e-03 1.37676113e-04
 2.44360820e-04 7.39147673e-05 7.22971088e-04 5.99907598e-05
 3.50249010e-04 1.75633165e-04 3.02062239e-04 2.93627393e-03
 2.41822542e-04 3.92227001e-04 1.67515060e-04 1.47005780e-03
 1.32848050e-03 9.32283694e-05 9.53372801e-05 1.01569044e-03
 1.76356331e-03 8.41098752e-04 3.06966075e-04 2.29711042e-04
 2.07694115e-04 1.80506664e-04 2.03295538e-03 2.80723568e-03
 7.99760937e-05 1.07930930e-02 2.26764655e-04 1.22160712e-04
 1.75744995e-04 2.03827488e-04 6.02994947e-04 1.64438087e-02
 1.05469912e-04 2.844797

In [90]:
''' contador = 0
while contador < 320:
    if(preds_proba_100i_10d[0][contador] == 0.031220734475029422):
        print(contador, " --> ", preds_proba_100i_10d[0][contador])
    contador = contador +1

print(preds_proba_100i_10d[0][116]) '''

id_mascota = np.where(data_train.Mascota == 3789)[0]

# print(preds_proba_100i_10d[0][id_mascota])


[0.00375544]


### Supuestas predicciones

In [23]:
mascota_numero = 0

for prediccion in preds_class:
    probabilidad = math.trunc((preds_proba[mascota_numero][prediccion.index()])*100)
    print("-- Mascota "+str(mascotas_test_ids[mascota_numero])+" es similar a la mascota "+str(prediccion.item())
        +" => "+str(probabilidad)+"% de probabilidad")
    # print(str(preds_proba[mascota_numero]))
    print(dataset_test.loc[mascota_numero:mascota_numero, "Edad":"Tipo de orejas"].values)
    print(prepared_data_train.loc[prediccion.item():prediccion.item(), "Edad":"Tipo de orejas"].values)
    mascota_numero+=1


AttributeError: 'numpy.ndarray' object has no attribute 'index'

In [40]:
class Mascota:
    def __init__(self,index, id, prob):
        self.id = id
        self.index = index
        self.prob = prob

    def to_string(self):
        return "{}\t|\t{}\t=>\t{}%".format(self.index,self.id,self.prob)

    def get_id(self):
        return self.id
    
    def get_prob(self):
        return self.prob

    def get_index(self):
        return self.index

# KeyValue

In [61]:
def keyValue(list_proba):
    mascota_test_index = 50
    # mascota_probs = list_proba[0]

    mascota_test_id = str(mascotas_test_ids[mascota_test_index].item())
    print(mascota_test_id)
    
    mascotas_probs = []

    for prob_index in range(0, len(list_proba[mascota_test_index])):
        probabilidad = list_proba[mascota_test_index][prob_index]*100
        # probabilidad = list_proba[mascota_test_index][prob_index]

        # print(probabilidad)
        # print(mascotas_train_ids[prob_index], " --> ",
            #   list_proba[0][prob_index], " >> ", probabilidad)
        # print(list_proba[mascota_test_index][41])
        # print(mascotas_train_ids[prob_index],"=>","{0:.2f}".format(probabilidad),"%")

        mascotas_probs.append(
            Mascota(prob_index,mascotas_train_ids[prob_index], probabilidad))
        
    # print(len(mascotas_probs))

    def orderProb(n):
        return n.get_prob()

    def orderId(n):
        return n.get_id()

    def orderIndex(n):
        return n.get_index()

    mascotas_probs.sort(key=orderProb, reverse=True)

    for mascota in mascotas_probs:
        print(mascota.to_string())
    # print(mascotas_probs[0].to_string())


# keyValue(preds_proba_model_depth3)
# keyValue(preds_proba_model_100it_D10)
# keyValue(preds_proba_model_100it_lr05_D8)
# keyValue(preds_proba_model_2000it_lr015_D3)
# keyValue(preds_proba_model_MultiClassOneVsAll_100it_D3)
# keyValue(preds_proba_modelDepth6Ite1000)
# keyValue(preds_proba_model_3000_D3lr025)
# keyValue(preds_proba_model_D4_ite2000)
keyValue(preds_proba_model_D4_ite2000_bala)
# keyValue(preds_proba_model_D4_ite2000_bala_sqrt)


60
23	|	24	=>	5.204187953335905%
307	|	1863	=>	2.976768323593086%
269	|	1640	=>	2.675697499368698%
25	|	26	=>	2.037600870970163%
16	|	17	=>	1.9200589842091969%
87	|	3485	=>	1.7494488543746072%
260	|	4491	=>	1.7143955197237448%
263	|	4492	=>	1.4677213616709694%
44	|	45	=>	1.439932170988759%
192	|	4340	=>	1.4274614740731144%
48	|	49	=>	1.3852456843255507%
4	|	5	=>	1.3195712950104739%
302	|	1821	=>	1.2396049748416853%
287	|	1702	=>	1.2309561690125324%
50	|	60	=>	1.1519147498105917%
158	|	4194	=>	1.1207721220061522%
144	|	4073	=>	1.0429603409390868%
237	|	6049	=>	1.0136528466376602%
201	|	4352	=>	0.9952329474947906%
30	|	31	=>	0.9804372385864%
225	|	6045	=>	0.9565640221951092%
214	|	4391	=>	0.9375398029982823%
39	|	40	=>	0.9263418514420467%
0	|	1	=>	0.8878747750122836%
219	|	6041	=>	0.8724212266067408%
49	|	50	=>	0.8633247589014783%
191	|	1460	=>	0.8401070928124106%
180	|	1434	=>	0.8211678895748489%
29	|	30	=>	0.8096180097308758%
55	|	62	=>	0.8075776889152828%
28	|	29	=>	0.7809954797012607

In [63]:
customers = ['Kaley Fernandez', 'Darius Rowland',
             'Isaac Borthwick',  'Alexandria Kidd']
sorted_customers = sorted(customers)
print(sorted_customers)


['Alexandria Kidd', 'Darius Rowland', 'Isaac Borthwick', 'Kaley Fernandez']
