In [6]:
import numpy as np 
import pandas as pd 

from sklearn.metrics import cohen_kappa_score, accuracy_score,balanced_accuracy_score

from plotly import express as px

from utils import plot_confusion_matrix, get_artifact_filename

import os

from json import loads

from joblib import load, dump

import optuna
from optuna.artifacts import FileSystemArtifactStore, upload_artifact

In [7]:
# Paths
BASE_DIR = '../'
PATH_TO_TRAIN = os.path.join(BASE_DIR, "input/petfinder-adoption-prediction/train/train.csv")
PATH_TO_MODELS = os.path.join(BASE_DIR, "work/models")
PATH_TO_TEMP_FILES = os.path.join(BASE_DIR, "work/optuna_temp_artifacts")
PATH_TO_OPTUNA_ARTIFACTS = os.path.join(BASE_DIR, "work/optuna_artifacts")

In [8]:
study_lgb = optuna.create_study(direction='maximize',
                            storage="sqlite:///../work/db.sqlite3",  # Specify the storage URL here.
                            study_name="04 - LGB Multiclass CV",
                            load_if_exists = True)

# Get the artifact filename
artifact_filename = get_artifact_filename(study_lgb,'test')

# Combine the path and filename
artifact_path = os.path.join(PATH_TO_OPTUNA_ARTIFACTS, artifact_filename)

print(os.path.exists(PATH_TO_OPTUNA_ARTIFACTS))
print(os.path.normpath(artifact_path))
print(os.path.exists(artifact_path))

lgb_dataset = load(artifact_path)

[I 2024-09-23 17:43:01,882] Using an existing study with name '04 - LGB Multiclass CV' instead of creating a new one.


True
..\work\optuna_artifacts\21d06fa3-67b0-4098-9421-a6e6c4a03e55
True


In [9]:
lgb_dataset

Unnamed: 0,Type,Name,Age,Breed1,Breed2,Gender,Color1,Color2,Color3,MaturitySize,...,RescuerID_Word_Count,MaturitySize_FurLength,PhotoAmt_FurLength,VideoAmt_FurLength,prediction_class_0,prediction_class_1,prediction_class_2,prediction_class_3,prediction_class_4,pred
14696,1,Dione & Elora,1,307,307,2,1,0,0,2,...,1,4,18.0,0,0.002637,0.156636,0.266874,0.471452,0.102400,"[0.04676390591402284, 0.7782228425274123, 2.32..."
14823,1,Har-nee,24,103,307,2,1,2,4,2,...,1,2,2.0,0,0.016761,0.166559,0.207251,0.194078,0.415351,"[0.07834690950229141, 1.1501783068876406, 1.02..."
2838,1,The Gorgeous 5 Beauties,2,307,0,2,2,7,0,2,...,1,2,5.0,0,0.019105,0.204035,0.353514,0.259572,0.163773,"[0.06196509584556729, 0.7925215856085531, 1.49..."
1848,2,Mochi,1,265,0,1,2,0,0,1,...,1,2,12.0,4,0.017194,0.299177,0.536504,0.100774,0.046351,"[0.1143038921277312, 1.1889348421776806, 2.182..."
669,2,Nala & Peach,9,266,266,2,2,4,6,2,...,1,2,8.0,0,0.008867,0.044874,0.324296,0.202780,0.419182,"[0.10835417602341627, 0.7268431636301831, 1.12..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
996,2,Anak Nanya,8,266,0,3,1,2,0,2,...,1,2,14.0,0,0.001659,0.028627,0.243163,0.213709,0.512842,"[0.03783643627609431, 0.535025556894434, 0.909..."
12222,1,Poor Baby,3,307,0,1,5,0,0,2,...,1,2,2.0,0,0.012552,0.302023,0.289652,0.160641,0.235132,"[0.05345929047926969, 1.3894126384434298, 1.36..."
10538,2,No Name,1,265,0,2,1,6,0,1,...,1,2,4.0,0,0.020152,0.481700,0.305483,0.129244,0.063421,"[0.08351152798647285, 2.268266953473084, 1.472..."
11062,1,Pipi,1,307,0,2,1,5,7,2,...,1,2,1.0,0,0.019263,0.177707,0.265056,0.190629,0.347345,"[0.14612655333264096, 0.6855164303520578, 1.23..."


In [10]:
MODEL_NAME = '04 ResNet'
MODEL_VERSION = '5.0.1'

study_resnet = optuna.create_study(direction='maximize',
                            storage="sqlite:///../work/db.sqlite3",  # Specify the storage URL here.
                            study_name=f'{MODEL_NAME}_{MODEL_VERSION}',
                            load_if_exists = True)

resnet_dataset = load(os.path.join(PATH_TO_OPTUNA_ARTIFACTS,get_artifact_filename(study_resnet,'test')))

[I 2024-09-23 17:43:02,047] Using an existing study with name '04 ResNet_5.0.0' instead of creating a new one.


In [11]:
resnet_dataset

Unnamed: 0,PetID,pred,Type,Name,Age,Breed1,Breed2,Gender,Color1,Color2,...,Sterilized,Health,Quantity,Fee,State,RescuerID,VideoAmt,Description,PhotoAmt,AdoptionSpeed
0,015da9e87,"[-0.6395981, 0.6651981, 0.5748904, -0.31238794...",2,Adik Gebuk (Betina),2,265,266,2,2,5,...,2,1,1,0,41326,d718a8deb57887c6ee18b757484273c8,0,Nama: Gebuk (Betina)- Adik beradik dengan Gebu...,5.0,0
1,022606901,"[-2.6306574, 0.067541316, 1.3824433, 1.1585656...",1,,3,141,307,1,1,0,...,2,1,1,0,41401,c4b8b921e00ba5dc19e793b81987f40f,1,Hi all =) My friend is currently looking for s...,5.0,0
2,02f89bdcb,"[-2.239159, -0.067799896, 0.28544965, 0.531364...",1,Rex,72,141,0,1,5,0,...,2,1,1,0,41326,e76b700e2c869088979aa5efeb962dd7,0,Friendly and playful. Good watchdog because of...,3.0,0
3,0cf7fae9d,"[-0.8543492, 0.70902026, 0.50105274, 0.0274021...",2,KITTENS - URGENT ADOPTION,1,266,0,3,1,2,...,2,1,4,0,41326,1eea485b01d14c668f33afa7c919646e,0,These 4 kittens need urgent adoption because t...,1.0,0
4,0e922caab,"[-0.77751434, 0.86782026, 0.4385824, 0.1009984...",1,Ha Ha (Toy Poodle),12,179,0,1,2,0,...,2,1,1,300,41326,225d19c861c7c5d20a9c3ba1b2d37753,0,Ha Ha belongs to my friend who migrated to ano...,5.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2926,ff2cf88a0,"[-2.2335615, 0.40898055, 0.6233817, 0.5892416,...",1,JACKSON,12,307,0,1,2,0,...,1,1,1,0,41326,2266840747a7440f9f3453e31b384df5,0,Meet Jackson. He's a eye catcher and always re...,2.0,4
2927,ff498c903,"[-1.8505809, 0.53731877, 0.66788495, 0.6367058...",1,Lady,63,189,307,2,1,2,...,1,1,1,0,41326,03806ca295ace12b7463f4ed036cbb0e,0,Lady was an abandoned dog last time after she ...,5.0,4
2928,ff50c6171,"[-1.4939973, 0.82045585, 0.5668725, -0.0161002...",2,Gari,4,247,0,2,1,2,...,2,1,1,50,41326,2ca58d9cdf6107e7169985db6562bc3e,0,House kitten. Malaysian friend gave to me. but...,5.0,4
2929,ff5e30380,"[-2.6725092, 0.10118386, 0.9888101, 0.42786977...",2,Fa Meow,4,254,0,2,1,3,...,1,1,1,100,41401,1faf0ae111772205cf2f28b3ecea3276,0,"Long haired like persian cat, friendly, fast r...",5.0,4


In [12]:
merged_datasets = lgb_dataset[['PetID', 'pred', 'AdoptionSpeed']].rename({'pred':'lgb_pred_score'},axis=1).merge(resnet_dataset[['PetID', 'pred']].rename({'pred':'resnet_pred_score'},axis=1),
                  on='PetID', how='outer')



merged_datasets['resnet_pred_score'] = [np.zeros(5) if type(i) is float else  i for i in merged_datasets['resnet_pred_score'] ]

In [13]:
merged_datasets['resnet_pred_score']

0       [-0.9214199, 0.854824, 0.47305572, 0.10514471,...
1       [-1.2277287, 0.78003865, 0.4918236, 0.2888547,...
2       [-1.4383341, 1.0272977, 1.1034435, 0.41163832,...
3       [-2.7544413, -0.31180084, 0.8000306, 0.5435256...
4       [-1.8303453, 0.60584456, 0.9848707, 0.6922527,...
                              ...                        
2994    [-1.5485533, 0.5682332, 0.1791851, 0.08940087,...
2995    [-2.529572, -0.50512755, 0.7474847, 0.87661374...
2996    [-1.4947095, 1.13886, 1.1923221, 0.25992072, -...
2997    [-1.4132695, -0.032174505, 0.72043836, 0.38531...
2998    [-1.7628758, 0.42222506, 0.8364029, 0.88034445...
Name: resnet_pred_score, Length: 2999, dtype: object

In [14]:
merged_datasets['blend_pred_score'] = [r['lgb_pred_score']+r['resnet_pred_score'] for i,r in merged_datasets.iterrows()]

In [15]:
merged_datasets['lgb_pred'] = [r.argmax() for r in merged_datasets['lgb_pred_score']]
merged_datasets['resnet_pred'] = [r.argmax() for r in merged_datasets['resnet_pred_score']]
merged_datasets['blended_pred'] = [r.argmax() for r in merged_datasets['blend_pred_score']]

In [16]:
plot_confusion_matrix(merged_datasets['AdoptionSpeed'],
                      merged_datasets['lgb_pred'], 
                    title = 'LGB Model Kappa: ' + str(cohen_kappa_score(merged_datasets['AdoptionSpeed'],
                                                                    merged_datasets['lgb_pred'], 
                                                                    weights='quadratic')))

In [17]:
plot_confusion_matrix(merged_datasets['AdoptionSpeed'],
                      merged_datasets['resnet_pred'], 
                    title = 'Resnet Model Kappa: ' + str(cohen_kappa_score(merged_datasets['AdoptionSpeed'],
                                                                    merged_datasets['resnet_pred'], 
                                                                    weights='quadratic')))



In [18]:
plot_confusion_matrix(merged_datasets['AdoptionSpeed'],
                      merged_datasets['blended_pred'], 
                    title = 'Blended Model Kappa: ' + str(cohen_kappa_score(merged_datasets['AdoptionSpeed'],
                                                                    merged_datasets['blended_pred'], 
                                                                    weights='quadratic')))
