In [23]:
import numpy as np 
import pandas as pd 

from sklearn.metrics import cohen_kappa_score, accuracy_score,balanced_accuracy_score

from plotly import express as px

from utils import plot_confusion_matrix, get_artifact_filename

import os

from json import loads

from joblib import load, dump

import optuna
from optuna.artifacts import FileSystemArtifactStore, upload_artifact

In [24]:
# Paths
BASE_DIR = '../'
PATH_TO_TRAIN = os.path.join(BASE_DIR, "input/petfinder-adoption-prediction/train/train.csv")
PATH_TO_MODELS = os.path.join(BASE_DIR, "work/models")
PATH_TO_TEMP_FILES = os.path.join(BASE_DIR, "work/optuna_temp_artifacts")
PATH_TO_OPTUNA_ARTIFACTS = os.path.join(BASE_DIR, "work/optuna_artifacts")

In [25]:
study_lgb = optuna.create_study(direction='maximize',
                            storage="sqlite:///../work/db.sqlite3",  # Specify the storage URL here.
                            study_name="04 - LGB Multiclass CV",
                            load_if_exists = True)

# Get the artifact filename
artifact_filename = get_artifact_filename(study_lgb,'test')

# Combine the path and filename
artifact_path = os.path.join(PATH_TO_OPTUNA_ARTIFACTS, artifact_filename)

print(os.path.exists(PATH_TO_OPTUNA_ARTIFACTS))
print(os.path.normpath(artifact_path))
print(os.path.exists(artifact_path))

lgb_dataset = load(artifact_path)

[I 2024-09-26 20:18:39,496] Using an existing study with name '04 - LGB Multiclass CV' instead of creating a new one.


True
..\work\optuna_artifacts\21d06fa3-67b0-4098-9421-a6e6c4a03e55
True


In [26]:
lgb_dataset

Unnamed: 0,Type,Name,Age,Breed1,Breed2,Gender,Color1,Color2,Color3,MaturitySize,...,RescuerID_Word_Count,MaturitySize_FurLength,PhotoAmt_FurLength,VideoAmt_FurLength,prediction_class_0,prediction_class_1,prediction_class_2,prediction_class_3,prediction_class_4,pred
14696,1,Dione & Elora,1,307,307,2,1,0,0,2,...,1,4,18.0,0,0.002637,0.156636,0.266874,0.471452,0.102400,"[0.04676390591402284, 0.7782228425274123, 2.32..."
14823,1,Har-nee,24,103,307,2,1,2,4,2,...,1,2,2.0,0,0.016761,0.166559,0.207251,0.194078,0.415351,"[0.07834690950229141, 1.1501783068876406, 1.02..."
2838,1,The Gorgeous 5 Beauties,2,307,0,2,2,7,0,2,...,1,2,5.0,0,0.019105,0.204035,0.353514,0.259572,0.163773,"[0.06196509584556729, 0.7925215856085531, 1.49..."
1848,2,Mochi,1,265,0,1,2,0,0,1,...,1,2,12.0,4,0.017194,0.299177,0.536504,0.100774,0.046351,"[0.1143038921277312, 1.1889348421776806, 2.182..."
669,2,Nala & Peach,9,266,266,2,2,4,6,2,...,1,2,8.0,0,0.008867,0.044874,0.324296,0.202780,0.419182,"[0.10835417602341627, 0.7268431636301831, 1.12..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
996,2,Anak Nanya,8,266,0,3,1,2,0,2,...,1,2,14.0,0,0.001659,0.028627,0.243163,0.213709,0.512842,"[0.03783643627609431, 0.535025556894434, 0.909..."
12222,1,Poor Baby,3,307,0,1,5,0,0,2,...,1,2,2.0,0,0.012552,0.302023,0.289652,0.160641,0.235132,"[0.05345929047926969, 1.3894126384434298, 1.36..."
10538,2,No Name,1,265,0,2,1,6,0,1,...,1,2,4.0,0,0.020152,0.481700,0.305483,0.129244,0.063421,"[0.08351152798647285, 2.268266953473084, 1.472..."
11062,1,Pipi,1,307,0,2,1,5,7,2,...,1,2,1.0,0,0.019263,0.177707,0.265056,0.190629,0.347345,"[0.14612655333264096, 0.6855164303520578, 1.23..."


In [27]:
MODEL_NAME = '04 ResNet'
MODEL_VERSION = '5.0.1'

study_resnet = optuna.create_study(direction='maximize',
                            storage="sqlite:///../work/db.sqlite3",  # Specify the storage URL here.
                            study_name=f'{MODEL_NAME}_{MODEL_VERSION}',
                            load_if_exists = True)

resnet_dataset = load(os.path.join(PATH_TO_OPTUNA_ARTIFACTS,get_artifact_filename(study_resnet,'test')))

[I 2024-09-26 20:18:39,596] Using an existing study with name '04 ResNet_5.0.1' instead of creating a new one.


In [28]:
resnet_dataset

Unnamed: 0,PetID,pred,Type,Name,Age,Breed1,Breed2,Gender,Color1,Color2,...,Sterilized,Health,Quantity,Fee,State,RescuerID,VideoAmt,Description,PhotoAmt,AdoptionSpeed
0,015da9e87,"[-1.768325, 0.18485203, 0.74715734, 0.3766706,...",2,Adik Gebuk (Betina),2,265,266,2,2,5,...,2,1,1,0,41326,d718a8deb57887c6ee18b757484273c8,0,Nama: Gebuk (Betina)- Adik beradik dengan Gebu...,5.0,0
1,022606901,"[-3.3611686, -0.36317593, 0.7986378, 0.9519239...",1,,3,141,307,1,1,0,...,2,1,1,0,41401,c4b8b921e00ba5dc19e793b81987f40f,1,Hi all =) My friend is currently looking for s...,5.0,0
2,02f89bdcb,"[-3.027864, -0.031935632, 0.49349236, 0.814333...",1,Rex,72,141,0,1,5,0,...,2,1,1,0,41326,e76b700e2c869088979aa5efeb962dd7,0,Friendly and playful. Good watchdog because of...,3.0,0
3,0cf7fae9d,"[-0.7027494, 1.0775297, 0.5501126, -0.03754511...",2,KITTENS - URGENT ADOPTION,1,266,0,3,1,2,...,2,1,4,0,41326,1eea485b01d14c668f33afa7c919646e,0,These 4 kittens need urgent adoption because t...,1.0,0
4,0e922caab,"[-0.40759718, 1.3669151, 0.54109144, -0.100735...",1,Ha Ha (Toy Poodle),12,179,0,1,2,0,...,2,1,1,300,41326,225d19c861c7c5d20a9c3ba1b2d37753,0,Ha Ha belongs to my friend who migrated to ano...,5.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2926,ff2cf88a0,"[-2.749919, 0.43670714, 0.6319917, 0.4132462, ...",1,JACKSON,12,307,0,1,2,0,...,1,1,1,0,41326,2266840747a7440f9f3453e31b384df5,0,Meet Jackson. He's a eye catcher and always re...,2.0,4
2927,ff498c903,"[-2.8062663, -0.027764872, 0.2817769, 0.746720...",1,Lady,63,189,307,2,1,2,...,1,1,1,0,41326,03806ca295ace12b7463f4ed036cbb0e,0,Lady was an abandoned dog last time after she ...,5.0,4
2928,ff50c6171,"[-2.5004535, -0.5100352, 1.0119576, 0.9822008,...",2,Gari,4,247,0,2,1,2,...,2,1,1,50,41326,2ca58d9cdf6107e7169985db6562bc3e,0,House kitten. Malaysian friend gave to me. but...,5.0,4
2929,ff5e30380,"[-1.654369, 0.3078812, 0.6037669, 0.3708694, 0...",2,Fa Meow,4,254,0,2,1,3,...,1,1,1,100,41401,1faf0ae111772205cf2f28b3ecea3276,0,"Long haired like persian cat, friendly, fast r...",5.0,4


In [29]:
MODEL_NAME = '06 Bert'
MODEL_VERSION = '1.0'

study_bert = optuna.create_study(direction='maximize',
                            storage="sqlite:///../work/db.sqlite3",  # Specify the storage URL here.
                            study_name=f'{MODEL_NAME}_{MODEL_VERSION}',
                            load_if_exists = True)

bert_dataset = load(os.path.join(PATH_TO_OPTUNA_ARTIFACTS,get_artifact_filename(study_bert,'test')))

[I 2024-09-26 20:18:39,698] Using an existing study with name '06 Bert_1.0' instead of creating a new one.


In [30]:
bert_dataset

Unnamed: 0,PetID,pred,Type,Name,Age,Breed1,Breed2,Gender,Color1,Color2,...,Health,Quantity,Fee,State,RescuerID,VideoAmt,Description,PhotoAmt,AdoptionSpeed,labels
0,8e76c8e39,"[2.7906992e-05, 9.001282e-05, 8.072176e-05, 0....",2,Kali,3,264,0,2,1,2,...,1,1,50,41326,a9caef3f98e67bfac9093cca79e20b93,0,Kali is a super playful kitten who is on the g...,2.0,1,1
1,6436c1a59,"[1.4993968e-06, 5.01486e-05, 0.025264408, 0.95...",1,Godiva,12,307,0,2,2,7,...,1,1,0,41326,a042471e0f43f2cf707104a1a138a7df,0,Godiva was rescued in Serdang residential area...,7.0,2,2
2,988988d5b,"[4.6921297e-05, 0.0028261796, 0.02537138, 0.82...",2,Cikenet,3,266,0,1,2,7,...,1,1,0,41401,b8853c71b981104f1ef126e51387b616,0,"hello cikenets fans, i just wanna inform that ...",19.0,1,1
3,efbf1703a,"[0.012783979, 0.02724774, 0.036614597, 0.60610...",2,No Name,1,266,0,2,1,0,...,1,1,0,41326,2f846fb8f87a25678374e193559d83c9,0,"Just saved this kitten from the street, but i ...",2.0,2,2
4,543130f60,"[0.0019985973, 0.60652685, 0.19785205, 0.19212...",1,BoiBoi,24,307,0,1,5,7,...,1,5,0,41326,2147467fcd35e7a3bc23b9edcffc5702,0,Boiboi is rescued by my daughter 2 years ago f...,1.0,4,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2991,23874f644,"[8.4333085e-05, 0.0070316195, 0.16397795, 0.08...",1,Patch,8,307,0,2,2,7,...,1,1,0,41326,001e42763024f9d4abe31e79472b1827,0,Patch is for free adoption. If you want to ado...,2.0,3,3
2992,e7f7066b6,"[0.00018277224, 0.0021219593, 0.00016068382, 0...",1,Terry,24,179,307,1,2,3,...,1,1,0,41326,719987dce7aeb027fdfa91b480800199,0,been at my place for a while..am hoping to fin...,0.0,4,4
2993,36e7f8d83,"[1.43493735e-05, 0.0006955583, 0.022373617, 0....",2,Pets + Strays : BlueEyed BlackWhite,1,266,0,2,5,6,...,1,1,0,41401,90569c3f7cb0af35cba5dac82c0ac9d7,0,1 month old white + grey kitten for adoption n...,1.0,3,3
2994,4d163b731,"[0.00037809374, 0.11163858, 0.026053337, 0.162...",1,Snowy,6,195,0,2,1,7,...,1,1,0,41401,79309f4027f2fedb4349a298c69fe56f,0,ooooo,1.0,0,0


In [31]:
merged_datasets = lgb_dataset[['PetID', 'pred', 'AdoptionSpeed']].rename({'pred':'lgb_pred_score'},axis=1).merge(resnet_dataset[['PetID', 'pred']].rename({'pred':'resnet_pred_score'},axis=1),
                  on='PetID', how='outer')
merged_datasets = merged_datasets.merge(bert_dataset[['PetID', 'pred']].rename({'pred':'bert_pred_score'},axis=1),
                  on='PetID', how='outer')

merged_datasets['resnet_pred_score'] = [np.zeros(5) if type(i) is float else  i for i in merged_datasets['resnet_pred_score'] ]
merged_datasets['bert_pred_score'] = [np.zeros(5) if type(i) is float else  i for i in merged_datasets['bert_pred_score'] ]

In [32]:
merged_datasets['blend_pred_score'] = [r['lgb_pred_score']+r['resnet_pred_score']+r['bert_pred_score'] for i,r in merged_datasets.iterrows()]

In [33]:
merged_datasets['lgb_pred'] = [r.argmax() for r in merged_datasets['lgb_pred_score']]
merged_datasets['resnet_pred'] = [r.argmax() for r in merged_datasets['resnet_pred_score']]
merged_datasets['bert_pred'] = [r.argmax() for r in merged_datasets['bert_pred_score']]

merged_datasets['blended_pred'] = [r.argmax() for r in merged_datasets['blend_pred_score']]

In [34]:
plot_confusion_matrix(merged_datasets['AdoptionSpeed'],
                      merged_datasets['lgb_pred'], 
                    title = 'LGB Model Kappa: ' + str(cohen_kappa_score(merged_datasets['AdoptionSpeed'],
                                                                    merged_datasets['lgb_pred'], 
                                                                    weights='quadratic')))

In [35]:
plot_confusion_matrix(merged_datasets['AdoptionSpeed'],
                      merged_datasets['resnet_pred'], 
                    title = 'Resnet Model Kappa: ' + str(cohen_kappa_score(merged_datasets['AdoptionSpeed'],
                                                                    merged_datasets['resnet_pred'], 
                                                                    weights='quadratic')))



In [36]:
plot_confusion_matrix(merged_datasets['AdoptionSpeed'],
                      merged_datasets['bert_pred'], 
                    title = 'Bert Model Kappa: ' + str(cohen_kappa_score(merged_datasets['AdoptionSpeed'],
                                                                    merged_datasets['bert_pred'], 
                                                                    weights='quadratic')))


In [37]:
plot_confusion_matrix(merged_datasets['AdoptionSpeed'],
                      merged_datasets['blended_pred'], 
                    title = 'Blended Model Kappa: ' + str(cohen_kappa_score(merged_datasets['AdoptionSpeed'],
                                                                    merged_datasets['blended_pred'], 
                                                                    weights='quadratic')))
