In [None]:
import pandas as pd
import json

In [None]:
# Read results_bert_clip.csv and give its generic "answer"/"conf" columns a
# model-specific name, so they stay distinguishable once the per-model frames
# are merged on "image".
df_bert_clip = pd.read_csv("results_bert_clip.csv").rename(
    columns={"answer": "answer_bert_clip", "conf": "conf_bert_clip"}
)
df_bert_clip.head()

In [None]:
# Histogram of the values in the conf_bert_clip column.
df_bert_clip["conf_bert_clip"].hist()

In [None]:
# Read results_clip_deuser.csv and give its generic "answer"/"conf" columns a
# model-specific name, so they stay distinguishable once the per-model frames
# are merged on "image".
df_clip = pd.read_csv("results_clip_deuser.csv").rename(
    columns={"answer": "answer_clip", "conf": "conf_clip"}
)
df_clip.head()

In [None]:
# Histogram of the values in the conf_clip column.
df_clip["conf_clip"].hist()

In [None]:
# Read results_blip2.csv and give its generic "answer"/"conf" columns a
# model-specific name, so they stay distinguishable once the per-model frames
# are merged on "image".
df_blip2 = pd.read_csv("results_blip2.csv").rename(
    columns={"answer": "answer_blip2", "conf": "conf_blip2"}
)
df_blip2.head()

In [None]:
# Histogram of the values in the conf_blip2 column.
df_blip2["conf_blip2"].hist()

In [None]:
# Inner-join the three per-model frames on "image": only images that appear in
# all three frames are kept, with one answer/conf column pair per model.
df_models = (
    df_bert_clip
    .merge(df_clip, on="image", how="inner")
    .merge(df_blip2, on="image", how="inner")
)
df_models

---

In [None]:
# Ensemble 1: Bert_Clip with CLIP

def ensemble_bert_clip_with_clip(row):
    """Choose between the Bert_Clip and CLIP answers of one row.

    Reads ``answer_bert_clip``, ``conf_bert_clip``, ``answer_clip`` and
    ``conf_clip`` from ``row``.

    Returns the Bert_Clip answer when both answers are equal or when the
    Bert_Clip confidence is strictly greater than the CLIP confidence.
    In every other case (including equal confidences) the CLIP answer is
    returned.
    """
    bert_clip_wins = (
        row.answer_bert_clip == row.answer_clip
        or row.conf_bert_clip > row.conf_clip
    )
    return row.answer_bert_clip if bert_clip_wins else row.answer_clip

In [None]:
# Evaluate the pairwise ensemble once per row (axis="columns" hands each row to
# the function) and store the chosen answers in the "emsemble1" column.
ensemble1_answers = df_models.apply(ensemble_bert_clip_with_clip, axis="columns")
df_models["emsemble1"] = ensemble1_answers
df_models.head()

In [None]:
# Pair every image with its ensemble-1 answer. The two columns are walked in
# row order (positionally) rather than looked up by index label, so this does
# not depend on df_models having a default 0..n-1 index, and iterating a
# Series yields plain Python scalars that json can serialize.
model_answers = [
    {'image': image_url, 'answer': answer}
    for image_url, answer in zip(df_models['image'], df_models['emsemble1'])
]

# Persist the records as a JSON array of {"image": ..., "answer": ...} objects.
with open('answers_results_e1.json', 'w') as file:
    json.dump(model_answers, file)

---

In [None]:
# Ensemble 2: Bert_Clip with BLIP2

def ensemble_bert_clip_with_blip2(row):
    """Choose between the Bert_Clip and BLIP2 answers of one row.

    Reads ``answer_bert_clip``, ``conf_bert_clip``, ``answer_blip2`` and
    ``conf_blip2`` from ``row``.

    Returns the shared answer when both models agree. When they disagree,
    the Bert_Clip answer is returned only if its confidence is strictly
    greater; otherwise (including equal confidences) the BLIP2 answer is
    returned.
    """
    bert_clip_answer, blip2_answer = row.answer_bert_clip, row.answer_blip2

    # Agreement: nothing to arbitrate.
    if bert_clip_answer == blip2_answer:
        return bert_clip_answer

    # Disagreement: the strictly more confident model wins, BLIP2 on ties.
    if row.conf_bert_clip > row.conf_blip2:
        return bert_clip_answer
    return blip2_answer

In [None]:
# Evaluate the pairwise ensemble once per row (axis="columns" hands each row to
# the function) and store the chosen answers in the "emsemble2" column.
ensemble2_answers = df_models.apply(ensemble_bert_clip_with_blip2, axis="columns")
df_models["emsemble2"] = ensemble2_answers
df_models.head()

In [None]:
# Pair every image with its ensemble-2 answer. The two columns are walked in
# row order (positionally) rather than looked up by index label, so this does
# not depend on df_models having a default 0..n-1 index, and iterating a
# Series yields plain Python scalars that json can serialize.
model_answers = [
    {'image': image_url, 'answer': answer}
    for image_url, answer in zip(df_models['image'], df_models['emsemble2'])
]

# Persist the records as a JSON array of {"image": ..., "answer": ...} objects.
with open('answers_results_e2.json', 'w') as file:
    json.dump(model_answers, file)

---

In [None]:
# Ensemble 3: CLIP with BLIP2

def ensemble_clip_with_blip2(row):
    """Choose between the CLIP and BLIP2 answers of one row.

    Reads ``answer_clip``, ``conf_clip``, ``answer_blip2`` and ``conf_blip2``
    from ``row``.

    Returns the CLIP answer when both answers are equal or when the CLIP
    confidence is strictly greater than the BLIP2 confidence. In every other
    case (including equal confidences) the BLIP2 answer is returned.
    """
    answers_match = row.answer_clip == row.answer_blip2
    if answers_match or row.conf_clip > row.conf_blip2:
        return row.answer_clip
    return row.answer_blip2

In [None]:
# Evaluate the pairwise ensemble once per row (axis="columns" hands each row to
# the function) and store the chosen answers in the "emsemble3" column.
ensemble3_answers = df_models.apply(ensemble_clip_with_blip2, axis="columns")
df_models["emsemble3"] = ensemble3_answers
df_models.head()

In [None]:
# Pair every image with its ensemble-3 answer. The two columns are walked in
# row order (positionally) rather than looked up by index label, so this does
# not depend on df_models having a default 0..n-1 index, and iterating a
# Series yields plain Python scalars that json can serialize.
model_answers = [
    {'image': image_url, 'answer': answer}
    for image_url, answer in zip(df_models['image'], df_models['emsemble3'])
]

# Persist the records as a JSON array of {"image": ..., "answer": ...} objects.
with open('answers_results_e3.json', 'w') as file:
    json.dump(model_answers, file)

---

In [None]:
# Ensemble 4: all

def ensemble_all(row):
    """Combine the Bert_Clip, CLIP and BLIP2 answers of one row into one answer.

    Reads ``answer_<model>`` and ``conf_<model>`` from ``row`` for the models
    ``bert_clip``, ``clip`` and ``blip2``.

    Rules, in order:

    * Missing answers (NaN/None) do not take part in the vote; ``None`` is
      returned when every answer is missing.
    * When at least two models give the same answer, that answer is returned
      (this also covers all three agreeing).
    * Otherwise the answer with the highest confidence is returned. A missing
      confidence ranks below every real value, and ties go to the model listed
      first (bert_clip, then clip, then blip2).
    """
    model_names = ("bert_clip", "clip", "blip2")

    # (answer, confidence) pairs for the models that actually produced an answer.
    candidates = [
        (row["answer_" + name], row["conf_" + name])
        for name in model_names
        if pd.notna(row["answer_" + name])
    ]
    if not candidates:
        return None

    # Majority vote. Counting the answers directly avoids depending on the
    # column names produced by value_counts().reset_index(), which differ
    # between pandas versions (an "index" column only exists before 2.0).
    answers = [answer for answer, _ in candidates]
    for answer in answers:
        if answers.count(answer) > 1:
            return answer

    # No two models agree: fall back to the most confident one. max() keeps the
    # first maximal element, which gives the documented tie-break order.
    def confidence_key(candidate):
        confidence = candidate[1]
        return confidence if pd.notna(confidence) else float("-inf")

    return max(candidates, key=confidence_key)[0]

In [None]:
# Evaluate the three-model ensemble once per row (axis="columns" hands each row
# to the function) and store the chosen answers in the "emsemble4" column.
ensemble4_answers = df_models.apply(ensemble_all, axis="columns")
df_models["emsemble4"] = ensemble4_answers
df_models.head()

In [None]:
# Rows where the three models gave pairwise-different answers. All three
# inequalities are needed: bert_clip != clip and clip != blip2 alone would
# still let through rows where bert_clip and blip2 agree with each other.
# NOTE(review): assumes the intent is "all three disagree" — confirm.
df_models[
    (df_models.answer_bert_clip != df_models.answer_clip)
    & (df_models.answer_clip != df_models.answer_blip2)
    & (df_models.answer_bert_clip != df_models.answer_blip2)
]

In [None]:
# Pair every image with its ensemble-4 answer. The two columns are walked in
# row order (positionally) rather than looked up by index label, so this does
# not depend on df_models having a default 0..n-1 index, and iterating a
# Series yields plain Python scalars that json can serialize.
model_answers = [
    {'image': image_url, 'answer': answer}
    for image_url, answer in zip(df_models['image'], df_models['emsemble4'])
]

# Persist the records as a JSON array of {"image": ..., "answer": ...} objects.
with open('answers_results_e4.json', 'w') as file:
    json.dump(model_answers, file)