# Evaluación del Test No Etiquetado para la Task 5

## Importar Dependencias y Librerías

In [None]:
# Install dependencies
# NOTE(review): none of these installs pins a version, so a fresh run may pull
# releases that differ from the ones this notebook was last executed with.
!pip install pytorch-lightning
!pip install --upgrade accelerate
!pip install framework-reproducibility
!pip install transformers datasets
!pip install --upgrade numpy
!pip install --upgrade pandas
!pip install --upgrade scikit-learn

In [None]:
import random
import torch
import numpy as np
import os
from pytorch_lightning import seed_everything
import matplotlib.pyplot as plt
import seaborn as sns
import re

# Fix every random seed used by the notebook so that runs are repeatable.
seed_val = 42
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)
# Ask cuDNN for deterministic kernels and turn off its benchmark mode.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
os.environ["TF_DETERMINISTIC_OPS"] = "1" # See:https://github.com/NVIDIA/tensorflow-determinism#confirmed-current-gpu-specific-sources-of-non-determinism-with-solutions
# Reuse seed_val (instead of a second hard-coded 42) so the seed is defined in one place.
seed_everything(seed_val, workers=True)

from datasets import Dataset, DatasetDict, load_metric
import pandas as pd
import sklearn as sk
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, f1_score
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForSequenceClassification, \
TrainingArguments, Trainer, pipeline, EarlyStoppingCallback

  from .autonotebook import tqdm as notebook_tqdm
Seed set to 42


In [None]:
# GPU check
# Select the compute device: CUDA when PyTorch reports at least one GPU,
# otherwise the CPU, so that `device` is always defined afterwards.
if torch.cuda.device_count() > 0:
    # If a GPU is available, print its name
    print(f'GPU detected. Currently using: "{torch.cuda.get_device_name(0)}"')
    # Set the device to GPU for accelerated computations
    device = torch.device("cuda")
else:
    # If no GPU is available, inform the user to change the runtime type
    print('Currently using CPU. To utilize GPU acceleration, change the runtime type in the \'runtime\' tab.')
    # Fall back to the CPU instead of leaving `device` undefined
    device = torch.device("cpu")

In [None]:
# Mount Google Drive (only works inside Google Colab)
# NOTE(review): every data/model path used in this notebook is under /home/...,
# not /content/drive — confirm this cell is still needed.
from google.colab import drive
drive.mount('/content/drive')

## Lectura de Predicciones de Task 4

In [None]:
# Read the JSON file with the Task 4 predictions
# NOTE(review): this path names a *task5_hard* file, and it is the same path
# the last cell of this notebook writes to. Confirm that it should not point
# at the Task 4 predictions file instead; as written, a re-run reads the
# labels produced by the previous run of this notebook.
train_data_path = '/home/alvarocarrillo/TFG/Trabajo/Dataset/exist2024_I2C-Huelva/task5_hard_I2C-Huelva_3.json'

# Load the file into a DataFrame
task4 = pd.read_json(train_data_path)

task4

Unnamed: 0,id,value,test_case
0,310001,DIRECT,EXIST2024
1,310002,NO,EXIST2024
2,310003,DIRECT,EXIST2024
3,310004,NO,EXIST2024
4,310005,DIRECT,EXIST2024
...,...,...,...
1048,410509,JUDGEMENTAL,EXIST2024
1049,410510,DIRECT,EXIST2024
1050,410511,DIRECT,EXIST2024
1051,410512,JUDGEMENTAL,EXIST2024


In [None]:
# Count how many rows carry each label in the 'value' column
task4.value_counts('value')

value
DIRECT         440
NO             413
JUDGEMENTAL    200
Name: count, dtype: int64

In [None]:
# Collect the ids of the memes that were flagged as sexist.
# Every label other than 'NO' counts as positive: that is 'YES' in a Task 4
# (YES/NO) file, and 'DIRECT'/'JUDGEMENTAL' when the file carries Task 5
# labels, as in the value counts displayed above. Filtering on == 'YES'
# alone would select nothing from such a file.
yes_ids = task4.loc[task4['value'] != 'NO', 'id'].tolist()
print(len(yes_ids))

640


## Lectura de Datos

In [None]:
# READ FILE
# Load the (unlabelled) test data
test_data_path = '/home/alvarocarrillo/TFG/Trabajo/Dataset/EXIST2024_test_clean.json'

# Load it into a DataFrame; orient='index' makes each top-level JSON key a row label
test_df_full = pd.read_json(test_data_path, orient='index')

test_df_full

Unnamed: 0,id_EXIST,lang,text,meme,path_memes,number_annotators,annotators,gender_annotators,age_annotators,ethnicities_annotators,study_levels_annotators,countries_annotators,split
310001,310001,es,Soy como la madre de mi hermano,310001.jpeg,memes/310001.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
310002,310002,es,DESAFI LLEVAR EN IRÁN LAS AUTORIDADES ESTÁN RE...,310002.jpeg,memes/310002.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
310003,310003,es,Vincent Vega @VincentVega677-7h D *** YA VIENE...,310003.jpeg,memes/310003.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
310004,310004,es,A ti Mujer soñadora Feliz Día,310004.jpeg,memes/310004.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
310005,310005,es,Lya Gonzalez @LyaGonzalez1 Aquí hay revolución...,310005.jpeg,memes/310005.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
...,...,...,...,...,...,...,...,...,...,...,...,...,...
410509,410509,en,"WOMEN SHOULDN'T WORK OUTSIDE THE HOME YEAH, RI...",410509.jpeg,memes/410509.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...","[High school degree or equivalent, High school...","[South Africa, Belgium, Portugal, Poland, Sout...",TEST-MEME_EN
410510,410510,en,"Sa.thousand.apologies ACTUALLY, WOMEN SHOULDN'...",410510.jpeg,memes/410510.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...","[High school degree or equivalent, High school...","[South Africa, Belgium, Portugal, Poland, Sout...",TEST-MEME_EN
410511,410511,en,Bitches only think you good for sex.. like bit...,410511.jpeg,memes/410511.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...","[High school degree or equivalent, High school...","[South Africa, Belgium, Portugal, Poland, Sout...",TEST-MEME_EN
410512,410512,en,BITCHES BE LOOKING LIKE THIS @ThingsBitchesSay...,410512.jpeg,memes/410512.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...","[High school degree or equivalent, High school...","[South Africa, Belgium, Portugal, Poland, Sout...",TEST-MEME_EN


In [None]:
# Keep only the test rows whose 'id_EXIST' is one of the ids in 'yes_ids'.
# .copy() makes the subset an independent DataFrame, so assigning to its
# columns later does not raise pandas' SettingWithCopyWarning.
test_df = test_df_full[test_df_full['id_EXIST'].isin(yes_ids)].copy()

# Show the filtered DataFrame
test_df

Unnamed: 0,id_EXIST,lang,text,meme,path_memes,number_annotators,annotators,gender_annotators,age_annotators,ethnicities_annotators,study_levels_annotators,countries_annotators,split
310001,310001,es,Soy como la madre de mi hermano,310001.jpeg,memes/310001.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
310003,310003,es,Vincent Vega @VincentVega677-7h D *** YA VIENE...,310003.jpeg,memes/310003.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
310005,310005,es,Lya Gonzalez @LyaGonzalez1 Aquí hay revolución...,310005.jpeg,memes/310005.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
310007,310007,es,"Nooo, el día de la mujer es una conmemoración,...",310007.jpeg,memes/310007.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
310011,310011,es,"Nuria Madrid13nadamas @NuriRealMadrid Ella 46,...",310011.jpeg,memes/310011.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
...,...,...,...,...,...,...,...,...,...,...,...,...,...
410509,410509,en,"WOMEN SHOULDN'T WORK OUTSIDE THE HOME YEAH, RI...",410509.jpeg,memes/410509.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...","[High school degree or equivalent, High school...","[South Africa, Belgium, Portugal, Poland, Sout...",TEST-MEME_EN
410510,410510,en,"Sa.thousand.apologies ACTUALLY, WOMEN SHOULDN'...",410510.jpeg,memes/410510.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...","[High school degree or equivalent, High school...","[South Africa, Belgium, Portugal, Poland, Sout...",TEST-MEME_EN
410511,410511,en,Bitches only think you good for sex.. like bit...,410511.jpeg,memes/410511.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...","[High school degree or equivalent, High school...","[South Africa, Belgium, Portugal, Poland, Sout...",TEST-MEME_EN
410512,410512,en,BITCHES BE LOOKING LIKE THIS @ThingsBitchesSay...,410512.jpeg,memes/410512.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...","[High school degree or equivalent, High school...","[South Africa, Belgium, Portugal, Poland, Sout...",TEST-MEME_EN


In [None]:
# Name of the column that holds the meme text fed to the classifiers
campo_texto = 'text'

# Lower-case the text column (the tokenizer loaded further down is an uncased checkpoint)
test_df[campo_texto] = test_df[campo_texto].str.lower()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df[campo_texto] = test_df[campo_texto].str.lower()


In [None]:
# Convert the DataFrame into a Hugging Face Dataset.
# preserve_index=False keeps the pandas index (a duplicate of 'id_EXIST')
# from being stored as an extra '__index_level_0__' column.
test_dataset = Dataset.from_pandas(test_df, preserve_index=False)
test_dataset

Dataset({
    features: ['id_EXIST', 'lang', 'text', 'meme', 'path_memes', 'number_annotators', 'annotators', 'gender_annotators', 'age_annotators', 'ethnicities_annotators', 'study_levels_annotators', 'countries_annotators', 'split', '__index_level_0__'],
    num_rows: 640
})

## Cargar Modelos

In [None]:
# Load the previously trained classifiers from disk.
# Each name is appended to `model_path` to form the directory to load; the
# names appear to correspond to annotator groups (gender, age range,
# education level, ethnicity) — TODO confirm against the training notebook.
modelos = [
    "persp_M",
    "persp_F",
    "persp_23-45",
    "persp_18-22",
    "persp_46+",
    "persp_Bachelor’s degree",
    "persp_High school degree or equivalent",
    "persp_White or Caucasian",
]

# Common prefix of every saved model directory
model_path = '/home/alvarocarrillo/TFG/Trabajo/Dataset/Modelos/Task5/Bert/modelo_'

# Map each model name to its loaded sequence-classification model
loaded_models = {
    nombre: AutoModelForSequenceClassification.from_pretrained(model_path + nombre)
    for nombre in modelos
}

In [None]:
# Display the dictionary of loaded models (model name -> model object)
loaded_models

{'persp_M': BertForSequenceClassification(
   (bert): BertModel(
     (embeddings): BertEmbeddings(
       (word_embeddings): Embedding(105879, 768, padding_idx=0)
       (position_embeddings): Embedding(512, 768)
       (token_type_embeddings): Embedding(2, 768)
       (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
       (dropout): Dropout(p=0.1, inplace=False)
     )
     (encoder): BertEncoder(
       (layer): ModuleList(
         (0-11): 12 x BertLayer(
           (attention): BertAttention(
             (self): BertSelfAttention(
               (query): Linear(in_features=768, out_features=768, bias=True)
               (key): Linear(in_features=768, out_features=768, bias=True)
               (value): Linear(in_features=768, out_features=768, bias=True)
               (dropout): Dropout(p=0.1, inplace=False)
             )
             (output): BertSelfOutput(
               (dense): Linear(in_features=768, out_features=768, bias=True)
               (LayerN

## Hacer Predicciones

In [None]:
# Tokenizer selection
# NOTE(review): presumably this is the checkpoint the saved models were
# fine-tuned from — confirm, since the tokenizer is not loaded from the
# saved model directories themselves.
model_checkpoint = 'bert-base-multilingual-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Run the pipelines on the first GPU when one is available and on the CPU
# (device=-1) otherwise, instead of failing on machines without CUDA.
pipeline_device = 0 if torch.cuda.is_available() else -1

# Build one text-classification pipeline per loaded model, keyed by model name
pipes = {
    modelo_nombre: pipeline(
        "text-classification",
        model=modelo,
        tokenizer=tokenizer,
        device=pipeline_device,
    )
    for modelo_nombre, modelo in loaded_models.items()
}

# Prediction function that queries every model for a single record
def get_predictions_for_all_models(records):
    """Classify one record with every pipeline in the module-level ``pipes``.

    Args:
        records: mapping that holds the text to classify under the key named
            by the module-level ``campo_texto``.

    Returns:
        dict mapping each model name in ``pipes`` to 0 when the first result
        returned by that pipeline is labelled ``'LABEL_0'`` and to 1 for any
        other label.
    """
    predictions_all_models = {}
    for modelo_nombre, pipe in pipes.items():
        result = pipe(records[campo_texto], truncation=True)
        # Only the label of the first result is used; its score is not needed.
        predictions_all_models[modelo_nombre] = 0 if result[0]['label'] == 'LABEL_0' else 1

    return predictions_all_models

# Apply the prediction function to every example of the test dataset;
# the returned dict adds one column per model
test_dataset_predicted_all_models = test_dataset.map(get_predictions_for_all_models)

# Print the first example of the test dataset together with the predictions of all models
print("Primer ejemplo del conjunto de datos de prueba con predicciones para todos los modelos:")
print(test_dataset_predicted_all_models[0])

Map:   2%|▍                             | 10/640 [00:00<00:15, 40.42 examples/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Map: 100%|█████████████████████████████| 640/640 [00:14<00:00, 44.19 examples/s]

Primer ejemplo del conjunto de datos de prueba con predicciones para todos los modelos:
{'id_EXIST': 310001, 'lang': 'es', 'text': 'soy como la madre de mi hermano ', 'meme': '310001.jpeg', 'path_memes': 'memes/310001.jpeg', 'number_annotators': 6, 'annotators': ['Annotator_888', 'Annotator_889', 'Annotator_890', 'Annotator_891', 'Annotator_892', 'Annotator_893'], 'gender_annotators': ['M', 'M', 'M', 'F', 'F', 'F'], 'age_annotators': ['46+', '23-45', '18-22', '46+', '18-22', '23-45'], 'ethnicities_annotators': ['White or Caucasian', 'White or Caucasian', 'White or Caucasian', 'Hispano or Latino', 'White or Caucasian', 'White or Caucasian'], 'study_levels_annotators': ['Master’s degree', 'Bachelor’s degree', 'High school degree or equivalent', 'Bachelor’s degree', 'Bachelor’s degree', 'Bachelor’s degree'], 'countries_annotators': ['Italy', 'Spain', 'Portugal', 'Mexico', 'United Kingdom', 'Spain'], 'split': 'TEST-MEME_ES', '__index_level_0__': 310001, 'persp_M': 1, 'persp_F': 1, 'persp_2




In [None]:
# Display the dataset summary, which now lists one extra column per model
test_dataset_predicted_all_models

Dataset({
    features: ['id_EXIST', 'lang', 'text', 'meme', 'path_memes', 'number_annotators', 'annotators', 'gender_annotators', 'age_annotators', 'ethnicities_annotators', 'study_levels_annotators', 'countries_annotators', 'split', '__index_level_0__', 'persp_M', 'persp_F', 'persp_23-45', 'persp_18-22', 'persp_46+', 'persp_Bachelor’s degree', 'persp_High school degree or equivalent', 'persp_White or Caucasian'],
    num_rows: 640
})

In [None]:
# Export the predicted dataset to a pandas DataFrame.
# to_pandas() returns a DataFrame without switching the dataset's output
# format in place, so earlier cells behave the same when they are re-run.
df_test = test_dataset_predicted_all_models.to_pandas()
df_test

Unnamed: 0,id_EXIST,lang,text,meme,path_memes,number_annotators,annotators,gender_annotators,age_annotators,ethnicities_annotators,...,split,__index_level_0__,persp_M,persp_F,persp_23-45,persp_18-22,persp_46+,persp_Bachelor’s degree,persp_High school degree or equivalent,persp_White or Caucasian
0,310001,es,soy como la madre de mi hermano,310001.jpeg,memes/310001.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...",...,TEST-MEME_ES,310001,1,1,1,1,0,1,1,1
1,310003,es,vincent vega @vincentvega677-7h d *** ya viene...,310003.jpeg,memes/310003.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...",...,TEST-MEME_ES,310003,1,1,1,1,1,0,1,1
2,310005,es,lya gonzalez @lyagonzalez1 aquí hay revolución...,310005.jpeg,memes/310005.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...",...,TEST-MEME_ES,310005,1,1,1,1,1,1,0,1
3,310007,es,"nooo, el día de la mujer es una conmemoración,...",310007.jpeg,memes/310007.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...",...,TEST-MEME_ES,310007,1,1,1,1,1,1,1,1
4,310011,es,"nuria madrid13nadamas @nurirealmadrid ella 46,...",310011.jpeg,memes/310011.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...",...,TEST-MEME_ES,310011,1,1,1,1,1,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
635,410509,en,"women shouldn't work outside the home yeah, ri...",410509.jpeg,memes/410509.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...",...,TEST-MEME_EN,410509,0,0,1,0,0,0,0,0
636,410510,en,"sa.thousand.apologies actually, women shouldn'...",410510.jpeg,memes/410510.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...",...,TEST-MEME_EN,410510,0,1,1,1,1,1,1,0
637,410511,en,bitches only think you good for sex.. like bit...,410511.jpeg,memes/410511.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...",...,TEST-MEME_EN,410511,1,1,1,0,1,1,1,0
638,410512,en,bitches be looking like this @thingsbitchessay...,410512.jpeg,memes/410512.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...",...,TEST-MEME_EN,410512,0,1,1,1,0,0,0,0


In [None]:
# Voting weight ("Peso") of each model when the per-model predictions are combined.
# The weights sum to 8.0, which is also the number of models listed in `modelos`.
# NOTE(review): how these weights were chosen is not shown in this notebook — TODO document.
pesos = {
    "persp_M": {"Peso": 1},
    "persp_F": {"Peso": 0.5},
    "persp_23-45": {"Peso": 1.75},
    "persp_18-22": {"Peso": 0.5},
    "persp_46+": {"Peso": 1},
    "persp_Bachelor’s degree": {"Peso": 1},
    "persp_High school degree or equivalent": {"Peso": 1.75},
    "persp_White or Caucasian": {"Peso": 0.5},
}

In [None]:
# Compute the joint (ensemble) prediction as hard 0/1 labels
def calcular_prediccion_conjunta(df_test, pesos, umbral=0.5):
    """Combine the per-model predictions into one weighted vote per row.

    The models are taken from the keys of ``pesos`` rather than from a
    module-level list, and the weighted sum is normalised by the total
    weight, so the score stays in [0, 1] for 0/1 predictions whatever the
    weights add up to.

    Note: a later cell of this notebook defines a function with this same
    name that returns the raw score by default; once that cell has run it
    shadows this definition.

    Args:
        df_test: DataFrame with one column of 0/1 predictions per model name.
        pesos: mapping ``model name -> {"Peso": weight}``.
        umbral: decision threshold. Scores ``>= umbral`` become 1.0 and the
            rest 0.0. Pass ``None`` to get the un-thresholded score.

    Returns:
        Series aligned with ``df_test`` holding 1.0/0.0 decisions, or the
        weighted-average score when ``umbral`` is ``None``.

    Raises:
        ValueError: if ``pesos`` is empty or its weights sum to zero.
    """
    if not pesos:
        raise ValueError("pesos must contain at least one model")

    suma_ponderada = 0.0
    peso_total = 0.0
    for nombre_modelo, config in pesos.items():
        peso = config["Peso"]
        suma_ponderada = suma_ponderada + df_test[nombre_modelo] * peso
        peso_total += peso

    if peso_total == 0:
        raise ValueError("the weights in pesos must not sum to zero")

    # Weighted average of the votes
    pred_conjunta = suma_ponderada / peso_total

    if umbral is None:
        return pred_conjunta

    # Vectorised threshold; works for any index, not only 0..n-1
    return (pred_conjunta >= umbral).astype(float)

# Compute the joint predictions as thresholded 0/1 decisions
predicciones_finales_hard = calcular_prediccion_conjunta(df_test, pesos)

# Show the final predictions
print("Predicciones finales:")
predicciones_finales_hard

Predicciones finales:


0      1.0
1      1.0
2      1.0
3      1.0
4      1.0
      ... 
635    0.0
636    1.0
637    1.0
638    0.0
639    1.0
Name: persp_M, Length: 640, dtype: float64

In [None]:
# Compute the joint (ensemble) prediction as a soft score
def calcular_prediccion_conjunta(df_test, pesos, umbral=None):
    """Combine the per-model predictions into one weighted score per row.

    The models are taken from the keys of ``pesos`` rather than from a
    module-level list, and the weighted sum is normalised by the total
    weight, so the score stays in [0, 1] for 0/1 predictions whatever the
    weights add up to.

    Note: an earlier cell of this notebook defines a function with this same
    name that thresholds the score by default; this definition shadows it.

    Args:
        df_test: DataFrame with one column of 0/1 predictions per model name.
        pesos: mapping ``model name -> {"Peso": weight}``.
        umbral: optional decision threshold. ``None`` (the default) returns
            the raw score; otherwise scores ``>= umbral`` become 1.0 and the
            rest 0.0.

    Returns:
        Series aligned with ``df_test`` holding the weighted-average score,
        or 1.0/0.0 decisions when ``umbral`` is given.

    Raises:
        ValueError: if ``pesos`` is empty or its weights sum to zero.
    """
    if not pesos:
        raise ValueError("pesos must contain at least one model")

    suma_ponderada = 0.0
    peso_total = 0.0
    for nombre_modelo, config in pesos.items():
        peso = config["Peso"]
        suma_ponderada = suma_ponderada + df_test[nombre_modelo] * peso
        peso_total += peso

    if peso_total == 0:
        raise ValueError("the weights in pesos must not sum to zero")

    # Weighted average of the votes
    pred_conjunta = suma_ponderada / peso_total

    if umbral is None:
        return pred_conjunta

    return (pred_conjunta >= umbral).astype(float)

# Compute the joint predictions as un-thresholded weighted scores
predicciones_finales_soft = calcular_prediccion_conjunta(df_test, pesos)

# Show the final predictions
print("Predicciones finales:")
predicciones_finales_soft

Predicciones finales:


0      0.87500
1      0.87500
2      0.78125
3      1.00000
4      0.87500
        ...   
635    0.21875
636    0.81250
637    0.87500
638    0.34375
639    0.53125
Name: persp_M, Length: 640, dtype: float64

## Creación y Guardado de Archivos

In [None]:
# Replace the id-based index of the full test frame with 0..n-1 (in place),
# so rows can be addressed by position when the submission is built
test_df_full.reset_index(drop=True, inplace=True)
test_df_full

Unnamed: 0,id_EXIST,lang,text,meme,path_memes,number_annotators,annotators,gender_annotators,age_annotators,ethnicities_annotators,study_levels_annotators,countries_annotators,split
0,310001,es,Soy como la madre de mi hermano,310001.jpeg,memes/310001.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
1,310002,es,DESAFI LLEVAR EN IRÁN LAS AUTORIDADES ESTÁN RE...,310002.jpeg,memes/310002.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
2,310003,es,Vincent Vega @VincentVega677-7h D *** YA VIENE...,310003.jpeg,memes/310003.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
3,310004,es,A ti Mujer soñadora Feliz Día,310004.jpeg,memes/310004.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
4,310005,es,Lya Gonzalez @LyaGonzalez1 Aquí hay revolución...,310005.jpeg,memes/310005.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1048,410509,en,"WOMEN SHOULDN'T WORK OUTSIDE THE HOME YEAH, RI...",410509.jpeg,memes/410509.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...","[High school degree or equivalent, High school...","[South Africa, Belgium, Portugal, Poland, Sout...",TEST-MEME_EN
1049,410510,en,"Sa.thousand.apologies ACTUALLY, WOMEN SHOULDN'...",410510.jpeg,memes/410510.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...","[High school degree or equivalent, High school...","[South Africa, Belgium, Portugal, Poland, Sout...",TEST-MEME_EN
1050,410511,en,Bitches only think you good for sex.. like bit...,410511.jpeg,memes/410511.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...","[High school degree or equivalent, High school...","[South Africa, Belgium, Portugal, Poland, Sout...",TEST-MEME_EN
1051,410512,en,BITCHES BE LOOKING LIKE THIS @ThingsBitchesSay...,410512.jpeg,memes/410512.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...","[High school degree or equivalent, High school...","[South Africa, Belgium, Portugal, Poland, Sout...",TEST-MEME_EN


In [None]:
# Replace the index of the filtered frame with 0..n-1 (in place) so that its
# row labels line up with the positions of the prediction Series
test_df.reset_index(drop=True, inplace=True)
test_df

Unnamed: 0,id_EXIST,lang,text,meme,path_memes,number_annotators,annotators,gender_annotators,age_annotators,ethnicities_annotators,study_levels_annotators,countries_annotators,split
0,310001,es,soy como la madre de mi hermano,310001.jpeg,memes/310001.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
1,310003,es,vincent vega @vincentvega677-7h d *** ya viene...,310003.jpeg,memes/310003.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
2,310005,es,lya gonzalez @lyagonzalez1 aquí hay revolución...,310005.jpeg,memes/310005.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
3,310007,es,"nooo, el día de la mujer es una conmemoración,...",310007.jpeg,memes/310007.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
4,310011,es,"nuria madrid13nadamas @nurirealmadrid ella 46,...",310011.jpeg,memes/310011.jpeg,6,"[Annotator_888, Annotator_889, Annotator_890, ...","[M, M, M, F, F, F]","[46+, 23-45, 18-22, 46+, 18-22, 23-45]","[White or Caucasian, White or Caucasian, White...","[Master’s degree, Bachelor’s degree, High scho...","[Italy, Spain, Portugal, Mexico, United Kingdo...",TEST-MEME_ES
...,...,...,...,...,...,...,...,...,...,...,...,...,...
635,410509,en,"women shouldn't work outside the home yeah, ri...",410509.jpeg,memes/410509.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...","[High school degree or equivalent, High school...","[South Africa, Belgium, Portugal, Poland, Sout...",TEST-MEME_EN
636,410510,en,"sa.thousand.apologies actually, women shouldn'...",410510.jpeg,memes/410510.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...","[High school degree or equivalent, High school...","[South Africa, Belgium, Portugal, Poland, Sout...",TEST-MEME_EN
637,410511,en,bitches only think you good for sex.. like bit...,410511.jpeg,memes/410511.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...","[High school degree or equivalent, High school...","[South Africa, Belgium, Portugal, Poland, Sout...",TEST-MEME_EN
638,410512,en,bitches be looking like this @thingsbitchessay...,410512.jpeg,memes/410512.jpeg,6,"[Annotator_1102, Annotator_1103, Annotator_196...","[M, M, M, F, F, F]","[18-22, 23-45, 46+, 18-22, 23-45, 46+]","[Black or African American, White or Caucasian...","[High school degree or equivalent, High school...","[South Africa, Belgium, Portugal, Poland, Sout...",TEST-MEME_EN


In [None]:
import json

def format_predictions(hard, soft):
    """Build the hard- and soft-label submission records for every test meme.

    Reads two module-level DataFrames: ``test_df_full`` (every test meme) and
    ``test_df`` (the subset that was scored). ``hard`` and ``soft`` are
    matched to ``test_df`` by row position, so the result does not depend on
    how ``test_df`` is indexed.

    Args:
        hard: one decision per row of ``test_df``, in row order. A value
            equal to 1 becomes "DIRECT"; anything else becomes "JUDGEMENTAL".
        soft: one score per row of ``test_df``, in row order. The score is
            reported as the "DIRECT" value and ``1 - score`` as "JUDGEMENTAL".

    Returns:
        Tuple ``(results_hard, results_soft)``: two lists with one dict per
        row of ``test_df_full``, each holding the keys "id", "value" and
        "test_case". Memes that are not in ``test_df`` get "NO" as the hard
        value and ``{"NO": 1.0, "DIRECT": 0.0, "JUDGEMENTAL": 0.0}`` as the
        soft value.

    Raises:
        ValueError: if ``hard`` or ``soft`` does not have exactly one entry
            per row of ``test_df``.
    """
    hard_values = list(hard)
    soft_values = list(soft)
    scored_ids = test_df['id_EXIST'].tolist()

    if not (len(hard_values) == len(soft_values) == len(scored_ids)):
        raise ValueError(
            "hard and soft must each contain one prediction per row of test_df "
            f"(got {len(hard_values)} and {len(soft_values)} for {len(scored_ids)} rows)"
        )

    # Build the id -> row-position lookup once instead of scanning test_df for
    # every test meme. setdefault keeps the first occurrence of a repeated id.
    position_by_id = {}
    for position, scored_id in enumerate(scored_ids):
        position_by_id.setdefault(scored_id, position)

    test_case = "EXIST2024"
    results_hard = []
    results_soft = []

    for meme_id in test_df_full['id_EXIST'].tolist():
        tweet_id = f"{meme_id}"
        position = position_by_id.get(meme_id)

        if position is None:
            # Meme was not scored: report it as non-sexist
            value_hard = "NO"
            value_soft = {"NO": 1.0, "DIRECT": 0.0, "JUDGEMENTAL": 0.0}
        else:
            value_hard = "DIRECT" if hard_values[position] == 1 else "JUDGEMENTAL"
            # Plain float keeps the record JSON-serialisable
            direct_score = float(soft_values[position])
            value_soft = {"NO": 0.0, "DIRECT": direct_score, "JUDGEMENTAL": 1 - direct_score}

        # Append one record per output file
        results_hard.append({
            "id": tweet_id,
            "value": value_hard,
            "test_case": test_case
        })
        results_soft.append({
            "id": tweet_id,
            "value": value_soft,
            "test_case": test_case
        })

    return results_hard, results_soft

# Build the hard and soft submission records from the two sets of joint predictions
resultado_hard, resultado_soft = format_predictions(predicciones_finales_hard, predicciones_finales_soft)

In [None]:
# Display the hard-label records
resultado_hard

[{'id': '310001', 'value': 'DIRECT', 'test_case': 'EXIST2024'},
 {'id': '310002', 'value': 'NO', 'test_case': 'EXIST2024'},
 {'id': '310003', 'value': 'DIRECT', 'test_case': 'EXIST2024'},
 {'id': '310004', 'value': 'NO', 'test_case': 'EXIST2024'},
 {'id': '310005', 'value': 'DIRECT', 'test_case': 'EXIST2024'},
 {'id': '310006', 'value': 'NO', 'test_case': 'EXIST2024'},
 {'id': '310007', 'value': 'DIRECT', 'test_case': 'EXIST2024'},
 {'id': '310008', 'value': 'NO', 'test_case': 'EXIST2024'},
 {'id': '310009', 'value': 'NO', 'test_case': 'EXIST2024'},
 {'id': '310010', 'value': 'NO', 'test_case': 'EXIST2024'},
 {'id': '310011', 'value': 'DIRECT', 'test_case': 'EXIST2024'},
 {'id': '310012', 'value': 'NO', 'test_case': 'EXIST2024'},
 {'id': '310013', 'value': 'NO', 'test_case': 'EXIST2024'},
 {'id': '310014', 'value': 'DIRECT', 'test_case': 'EXIST2024'},
 {'id': '310015', 'value': 'JUDGEMENTAL', 'test_case': 'EXIST2024'},
 {'id': '310016', 'value': 'NO', 'test_case': 'EXIST2024'},
 {'id':

In [None]:
# Display the soft-label records
resultado_soft

[{'id': '310001',
  'value': {'NO': 0.0, 'DIRECT': 0.875, 'JUDGEMENTAL': 0.125},
  'test_case': 'EXIST2024'},
 {'id': '310002',
  'value': {'NO': 1.0, 'DIRECT': 0.0, 'JUDGEMENTAL': 0.0},
  'test_case': 'EXIST2024'},
 {'id': '310003',
  'value': {'NO': 0.0, 'DIRECT': 0.875, 'JUDGEMENTAL': 0.125},
  'test_case': 'EXIST2024'},
 {'id': '310004',
  'value': {'NO': 1.0, 'DIRECT': 0.0, 'JUDGEMENTAL': 0.0},
  'test_case': 'EXIST2024'},
 {'id': '310005',
  'value': {'NO': 0.0, 'DIRECT': 0.78125, 'JUDGEMENTAL': 0.21875},
  'test_case': 'EXIST2024'},
 {'id': '310006',
  'value': {'NO': 1.0, 'DIRECT': 0.0, 'JUDGEMENTAL': 0.0},
  'test_case': 'EXIST2024'},
 {'id': '310007',
  'value': {'NO': 0.0, 'DIRECT': 1.0, 'JUDGEMENTAL': 0.0},
  'test_case': 'EXIST2024'},
 {'id': '310008',
  'value': {'NO': 1.0, 'DIRECT': 0.0, 'JUDGEMENTAL': 0.0},
  'test_case': 'EXIST2024'},
 {'id': '310009',
  'value': {'NO': 1.0, 'DIRECT': 0.0, 'JUDGEMENTAL': 0.0},
  'test_case': 'EXIST2024'},
 {'id': '310010',
  'value': {

In [None]:
# Write both result lists to disk as indented JSON
import json

# Output files for the hard-label and the soft-label results
ruta_guardado_hard = '/home/alvarocarrillo/TFG/Trabajo/Dataset/exist2024_I2C-Huelva/task5_hard_I2C-Huelva_3.json'
ruta_guardado_soft = '/home/alvarocarrillo/TFG/Trabajo/Dataset/exist2024_I2C-Huelva/task5_soft_I2C-Huelva_3.json'

# Hard results first, then soft results
for ruta_salida, registros in (
    (ruta_guardado_hard, resultado_hard),
    (ruta_guardado_soft, resultado_soft),
):
    with open(ruta_salida, 'w') as salida:
        json.dump(registros, salida, indent=2)
