Generating tests for a varied set of documents, models and dimensions on the papers dataset

In [1]:
import pandas as pd
# Load the three paper tables (one CSV per subject area) in a single pass.
# The path strings are kept exactly as they were; the order of the tuple
# matches the order of the names being unpacked into.
ai_set, dl_set, cv_set = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../examples/datasets/ai_papers.csv',
        '../examples//datasets/dl_papers.csv',
        '../examples//datasets/cv_papers.csv',
    )
)

In [None]:
# Pipeline to speed up the conversion into a HuggingFace Dataset

def hf_pipeline(data):
    """Convert a labelled pandas DataFrame into a HuggingFace ``Dataset``.

    Args:
        data: pandas DataFrame that contains a ``'label'`` column.

    Returns:
        The ``Dataset`` built from ``data`` whose ``'label'`` column has been
        cast to a ``ClassLabel`` feature. The class names are the distinct
        values of ``data['label']``, in the order ``unique()`` returns them.
    """
    from datasets import Dataset, ClassLabel

    # Distinct label values become the ClassLabel vocabulary.
    label_names = list(data['label'].unique())
    label_feature = ClassLabel(names=label_names)

    return Dataset.from_pandas(data).cast_column('label', label_feature)


def treat_datasets(n_papers=50):
    """Build the combined, labelled abstracts dataset used by the experiments.

    Takes the first ``n_papers`` rows of each of the module-level frames
    ``ai_set``, ``dl_set`` and ``cv_set``, tags every row with its subject-area
    label, stacks the three slices, and keeps only the abstract (renamed to
    ``'text'``) and the label.

    Args:
        n_papers: Number of leading rows taken from EACH source frame.

    Returns:
        Whatever ``hf_pipeline`` returns for the resulting two-column
        (``'text'``, ``'label'``) DataFrame.
    """
    labelled_sources = (
        (ai_set, "Artificial Intelligence"),
        (dl_set, "Digital Libraries"),
        (cv_set, "Computer Vision"),
    )

    # Slice each source and attach its label as a new column.
    labelled_slices = [
        source.iloc[:n_papers, :].assign(label=category)
        for source, category in labelled_sources
    ]

    abstracts = (
        pd.concat(labelled_slices, ignore_index=True)
        # Leftover CSV index column, if present.
        .drop(columns=['Unnamed: 0'], errors='ignore')
        .loc[:, ['abstract', 'label']]
        .rename(columns={'abstract': 'text'})
    )

    return hf_pipeline(abstracts)



: 

In [None]:
from llm_mri import ActivationAreas, Evaluation
from llm_mri.dimensionality_reduction import PCA
import pandas as pd
import time
import warnings
import sys

# Suppress the SettingWithCopyWarning
warnings.filterwarnings('ignore', category=pd.errors.SettingWithCopyWarning)

# Experiment grid.
# n_papers: how many papers are taken from EACH of the three source datasets.
# models:   checkpoint name -> parameter-count label stored in the results.
n_papers = [20, 50, 100]
models = {'distilbert-base-uncased':'67M', 'bert-base-uncased':'110M', 'google-bert/bert-large-uncased':'330M'}
score_columns = ['model', 'documents', 'model_size', 'components', 'score', 'processing_time', 'hidden_states_size']

# One dict per evaluation. The DataFrame is built once after the loops instead
# of being grown with pd.concat on every iteration: that pattern is quadratic
# and, starting from an empty frame, raises a pandas FutureWarning.
# If the run is interrupted, the partial results are still in `score_records`.
score_records = []

for model in models:
    for paper in n_papers:
        print("----------------------------------------------------------------")
        print("Model - ", model, "with ", paper, " papers")
        abstracts = treat_datasets(n_papers=paper)
        pca = PCA(n_components=paper)

        llm_mri = ActivationAreas(model=model, device="cpu", dataset=abstracts, reduction_method=pca)

        # Time the processing with a monotonic clock, so the measurement is
        # not affected by system clock adjustments during a long run.
        start_time = time.perf_counter()
        llm_mri.process_activation_areas()
        processing_time = time.perf_counter() - start_time
        print(f"Total time: {processing_time:.2f} seconds")

        # Get hidden states dataset size in bytes
        # NOTE(review): sys.getsizeof only counts the memory directly attributed
        # to the object itself, not the objects it refers to, so this is
        # probably not the full size of the stored hidden states — confirm what
        # type `hidden_states_dataset` is and use a type-specific measure.
        hidden_states_size = sys.getsizeof(llm_mri.hidden_states_dataset)

        # Evaluate with a quarter, a half and all of the PCA components.
        for components in [paper // 4, paper // 2, paper]:
            evaluation = Evaluation(llm_mri, n_components=components)
            results = evaluation.evaluate_model()
            score = results['f1_score_difference']

            score_records.append({
                'model': model,
                'documents': paper * 3,  # 3 datasets combined
                'model_size': models[model],
                'components': components,
                'score': score,
                'processing_time': processing_time,  # same for every component count of this run
                'hidden_states_size': hidden_states_size,
            })
            print(f"Evaluating model with {components} components: {score:.3f} score!")
        print("\n")

# Build the results table in one go; `columns` fixes the column order and keeps
# the expected schema even if no evaluation was recorded.
df_score = pd.DataFrame(score_records, columns=score_columns)

# Display the final DataFrame
print(df_score)

  from .autonotebook import tqdm as notebook_tqdm


----------------------------------------------------------------
Model -  distilbert-base-uncased with  20  papers


Casting the dataset: 100%|██████████| 60/60 [00:00<00:00, 13240.29 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 2110.15 examples/s]
Map: 100%|██████████| 60/60 [00:21<00:00,  2.73 examples/s]


Total time: 22.14 seconds
torch.Size([60])
torch.Size([60])
Evaluating model with 5 components: 0.099 score!
torch.Size([60])
torch.Size([60])


  df_score = pd.concat([df_score, pd.DataFrame([new_evaluation])], ignore_index=True)


Evaluating model with 10 components: 0.022 score!
torch.Size([60])
torch.Size([60])
Evaluating model with 20 components: 0.020 score!


----------------------------------------------------------------
Model -  distilbert-base-uncased with  50  papers


Casting the dataset: 100%|██████████| 150/150 [00:00<00:00, 44594.95 examples/s]
Map: 100%|██████████| 150/150 [00:00<00:00, 2568.50 examples/s]
Map: 100%|██████████| 150/150 [01:04<00:00,  2.32 examples/s]


Total time: 64.80 seconds
torch.Size([150])
torch.Size([150])
Evaluating model with 12 components: -0.020 score!
torch.Size([150])
torch.Size([150])
Evaluating model with 25 components: -0.003 score!
torch.Size([150])
torch.Size([150])
Evaluating model with 50 components: -0.020 score!


----------------------------------------------------------------
Model -  distilbert-base-uncased with  100  papers


Casting the dataset: 100%|██████████| 300/300 [00:00<00:00, 65937.81 examples/s]
Map: 100%|██████████| 300/300 [00:00<00:00, 1782.55 examples/s]
Map:   0%|          | 0/300 [00:00<?, ? examples/s]