In [None]:
import os
# Memory saving
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'


In [None]:
# mount drive to access data
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
%cd /content/drive/MyDrive/Lupus-Subreddit-LLM/
%ls

In [None]:
!pip install -r requirements.txt
# !pip install --upgrade --force-reinstall sentencepiece

In [None]:
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
print(torch.__version__)  # Originally targeted 2.5.1+cu121; the recorded run below printed 2.6.0+cu124
print(torch.version.cuda) # Originally targeted 12.1; the recorded run below printed 12.4
print(torch.cuda.is_available())  # Expected: True
from torch.utils.data import Dataset
from tqdm import tqdm
import evaluate
from trl import SFTConfig, SFTTrainer, DataCollatorForCompletionOnlyLM

from peft import get_peft_model, LoraConfig, TaskType
import re

import pickle
import json
import matplotlib.pyplot as plt

from urllib.request import urlopen
import io
import pandas as pd
import numpy as np
import seaborn as sns
from huggingface_hub import login

# Never paste the access token into the notebook: read it from the HF_TOKEN
# environment variable instead. When the variable is unset, `token=None` makes
# `login` fall back to its interactive prompt.
login(token=os.environ.get("HF_TOKEN"))
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
import accelerate
import datasets
from datasets import Dataset
from transformers import T5Tokenizer, T5ForConditionalGeneration


2.6.0+cu124
12.4
True


# Load Subreddit Annotated Dataset

* Structured summaries based on concepts framed in the Biopsychosocial Model

In [None]:
# Read in subreddit lupus dataset which has been human annotated with json headers across columns
annotated_post_df = pd.read_csv("lupus_subreddit_revised_post_level_pain_dimensions_revised_post_level_25.csv")
annotated_post_df

In [None]:
# Define the starting column
start_col = "Pain Descriptions"

# Select relevant columns from "Pain Descriptions" onward
selected_columns = annotated_post_df.loc[:, start_col:]

# Convert each row into a dictionary and store in a list
annotated_post_df['labeled_summaries'] = selected_columns.apply(lambda row: row.to_dict(), axis=1).tolist()

# Show full cell contents so the dictionaries are not truncated when displayed
pd.set_option('display.max_colwidth', None)

print(annotated_post_df.head(20)) # Prints the first 20 posts together with their labeled summaries


# Keep only the post text and its summary. `.copy()` makes this an independent
# frame, so the later in-place column assignments do not write into a slice of
# the original DataFrame (SettingWithCopyWarning).
# NOTE: this cell overwrites `annotated_post_df`; re-read the CSV before re-running it.
annotated_post_df = annotated_post_df[['selftext', 'labeled_summaries']].copy()
annotated_post_df
# annotated_post_df.to_csv("llm_finetuning_25_sample.csv")

In [None]:
# Load the second annotated set from CSV.
second_set_df = pd.read_csv("filtered_ratings_revised_dw.csv")
second_set_df

# Keep only the prompt/summary columns and rename them to the column names
# used by annotated_post_df so the two frames can be concatenated later.
second_set_df = second_set_df[['instruction', 'response_edited']].rename(columns={
    'instruction': 'selftext',
    'response_edited': 'labeled_summaries'
})
second_set_df
# Ensure all values in 'labeled_summaries' are strings before attempting JSON conversion
# NOTE(review): astype(str) presumably turns missing values into the text 'nan';
# confirm those rows are meant to be dropped after parsing returns None.
second_set_df['labeled_summaries'] = second_set_df['labeled_summaries'].astype(str)

import ast

# Function to ensure all JSON-like entries are converted properly before parsing
def parse_fixed_labeled_summaries(entry):
    """Parse one 'labeled_summaries' cell into a dictionary.

    Args:
        entry: A dict (returned unchanged), a string holding either a Python
            dict literal or a JSON object, or any other value.

    Returns:
        * the dict itself when ``entry`` is already a dict;
        * the parsed dict when ``entry`` is a Python dict literal, with any
          value that is itself a stringified list (``"[...]"``) converted to a
          real list (``[]`` when that inner parse fails);
        * the decoded JSON value when ``entry`` is not a Python literal but is
          JSON (tried verbatim first, then with single quotes swapped for
          double quotes);
        * ``None`` when the string cannot be parsed, or parses to a Python
          literal that is not a dict;
        * ``entry`` unchanged when it is neither a dict nor a string.
    """
    if isinstance(entry, dict):
        # Entry is already properly formatted, return as-is
        return entry

    if not isinstance(entry, str):
        return entry  # Return as-is if not a string or dictionary

    # Everything ast.literal_eval is documented to raise on malformed input.
    literal_errors = (SyntaxError, ValueError, TypeError, MemoryError, RecursionError)

    try:
        # Convert string representations of dictionaries into actual dictionaries
        parsed_dict = ast.literal_eval(entry)
    except literal_errors:
        # Not a Python literal (e.g. contains null/true/false). Try the text as
        # JSON verbatim first: blindly swapping quotes would corrupt valid JSON
        # whose strings contain apostrophes. Only then fall back to the swap.
        for candidate in (entry, entry.replace("'", "\"")):
            try:
                return json.loads(candidate)
            except json.JSONDecodeError:
                continue
        return None  # Return None if every attempt fails

    if not isinstance(parsed_dict, dict):
        # A bare list/number/string literal is not a summary; calling .items()
        # on it would raise AttributeError.
        return None

    # Ensure values that are string representations of lists are converted to actual lists
    for key, value in parsed_dict.items():
        if isinstance(value, str) and value.startswith("[") and value.endswith("]"):
            try:
                parsed_dict[key] = ast.literal_eval(value)
            except literal_errors:
                parsed_dict[key] = []  # Default to empty list if parsing fails

    return parsed_dict

# Apply the updated function to second_set_df
second_set_df['labeled_summaries'] = second_set_df['labeled_summaries'].apply(parse_fixed_labeled_summaries)


# annotated_df_cleaning
def clean_labeled_summaries(entry):
    """Normalise one 'labeled_summaries' cell of the annotated posts.

    Args:
        entry: A dict (returned unchanged), a string holding a Python dict
            literal, or any other value.

    Returns:
        * the dict itself when ``entry`` is already a dict;
        * for a string: the parsed dict, where stringified lists (``"[...]"``)
          become real lists (``[]`` if the inner parse fails) and float NaN
          values become ``[]``;
        * ``None`` when the string cannot be parsed or does not parse to a dict;
        * ``entry`` unchanged when it is neither a dict nor a string.
    """
    if isinstance(entry, dict):  # Already properly formatted
        return entry

    if not isinstance(entry, str):
        return entry  # Return as-is if not a string or dictionary

    # Everything ast.literal_eval is documented to raise on malformed input.
    literal_errors = (SyntaxError, ValueError, TypeError, MemoryError, RecursionError)

    try:
        # Convert to dictionary safely
        parsed_dict = ast.literal_eval(entry)
    except literal_errors:
        return None  # Return None if parsing completely fails

    if not isinstance(parsed_dict, dict):
        # A list/number/string literal has no .items(); treat it as unparseable
        # instead of raising AttributeError.
        return None

    # Convert string representations of lists into actual lists
    for key, value in parsed_dict.items():
        if isinstance(value, str) and value.startswith("[") and value.endswith("]"):
            try:
                parsed_dict[key] = ast.literal_eval(value)  # Convert list-like strings to lists
            except literal_errors:
                parsed_dict[key] = []  # Default to empty list if parsing fails

        elif isinstance(value, float) and np.isnan(value):  # Convert NaN to empty lists
            parsed_dict[key] = []

    return parsed_dict

# Apply the function to clean the 'labeled_summaries' column
annotated_post_df['labeled_summaries'] = annotated_post_df['labeled_summaries'].apply(clean_labeled_summaries)

# Function to further clean nested string issues inside lists for annotated_posts_df
def deep_clean_labeled_summaries(entry):
    """Turn stringified lists inside a summary dict into real lists.

    The dict is modified in place and the same object is returned. A value is
    treated as a stringified list when it is a string that starts with "[" and
    ends with "]"; if it cannot be parsed it is replaced by an empty list.
    Anything that is not a dict is handed back untouched.
    """
    if not isinstance(entry, dict):
        return entry  # Return as-is if not a dictionary

    for field, raw in entry.items():
        looks_like_list = isinstance(raw, str) and raw.startswith("[") and raw.endswith("]")
        if not looks_like_list:
            continue
        try:
            decoded = ast.literal_eval(raw)  # Convert string lists into actual lists
        except (SyntaxError, ValueError):
            decoded = []  # Default to empty list if parsing fails
        # Only existing keys are reassigned, so the dict size never changes mid-iteration.
        entry[field] = decoded

    return entry

# Apply deep cleaning to labeled_summaries column
annotated_post_df['labeled_summaries'] = annotated_post_df['labeled_summaries'].apply(deep_clean_labeled_summaries)

annotated_post_df

# Function to replace NaN values with empty lists in labeled_summaries
def replace_nan_with_empty_lists(entry):
    """Return a copy of a summary dict in which float NaN values become [].

    Lists and every other value are carried over unchanged. Inputs that are
    not dicts are returned as they are.
    """
    if not isinstance(entry, dict):
        return entry

    def _normalise(value):
        # Lists are checked first so np.isnan is never called on them.
        if isinstance(value, list):
            return value
        if isinstance(value, float) and np.isnan(value):
            return []
        return value

    return {key: _normalise(value) for key, value in entry.items()}

# Apply the function to replace NaNs in labeled_summaries
annotated_post_df['labeled_summaries'] = annotated_post_df['labeled_summaries'].apply(replace_nan_with_empty_lists)

# Stack both annotated sets into a single frame with a fresh 0..n-1 index
combined_df = pd.concat([annotated_post_df, second_set_df], ignore_index=True)

# Remove rows where 'labeled_summaries' is None from combined_df.
# `.copy()` makes the result an independent frame, so assigning to its
# columns later does not trigger SettingWithCopyWarning.
cleaned_combined_df = combined_df.dropna(subset=['labeled_summaries']).copy()
cleaned_combined_df

In [None]:
# Ensure RESPONSE (labeled_summaries) is properly formatted as a JSON string.
# Work on an explicit copy so the assignment never writes into a slice of
# combined_df, and leave values that are already strings alone so re-running
# this cell does not JSON-encode the column a second time.
cleaned_combined_df = cleaned_combined_df.copy()
cleaned_combined_df['labeled_summaries'] = cleaned_combined_df['labeled_summaries'].apply(
    lambda summary: summary if isinstance(summary, str) else json.dumps(summary)
)

cleaned_combined_df

In [None]:
def convert_to_alpaca_format(df, input_col='selftext', output_col='labeled_summaries', instruction=None):
    """Converts a DataFrame into Alpaca fine-tuning JSON format.

    Args:
        df: DataFrame with one post per row.
        input_col: Column holding the post text appended to the instruction.
        output_col: Column holding the target response.
        instruction: Optional prompt prefix. When ``None`` (the default) the
            built-in Biopsychosocial extraction prompt below is used.

    Returns:
        A list with one ``{"INSTRUCTION": ..., "RESPONSE": ...}`` dict per row,
        in row order. A missing post (None/NaN/pd.NA) is treated as an empty
        string, and any other non-string post is converted with ``str``, so a
        single bad row no longer aborts the conversion with a TypeError.
    """
    if instruction is None:
        instruction = """You are an expert in sociolinguistics of medical language, focused on studying pain descriptions using the Biopsychosocial model.

Your task is to extract pain descriptions from the following Reddit post and format them into JSON under these structured headers:

- 'Location': where in the body the pain occurs, can also be general/all over if indicated.
- 'Severity': intensity of pain in patient’s words.
- 'Duration': how long the pain has been felt.
- 'Sex/Gender': author's sex or gender, if mentioned.
- 'Age': author's age, if mentioned.
- 'Comorbidities': any comorbidities mentioned in the post alongside lupus.
- 'Management': followed by a list of any methods or medications mentioned by the author used to treat the pain, as well as any mentions of adherence or barriers/facilitators to access.
- 'Affective': Emotional context of pain, including depression, fear, anxiety, stress, or any described emotional sentiment or reactions related to the pain.
- 'Cognitive': Any descriptions of pain impact on mental processes, including coping, catastrophizing, ability to focus, or memory.
- 'Behavioral': Any changes in a person's day-to-day actions resulting from pain, including substance use, fear of movement, and others.
- 'Personal/Existential/Spiritual': Pain impact on identity, threat to self, personal development, dissatisfaction with life, sense of meaning of life, fear of death, existential concerns, and impacts on or resulting from religious or spiritual beliefs/practices.
- 'Sociocultural': Any language on how a patient's sociocultural context influenced pain, including stigma, invalidation, identified individuals (including healthcare professionals) providing support, caregiving, and loss of autonomy.
- 'Functional': Impact on daily living activities.
- 'Economic': Pain impact on ability to work, medicolegal issues, insurance/compensation issues, work environment/job dissatisfaction, absenteeism, and presenteeism.
- 'Nociplastic': Defined as "pain that arises from altered nociception despite: (1) no clear evidence of actual or threatened tissue damage that causes peripheral nociceptor activation or (2) no clear evidence of disease or lesion of the somatosensory system causing the pain." Nociplastic pain symptoms include widespread pain, fibromyalgia, vulvodynia, urologic chronic pelvic pain syndrome, chronic tension-type headache, chronic low back pain, migraine, temporomandibular disorder. Non-pain symptoms include fatigue, sleep changes, brain fog, uncertainty, overwhelm, helplessness, sensitivity to non-pain stimuli, medication sensitivity and side effects, and irritable bowel syndrome.

Ensure your response should result in only one valid JSON object per post. Strictly adhere to the structure and order.
"""

    alpaca_data = []

    for _, row in df.iterrows():
        post_text = row[input_col]
        if not isinstance(post_text, str):
            # pandas represents an empty CSV cell as float NaN (or None/pd.NA);
            # concatenating that to a str would raise TypeError.
            is_missing = (
                post_text is None
                or post_text is pd.NA
                or (isinstance(post_text, float) and np.isnan(post_text))
            )
            post_text = "" if is_missing else str(post_text)

        alpaca_data.append({
            "INSTRUCTION": instruction + post_text,
            "RESPONSE": row[output_col]
        })

    return alpaca_data
# Build the prompt/response records from the cleaned, combined annotations,
# then wrap them in a DataFrame and convert that to a Hugging Face Dataset
# (`Dataset` here is the `datasets.Dataset` imported last in the import cell).
alpaca_formatted_data = convert_to_alpaca_format(cleaned_combined_df)
alpaca_formatted_data_df = pd.DataFrame(alpaca_formatted_data)
alpaca_formatted_hf_dataset = Dataset.from_pandas(alpaca_formatted_data_df, split = "train")


In [None]:
# Hold out 10% of the examples for evaluation; the fixed seed keeps the split reproducible.
# In the recorded run below this yields 42 training and 5 test examples.
dataset_split = alpaca_formatted_hf_dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = dataset_split['train']
test_dataset = dataset_split['test']
dataset_split, train_dataset, test_dataset

(DatasetDict({
     train: Dataset({
         features: ['INSTRUCTION', 'RESPONSE'],
         num_rows: 42
     })
     test: Dataset({
         features: ['INSTRUCTION', 'RESPONSE'],
         num_rows: 5
     })
 }),
 Dataset({
     features: ['INSTRUCTION', 'RESPONSE'],
     num_rows: 42
 }),
 Dataset({
     features: ['INSTRUCTION', 'RESPONSE'],
     num_rows: 5
 }))

# Loading model


In [None]:
model_name = "meta-llama/Llama-3.2-3B-Instruct"

# "meta-llama/Llama-3.2-1B-Instruct" decent performance, some formatting errors
# Load the causal-LM weights in bfloat16 with automatic device placement.
# NOTE(review): trust_remote_code=True allows repository-provided Python to run;
# presumably not needed for this checkpoint - confirm and drop it if so.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    return_dict=True,
    low_cpu_mem_usage=True,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)

# Tokenizer for the same checkpoint, padding on the right.
# NOTE(review): use_fast=False requests a slow tokenizer, yet the save step
# later in this notebook writes a tokenizer.json - verify which implementation
# is actually loaded.
tokenizer = AutoTokenizer.from_pretrained(model_name,
                                          trust_remote_code=True,
                                          padding_side = 'right',
                                          use_fast = False)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Ensure the tokenizer has a pad token (fall back to the EOS token)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Ensure pad_token_id is set correctly
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Mirror the tokenizer's pad id on the model so both agree on a single integer.
# `model.config.eos_token_id` must not be copied here: a config may list several
# EOS ids, which would leave pad_token_id set to a list that does not match the
# id the tokenizer actually pads with.
if model.config.pad_token_id is None:
    model.config.pad_token_id = tokenizer.pad_token_id


In [None]:
def formatting_prompts_func(mydataset):
    """Render every example of a batch as one training string.

    `mydataset` is indexed by the keys 'INSTRUCTION' and 'RESPONSE'; element i
    of each is combined into "### Instruction:\\n...\\n\\n### Response:\\n...".
    Returns the rendered strings as a list, in input order.
    """
    def _render(idx):
        return "### Instruction:\n{}\n\n### Response:\n{}".format(
            mydataset['INSTRUCTION'][idx],
            mydataset['RESPONSE'][idx],
        )

    return [_render(idx) for idx in range(len(mydataset['INSTRUCTION']))]


In [None]:
# Get token lengths
# Only the INSTRUCTION text is measured here; the "### Instruction:" /
# "### Response:" wrapper and the RESPONSE added by formatting_prompts_func
# are not included, so full training sequences are longer than these numbers.
instruction_lengths = [len(tokenizer(inst)["input_ids"]) for inst in train_dataset["INSTRUCTION"]]

# Check distribution
print(f"Max length: {max(instruction_lengths)}")
print(f"90th percentile: {np.percentile(instruction_lengths, 90)}")
print(f"95th percentile: {np.percentile(instruction_lengths, 95)}")



Max length: 1717
90th percentile: 1001.1999999999999
95th percentile: 1082.1


# LoRA Config

In [None]:
# lora_config = LoraConfig(
#    r=16,  # Low-rank dimension
#    lora_alpha=32,  # Scaling factor
#    target_modules=["q_proj", "v_proj"],
#    lora_dropout=0.05,  # Dropout rate
#    task_type=TaskType.CAUSAL_LM  # Task causal language model
#)

# model = get_peft_model(model, lora_config)

In [None]:
# The template must match the response marker emitted by formatting_prompts_func.
# NOTE(review): presumably the collator uses this marker to restrict the loss to
# the response tokens - confirm against the installed trl version.
response_template = "### Response:\n"
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)


# Training Parameters

In [None]:
torch.cuda.empty_cache()

os.environ["WANDB_DISABLED"] = "true"


In [None]:
# Training configuration.
# NOTE(review): do_eval=True on its own presumably does not schedule evaluation
# during training unless an evaluation strategy is also set - confirm.
training_args = SFTConfig(
    output_dir="/tmp",
    num_train_epochs=4,
    save_strategy="epoch",
    bf16=True,
    per_device_train_batch_size=1,  # Reduce batch size
    per_device_eval_batch_size=1,  # Reduce batch size
    max_seq_length=2048,
    do_eval=True,
    # The string "none" is what disables logging integrations; the Python
    # value None falls back to the library default instead of turning them off.
    report_to="none",
)

# Supervised fine-tuning on the formatted prompts; the completion-only collator
# is passed in, and packing is disabled so each example stays a separate sequence.
trainer = SFTTrainer(
    model,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    formatting_func=formatting_prompts_func,
    args=training_args,
    packing=False,
    data_collator=collator,
)


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Map:   0%|          | 0/42 [00:00<?, ? examples/s]

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

# Training

In [None]:
trainer.train()

Step,Training Loss


TrainOutput(global_step=168, training_loss=0.23638536816551572, metrics={'train_runtime': 332.3714, 'train_samples_per_second': 0.505, 'train_steps_per_second': 0.505, 'total_flos': 2860171703672832.0, 'train_loss': 0.23638536816551572, 'epoch': 4.0})

# Model evaluation

* TODO: the evaluation steps below still need to be revised.

# Model testing and evaluation

In [None]:
def formatting_prompts_func_no_response(mydataset):
    """Render inference prompts: the instruction followed by an empty response slot.

    `mydataset` is indexed by the key 'INSTRUCTION'; each element is wrapped as
    "### Instruction:\\n...\\n\\n### Response:\\n" so the model continues from the
    response marker. Returns the prompts as a list, in input order.
    """
    def _render(idx):
        return "### Instruction:\n{}\n\n### Response:\n".format(mydataset['INSTRUCTION'][idx])

    return [_render(idx) for idx in range(len(mydataset['INSTRUCTION']))]


In [None]:
only_instructions = formatting_prompts_func_no_response(test_dataset)


In [None]:
print(only_instructions[0])


In [None]:
class ListDataset(torch.utils.data.Dataset):
    """Minimal map-style dataset that exposes a Python list.

    The base class is spelled out as ``torch.utils.data.Dataset`` on purpose:
    the bare name ``Dataset`` is rebound by the later ``from datasets import
    Dataset`` in the import cell, so ``class ListDataset(Dataset)`` would
    subclass the Hugging Face class without ever initialising it.
    """

    def __init__(self, original_list):
        # Keep a reference to the caller's list; no copy is made.
        self.original_list = original_list

    def __len__(self):
        return len(self.original_list)

    def __getitem__(self, i):
        # Delegates to list indexing, so an int yields one item and a slice yields a list.
        return self.original_list[i]

instructions_final = ListDataset(only_instructions)


In [None]:
# Text-generation pipeline around the fine-tuned model.
gen_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    batch_size=1,  # batch size is a pipeline argument, not a model kwarg
    # Cap the generated continuation only. A total `max_length` of 1024 is
    # shorter than the longest prompts measured earlier (max 1717 tokens) and
    # triggers a warning whenever `max_new_tokens` is also passed at call time.
    max_new_tokens=1024,
    # Return prompt + completion: the later extraction step looks for the
    # "### Instruction:" / "### Response:" markers in the full text.
    return_full_text=True,
    do_sample=False,  # no sampling, so decoding is deterministic
)



Device set to use cuda:0


In [None]:
test_input = tokenizer(instructions_final[:5], padding=True, truncation=True, return_tensors="pt")
print(test_input)


{'input_ids': tensor([[128000,  14711,  30151,  ..., 128009, 128009, 128009],
        [128000,  14711,  30151,  ..., 128009, 128009, 128009],
        [128000,  14711,  30151,  ...,  14711,   6075,    512],
        [128000,  14711,  30151,  ..., 128009, 128009, 128009],
        [128000,  14711,  30151,  ..., 128009, 128009, 128009]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])}


In [None]:
# Keep right padding, matching training. Padding only matters when several
# prompts are batched together; switch to left padding if batched generation
# is ever used with this decoder-only model.
tokenizer.padding_side = 'right'

# Debugging environment variables
# NOTE(review): these are presumably only honoured if set before CUDA is
# initialised - confirm, and move them to the first cell if they are needed.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["TORCH_USE_CUDA_DSA"] = "1"

# Disable gradient computation for inference
with torch.no_grad():
    pipeline_iterator = gen_pipeline(
        instructions_final[:],
        max_new_tokens=1024,
        # Single-beam decoding. `early_stopping` is a beam-search option, so it
        # is not passed here; with one beam it had no effect.
        num_beams=1,
    )

# Store generated outputs (each result is a list holding one dict per returned sequence)
generated_outputs_base = [result[0]["generated_text"] for result in pipeline_iterator]

# Print a sample output for debugging
print(generated_outputs_base[0])




# Assess Model Output Summaries

In [None]:
for i in range(len(generated_outputs_base)):
  print('---------------------')
  print(i)
  print(instructions_final[i])
  print('\n')
  print(generated_outputs_base[i])
  print('---------------------')
  print('\n')



In [None]:
# Save the model and tokenizer
trainer.save_model("./llama-3.2-instruct-3b-fine-tuned-model-second-run")
tokenizer.save_pretrained("./llama-3.2-instruct-3b-fine-tuned-model-second-run")


('./llama-3.2-instruct-3b-fine-tuned-model-second-run/tokenizer_config.json',
 './llama-3.2-instruct-3b-fine-tuned-model-second-run/special_tokens_map.json',
 './llama-3.2-instruct-3b-fine-tuned-model-second-run/tokenizer.json')

In [None]:
patient_sle_flair_df = pd.read_csv("pain_matched_SLE_flair.csv")
patient_sle_flair_df

In [None]:
# Remove any posts from patient_sle_flair_df whose text already appears in
# annotated_post_df (the match is on the post text `selftext`, not on author).
# NOTE(review): posts from second_set_df were also used for training but are
# not excluded here - confirm whether they can occur in this file.
# `.copy()` makes the filtered result an independent frame so that adding a
# column below does not write into a slice of patient_sle_flair_df.
filtered_patient_sle_flair_df = patient_sle_flair_df[
    ~patient_sle_flair_df['selftext'].isin(annotated_post_df['selftext'])
].copy()

# Placeholder response column: convert_to_alpaca_format reads 'labeled_summaries'
# for every row, and these unlabeled posts have no summary yet.
filtered_patient_sle_flair_df['labeled_summaries'] = ""

format_sle_flair_df_1 = convert_to_alpaca_format(filtered_patient_sle_flair_df)
format_sle_flair_df_1[0]

In [None]:
# Wrap the unlabeled prompts in a DataFrame and convert to a Hugging Face Dataset.
formatted_hf_dataset_sle_flair_df = pd.DataFrame(format_sle_flair_df_1)

formatted_hf_dataset_sle_flair_ready = Dataset.from_pandas(formatted_hf_dataset_sle_flair_df, split = "train")

# Split into a reliability sample ('train' side) and a held-out remainder ('test' side).
# In the recorded run below, test_size=0.674 yields 500 reliability and 1034 held-out rows.
# NOTE: this rebinds `dataset_split`, which earlier held the fine-tuning train/test split.
dataset_split = formatted_hf_dataset_sle_flair_ready.train_test_split(test_size=0.674, seed=42)
reliability_sle_dataset = dataset_split['train']
heldout_dataset = dataset_split['test']
dataset_split, reliability_sle_dataset, heldout_dataset

(DatasetDict({
     train: Dataset({
         features: ['INSTRUCTION', 'RESPONSE'],
         num_rows: 500
     })
     test: Dataset({
         features: ['INSTRUCTION', 'RESPONSE'],
         num_rows: 1034
     })
 }),
 Dataset({
     features: ['INSTRUCTION', 'RESPONSE'],
     num_rows: 500
 }),
 Dataset({
     features: ['INSTRUCTION', 'RESPONSE'],
     num_rows: 1034
 }))

In [None]:
reliability_sle_dataset


Dataset({
    features: ['INSTRUCTION', 'RESPONSE'],
    num_rows: 500
})

In [None]:
# Build response-less prompts for the 500-post reliability sample
only_instructions_sle = formatting_prompts_func_no_response(reliability_sle_dataset)


instructions_final_sle = ListDataset(only_instructions_sle)

# Keep right padding, matching training. Padding only matters when several
# prompts are batched together; switch to left padding if batched generation
# is ever used with this decoder-only model.
tokenizer.padding_side = 'right'

# Debugging environment variables
# NOTE(review): these are presumably only honoured if set before CUDA is
# initialised - confirm, and move them to the first cell if they are needed.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["TORCH_USE_CUDA_DSA"] = "1"

# Disable gradient computation for inference
with torch.no_grad():
    pipeline_iterator = gen_pipeline(
        instructions_final_sle[:],
        max_new_tokens=1024,
        # Single-beam decoding. `early_stopping` is a beam-search option, so it
        # is not passed here; with one beam it had no effect.
        num_beams=1,
    )

# Store generated outputs (each result is a list holding one dict per returned sequence)
generated_outputs_base_sle = [result[0]["generated_text"] for result in pipeline_iterator]

# Print a sample output for debugging
print(generated_outputs_base_sle[0])

Both `max_new_tokens` (=1024) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=1024) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=1024) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=1024) and `max_length`(=1024) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_gene

### Instruction:
You are an expert in sociolinguistics of medical language, focused on studying pain descriptions using the Biopsychosocial model.

Your task is to extract pain descriptions from the following Reddit post and format them into JSON under these structured headers:

- 'Location': where in the body the pain occurs, can also be general/all over if indicated.
- 'Severity': intensity of pain in patient’s words.
- 'Duration': how long the pain has been felt.
- 'Sex/Gender': author's sex or gender, if mentioned.
- 'Age': author's age, if mentioned.
- 'Comorbidities': any comorbidities mentioned in the post alongside lupus.
- 'Management': followed by a list of any methods or medications mentioned by the author used to treat the pain, as well as any mentions of adherence or barriers/facilitators to access.
- 'Affective': Emotional context of pain, including depression, fear, anxiety, stress, or any described emotional sentiment or reactions related to the pain.
- 'Cognitive': Any

In [None]:
import re
# Function to extract instruction and response
def extract_instruction_response(text):
    """Split a generated "### Instruction: ... ### Response: ..." string.

    Returns an (instruction, response) tuple of stripped strings. The
    instruction is the text between the first "### Instruction:" and the next
    "### Response:"; the response is everything after the first
    "### Response:". A part whose marker is missing comes back as "".
    """
    def _first_group(pattern):
        # DOTALL lets "." span the newlines inside posts and JSON output.
        found = re.search(pattern, text, re.DOTALL)
        if found is None:
            return ""
        return found.group(1).strip()

    return (
        _first_group(r'### Instruction:\s*(.*?)\s*### Response:'),
        _first_group(r'### Response:\s*(.*)'),
    )

# Process each item in the list
# Split every generated text into its (instruction, response) pair
data = [extract_instruction_response(item) for item in generated_outputs_base_sle]

# Convert to DataFrame
df_generated = pd.DataFrame(data, columns=['instruction', 'response'])
# index=False: otherwise the row index is written as an unnamed column and
# comes back as "Unnamed: 0" when the file is re-read with pd.read_csv.
df_generated.to_csv("final_500_sample.csv", index=False)


In [None]:
df_generated

In [None]:
# Persist both splits to disk.
# NOTE(review): the directory names do not match the contents - in the recorded
# run `heldout_dataset` has 1034 rows, and the 500-row reliability sample is the
# one saved under "heldout_500_sle_dataset". Confirm before renaming, since
# other code may already load these paths.
heldout_dataset.save_to_disk("heldout_500_dataset.hf")
reliability_sle_dataset.save_to_disk("heldout_500_sle_dataset")


Saving the dataset (0/1 shards):   0%|          | 0/1034 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/500 [00:00<?, ? examples/s]

In [None]:
df_generated = pd.read_csv("final_500_sample.csv")


In [None]:
df_generated.head(10)

# instruction: keep only the post text that follows "Strictly adhere to the structure and order."
# response: keep the text from the first "{'Affective':" and cut everything from the second occurrence of "Affective" onward

In [None]:
# Function to clean instruction column (keeping everything AFTER)
def clean_instruction(text):
    """Strip the fixed prompt and return only the Reddit post text.

    Everything up to and including "Strictly adhere to the structure and
    order." is removed, whether that sentence is followed by a newline or ends
    the string (the latter happens when the post itself is empty). The
    remainder is returned stripped.

    Non-string input yields "": an empty cell written to CSV is read back by
    pandas as float NaN, which ``re.sub`` would reject with a TypeError.
    """
    if not isinstance(text, str):
        return ""
    return re.sub(
        r".*?Strictly adhere to the structure and order\.(?:\n|\Z)",
        "",
        text,
        flags=re.DOTALL,
    ).strip()

# Function to clean response column
def clean_response(text):
    """Trim a generated response down to its first summary object.

    Steps, applied in order:
      1. drop everything before the first "{'Affective':";
      2. if "Affective" occurs at least twice, cut the text at the second
         occurrence (the model started repeating itself);
      3. drop everything from the first "}'[]'" onward;
      4. strip trailing runs of "}[]"-style filler.

    Non-string input yields "": an empty response written to CSV is read back
    by pandas as float NaN, which the regex functions would reject with a
    TypeError.
    """
    if not isinstance(text, str):
        return ""

    # Cut everything before the first occurrence of "{'Affective':"
    text = re.sub(r"^.*?(\{'Affective':)", r"\1", text, flags=re.DOTALL)

    # Find occurrences of "Affective" (with or without brackets)
    matches = list(re.finditer(r"Affective", text))
    if len(matches) >= 2:
        text = text[:matches[1].start()].strip()  # Cut at second occurrence

    # Remove everything from the first "}'[]'" onward
    text = re.split(r"\}\'\[\]\'", text, maxsplit=1)[0].strip()

    # Remove any trailing repetitions of "'[]' '[]'" and variations like "\"}'[]'\"}"
    text = re.sub(r"(\s*'?\"?\}'?\[\]\'?\"?\}?)+$", "", text).strip()

    return text

# Apply transformations
df_generated['instruction'] = df_generated['instruction'].apply(clean_instruction)
df_generated['response'] = df_generated['response'].apply(clean_response)

df_generated


In [None]:

# index=False keeps the row index out of the file, so re-reading it does not
# add an extra "Unnamed: 0" column.
df_generated.to_csv("formatted_500_llm_lupus_final_set.csv", index=False)