In [13]:
import pandas as pd
import json

# Load Excel file
file_path = "/teamspace/studios/this_studio/Copy of evaluation_llama_by_themes_100(1).xlsx"
df = pd.read_excel(file_path)

# Ensure column names are correct
df = df[['Image Name', "Caption", 'Theme', 'Generated Script']]  # Adjust if necessary

# Convert DataFrame to list of dictionaries
data = df.to_dict(orient="records")

# Save as JSON
json_file_path = "/teamspace/studios/this_studio/fine_tuning_data.json"
with open(json_file_path, "w", encoding="utf-8") as f:
    json.dump(data, f, indent=4, ensure_ascii=False)

print(f"Dataset saved as {json_file_path}")


Dataset saved as /teamspace/studios/this_studio/fine_tuning_data.json


In [2]:
import os
import torch
import json
from datasets import load_dataset, DatasetDict
from transformers import (
    TrainingArguments,
    Trainer,
    AutoTokenizer,
    AutoModelForCausalLM,
    DataCollatorForLanguageModeling,
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import copy

# 🚀 Enable memory optimization
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Load JSON dataset
json_path = "/teamspace/studios/this_studio/fine_tuning_data.json"
dataset = DatasetDict({
    "train": load_dataset("json", data_files=json_path, split="train[:90%]"),
    "validation": load_dataset("json", data_files=json_path, split="train[90%:]")
})

# Load tokenizer
model_name = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Set padding token

# Tokenization function
def tokenize_function(examples):
    inputs = [f"Caption: {c} Theme: {t} Script: {s}" for c, t, s in zip(examples["Caption"], examples["Theme"], examples["Generated Script"])]
    tokenized = tokenizer(inputs, truncation=False, padding="longest")
    tokenized["labels"] = copy.deepcopy(tokenized["input_ids"])
    return tokenized

# Apply tokenization
tokenized_datasets = dataset.map(tokenize_function, batched=True, remove_columns=["Image Name", "Caption", "Theme", "Generated Script"])

# Save tokenized datasets
torch.save(tokenized_datasets["train"], "train_dataset.pt")
torch.save(tokenized_datasets["validation"], "val_dataset.pt")

# Reload tokenized dataset
train_dataset = torch.load("train_dataset.pt")
val_dataset = torch.load("val_dataset.pt")

# 🚀 4-bit Quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True
)

# 🚀 Load Quantized Model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)

# 🚀 Prepare Model for LoRA Training
model = prepare_model_for_kbit_training(model)

# 🚀 LoRA Configuration (Train Small Adapter Layers)
lora_config = LoraConfig(
    r=16,  # Rank of LoRA adapters
    lora_alpha=32,  # Scaling factor
    lora_dropout=0.1,  # Dropout for stability
    task_type="CAUSAL_LM"
)

# 🚀 Apply LoRA to Model
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# Data collator
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=False
)

# 🚀 Training arguments (Optimized for LoRA + 4-bit)
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    per_device_train_batch_size=4,  # ✅ LoRA reduces memory, so batch size can increase
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=16,
    save_total_limit=2,
    num_train_epochs=11,
    save_strategy="epoch",
    logging_dir="./logs",
    logging_steps=10,
    fp16=True,  # ✅ FP16 speeds up training
    gradient_checkpointing=True,
    torch_compile=False,  # ✅ Fix InternalTorchDynamoError
)

# Free up GPU memory before training
torch.cuda.empty_cache()

# Initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# 🚀 Start Training
trainer.train()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 4,587,520 || all params: 3,217,337,344 || trainable%: 0.1426


Epoch,Training Loss,Validation Loss
1,No log,1.065352
2,1.219500,1.020484
3,1.153000,0.986873
4,1.109000,0.963824
5,1.078600,0.944292
6,1.078600,0.927648
7,1.059800,0.916031
8,1.024200,0.908863
9,1.048800,0.904244


TrainOutput(global_step=77, training_loss=1.0938526376501305, metrics={'train_runtime': 1879.6133, 'train_samples_per_second': 2.634, 'train_steps_per_second': 0.041, 'total_flos': 3.92347559380992e+16, 'train_loss': 1.0938526376501305, 'epoch': 9.70796460176991})

In [3]:
# Define save directory
save_directory = "/teamspace/studios/this_studio/results"

# Save model
model.save_pretrained(save_directory)

# Save tokenizer
tokenizer.save_pretrained(save_directory)

print(f"Model and tokenizer saved to {save_directory}")


Model and tokenizer saved to /teamspace/studios/this_studio/results


In [1]:
import os
import torch
import time
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Paths
MODEL_PATH = "/teamspace/studios/this_studio/results"
DATASET_PATH = "/teamspace/studios/this_studio/evaluation_llama_by_themes_100.xlsx"
OUTPUT_PATH = "/teamspace/studios/this_studio/generated/generated_results.xlsx"

# Load tokenizer
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Load model with 4-bit quantization
print("Loading model...")
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=bnb_config,
    device_map="auto"
)

# Function to generate scripts without limitations
def generate_script(caption, theme):
    input_text = f"Caption: {caption}\nTheme: {theme}\nScript:\n"
    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)

    print("Generating script...")
    start_time = time.time()
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            max_length=4096,  # Large max length to ensure full generation
            pad_token_id=tokenizer.eos_token_id
        )
    response_time = time.time() - start_time

    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    script_start = generated_text.find("Script:")
    if script_start != -1:
        generated_script = generated_text[script_start + len("Script:"):].strip()
    else:
        generated_script = generated_text.strip()

    return generated_script, response_time

# Load dataset
df = pd.read_excel(DATASET_PATH)
df.fillna("", inplace=True)

# Generate scripts for all samples
for index, row in df.iterrows():
    caption = row["Caption"]
    theme = row["Theme"]

    generated_script, response_time = generate_script(caption, theme)

    df.at[index, "Generated Script"] = generated_script
    df.at[index, "Response Time"] = response_time

# Save results to Excel
df.to_excel(OUTPUT_PATH, index=False)

print("Script generation completed successfully!")


Loading tokenizer...
Loading model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Generating script...


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating script...
Generating sc

In [1]:
import pandas as pd
import torch
import math
import evaluate
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer, util

# Load the Excel file
data_path = "/teamspace/studios/this_studio/generated/generated_results.xlsx"
df = pd.read_excel(data_path)

df.fillna("", inplace=True)  # Replace NaN values with empty strings

# Load evaluation metrics
bleu_metric = evaluate.load("bleu")
semantic_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Load a base Llama model for perplexity calculation
MODEL_PATH = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, device_map="auto")

def compute_perplexity(text):
    inputs = tokenizer(text, return_tensors="pt").input_ids.to(model.device)
    with torch.no_grad():
        outputs = model(input_ids=inputs, labels=inputs)
    loss = outputs.loss.item()
    return math.exp(loss)

def compute_semantic_similarity(text1, text2):
    embeddings1 = semantic_model.encode(text1, convert_to_tensor=True)
    embeddings2 = semantic_model.encode(text2, convert_to_tensor=True)
    similarity = util.pytorch_cos_sim(embeddings1, embeddings2)
    return similarity.item()

# Initialize total scores
total_bleu, total_similarity, total_accuracy, total_ppl, total_response_time = 0, 0, 0, 0, 0
num_samples = len(df)

for index, row in df.iterrows():
    untrained_script = row["Untrained Llama Script"].strip()
    generated_script = row["Generated Script"].strip()
    response_time = row["Response Time"]
    
    # Compute BLEU Score if reference exists
    if untrained_script:
        bleu_score = bleu_metric.compute(predictions=[generated_script], references=[[untrained_script]])["bleu"]
        semantic_similarity = compute_semantic_similarity(generated_script, untrained_script)
    else:
        bleu_score, semantic_similarity = 0.0, 0.0
    
    # Compute Accuracy
    accuracy = 0.2 * bleu_score + 0.8 * semantic_similarity
    
    # Compute Perplexity
    ppl_score = compute_perplexity(generated_script)
    
    # Store results
    df.at[index, "BLEU Score"] = bleu_score
    df.at[index, "Semantic Similarity"] = semantic_similarity
    df.at[index, "Accuracy"] = accuracy
    df.at[index, "Perplexity"] = ppl_score
    
    # Accumulate scores
    total_bleu += bleu_score
    total_similarity += semantic_similarity
    total_accuracy += accuracy
    total_ppl += ppl_score
    total_response_time += response_time

# Compute averages
if num_samples > 0:
    avg_bleu = total_bleu / num_samples
    avg_similarity = total_similarity / num_samples
    avg_accuracy = total_accuracy / num_samples
    avg_ppl = total_ppl / num_samples
    avg_response_time = total_response_time / num_samples
else:
    avg_bleu = avg_similarity = avg_accuracy = avg_ppl = avg_response_time = 0.0

print("\n===== AVERAGE METRICS =====")
print(f"Average BLEU Score: {avg_bleu:.4f}")
print(f"Average Semantic Similarity: {avg_similarity:.4f}")
print(f"Average Accuracy: {avg_accuracy:.4f}")
print(f"Average Perplexity: {avg_ppl:.4f}")
print(f"Average Response Time (s): {avg_response_time:.4f}")

# Save results to Excel
output_path = "/teamspace/studios/this_studio/evaluated_results.xlsx"
df.to_excel(output_path, index=False)
print(f"Results saved to: {output_path}")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


===== AVERAGE METRICS =====
Average BLEU Score: 0.1542
Average Semantic Similarity: 0.7771
Average Accuracy: 0.6525
Average Perplexity: 2.4889
Average Response Time (s): 16.2328
Results saved to: /teamspace/studios/this_studio/evaluated_results.xlsx
