In [1]:
!pip install -q transformers datasets peft accelerate bitsandbytes


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m39.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m30.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m43.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# Upload your CSV
from google.colab import files
uploaded = files.upload()

Saving cleaned_campaign_data.csv to cleaned_campaign_data.csv


In [15]:
import pandas as pd
from datasets import Dataset

# Load and clean data
df = pd.read_csv(list(uploaded.keys())[0])
df = df.head(300000)

# Build completion
def map_success_label(label: int) -> str:
    return {0: "Low", 1: "Medium", 2: "High"}.get(label, "Unknown")

def build_output(row):
    return (
        f"Predicted ROI: {round(row['ROI'], 2)}\n"
        f"Predicted Conversion Rate: {round(row['Conversion_Rate'], 4)}\n"
        f"Success Probability: {map_success_label(row['Success_Label'])}\n"
        f"Recommendation: In past campaigns targeting {row['Target_Audience']} in {row['Location']}, "
        f"{row['Channel_Used']} showed {'higher' if row['Engagement_Score'] >= 5 else 'lower'} engagement."
    )

finetune_df = pd.DataFrame({
    "input_text": df["Campaign_Description"].apply(str),
    "output_text": df.apply(build_output, axis=1)
})

dataset = Dataset.from_pandas(finetune_df)

In [16]:
from transformers import AutoTokenizer

model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(example):
    input_enc = tokenizer(example["input_text"], truncation=True, padding="max_length", max_length=128)
    target_enc = tokenizer(example["output_text"], truncation=True, padding="max_length", max_length=128)
    input_enc["labels"] = target_enc["input_ids"]
    return input_enc

tokenized_dataset = dataset.map(tokenize)


Map:   0%|          | 0/300000 [00:00<?, ? examples/s]

In [17]:
from transformers import AutoModelForSeq2SeqLM
from peft import get_peft_model, LoraConfig, TaskType

# Load base model
base_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Setup LoRA config
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q", "v"],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM
)

# Wrap with PEFT
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()


trainable params: 344,064 || all params: 77,305,216 || trainable%: 0.4451


In [18]:
from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq

training_args = TrainingArguments(
    output_dir="./t5_lora_model_300000",
    per_device_train_batch_size=8,
    num_train_epochs=3,
    logging_dir="./logs",
    save_strategy="epoch",
    report_to="none"
)

data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator
)


  trainer = Trainer(
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [19]:
trainer.train()


Step,Training Loss
500,14.5718
1000,4.0057
1500,2.7639
2000,2.2678
2500,2.0071
3000,1.8421
3500,1.7295
4000,1.6556
4500,1.6044
5000,1.5636


TrainOutput(global_step=112500, training_loss=1.3411070855034721, metrics={'train_runtime': 16215.4532, 'train_samples_per_second': 55.503, 'train_steps_per_second': 6.938, 'total_flos': 4.20631805952e+16, 'train_loss': 1.3411070855034721, 'epoch': 3.0})

In [20]:
model.save_pretrained("flan-t5-lora-campaignmind-300000")
tokenizer.save_pretrained("flan-t5-lora-campaignmind-300000")


('flan-t5-lora-campaignmind-300000/tokenizer_config.json',
 'flan-t5-lora-campaignmind-300000/special_tokens_map.json',
 'flan-t5-lora-campaignmind-300000/spiece.model',
 'flan-t5-lora-campaignmind-300000/added_tokens.json',
 'flan-t5-lora-campaignmind-300000/tokenizer.json')

In [21]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from peft import PeftModel, PeftConfig

# Load tokenizer and base model
model_name = "google/flan-t5-small"
base_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load fine-tuned LoRA model
lora_path = "/content/flan-t5-lora-campaignmind-300000"
model = PeftModel.from_pretrained(base_model, lora_path)


In [22]:
def generate_campaign_prediction(description: str, max_tokens: int = 128):
    input_ids = tokenizer(description, return_tensors="pt", truncation=True, padding=True).input_ids
    outputs = model.generate(input_ids=input_ids, max_new_tokens=max_tokens)
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return result


In [23]:
test_input_1 = "A 15-day Facebook campaign for a product launch targeting Women 45–60 in Austin."
test_input_2 = "A 30-day Instagram campaign targeting Men 25-34 in Los Angeles for Market Expansion."

print("🔍 Input 1 Output:\n", generate_campaign_prediction(test_input_1))
print("\n🔍 Input 2 Output:\n", generate_campaign_prediction(test_input_2))


🔍 Input 1 Output:
 Predicted ROI: 5.62 Predicted Conversion Rate: 0.03 Success Probability: High Recommendation: In past campaigns targeting Women 45–60 in Austin, Facebook showed higher engagement.

🔍 Input 2 Output:
 Predicted ROI: 3.22 Predicted Conversion Rate: 0.07 Success Probability: Medium Recommendation: In past campaigns targeting Men 25-34 in Los Angeles, Instagram showed higher engagement.
