<a href="https://colab.research.google.com/github/atulsingh200/Python-Project/blob/main/Untitled20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import json
import logging
from datetime import datetime, timedelta
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Step 1: Dataset Setup
def create_sample_data(num_campaigns=10):
    logging.info("Creating sample dataset...")
    data = {
        'Campaign ID': [f'Campaign_{i+1}' for i in range(num_campaigns)],
        'Impressions': np.random.randint(1000, 10000, num_campaigns),
        'Clicks': np.random.randint(0, 1000, num_campaigns),
        'Conversions': np.random.randint(0, 500, num_campaigns),
        'Spend': np.random.uniform(100, 1000, num_campaigns),
        'Revenue': np.random.uniform(1000, 5000, num_campaigns),
        'Status': ['Active'] * num_campaigns,
        'Date': [datetime.now() - timedelta(days=np.random.randint(1, 10)) for _ in range(num_campaigns)],
        'Ad Copy': [f"Sample ad text for campaign {i+1}" for i in range(num_campaigns)]
    }
    return pd.DataFrame(data)

# Load sample data
campaign_data = create_sample_data()

# Step 2: Define Rules for Campaign Optimization
def calculate_metrics(df):
    logging.info("Calculating campaign metrics...")
    df['CTR'] = np.where(df['Impressions'] > 0, df['Clicks'] / df['Impressions'], np.nan)
    df['CPA'] = np.where(df['Conversions'] > 0, df['Spend'] / df['Conversions'], np.nan)
    df['ROAS'] = np.where(df['Spend'] > 0, df['Revenue'] / df['Spend'], np.nan)

def optimize_campaigns(df):
    logging.info("Applying optimization rules...")
    actions = []
    for index, row in df.iterrows():
        campaign_id = row['Campaign ID']
        if row['CTR'] < 0.01:
            actions.append({'Campaign ID': campaign_id, 'Action': 'Pause', 'Reason': 'Low CTR'})
            df.at[index, 'Status'] = 'Paused'
        elif row['ROAS'] > 4:
            actions.append({'Campaign ID': campaign_id, 'Action': 'Increase Budget', 'Reason': 'High ROAS'})
            df.at[index, 'Spend'] *= 1.2
        elif row['ROAS'] < 1.5:
            actions.append({'Campaign ID': campaign_id, 'Action': 'Decrease Budget', 'Reason': 'Low ROAS'})
            df.at[index, 'Spend'] *= 0.8
    return actions

# Hugging Face Integration with Flan-T5
def generate_llm_insights(df):
    logging.info("Generating LLM-powered insights...")
    tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xl")
    model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl")

    insights = []
    for _, row in df.iterrows():
        campaign_id = str(row['Campaign ID'])
        ad_copy = str(row['Ad Copy'])
        ctr = f"{row['CTR']:.2f}" if not pd.isna(row['CTR']) else 'N/A'
        roas = f"{row['ROAS']:.2f}" if not pd.isna(row['ROAS']) else 'N/A'
        cpa = f"{row['CPA']:.2f}" if not pd.isna(row['CPA']) else 'N/A'

        prompt = (
            f"Analyze the following campaign data and suggest improvements:\n\n"
            f"Campaign ID: {campaign_id}\n"
            f"Ad Copy: {ad_copy}\n"
            f"CTR: {ctr}\n"
            f"ROAS: {roas}\n"
            f"CPA: {cpa}\n\n"
            f"Suggest ways to improve performance."
        )

        try:
            input_ids = tokenizer(prompt, return_tensors="pt").input_ids
            outputs = model.generate(input_ids, max_length=150, num_return_sequences=1)
            generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
            insights.append({'Campaign ID': campaign_id, 'Insight': generated_text.strip()})
        except Exception as e:
            insights.append({'Campaign ID': campaign_id, 'Insight': f"Error: {str(e)}"})
    return insights

# Step 3: Build the Automation Pipeline
def run_automation_pipeline(df):
    calculate_metrics(df)
    actions = optimize_campaigns(df)
    llm_insights = generate_llm_insights(df)
    return actions, llm_insights

# Step 4: Reporting
def generate_report(actions, insights, df):
    logging.info("Generating campaign report...")
    report = {
        'date': str(datetime.now()),
        'actions_taken': actions,
        'llm_insights': insights,
        'updated_campaigns': df[['Campaign ID', 'Status', 'Spend', 'CTR', 'ROAS']].to_dict(orient='records')
    }
    return report

def save_report(report):
    with open('campaign_report.json', 'w') as f:
        json.dump(report, f, indent=4)
    logging.info("Report saved to campaign_report.json.")

# Execute the pipeline
try:
    logging.info("Running the marketing automation pipeline...")
    actions_taken, llm_generated_insights = run_automation_pipeline(campaign_data)
    report = generate_report(actions_taken, llm_generated_insights, campaign_data)
    save_report(report)
    logging.info("Pipeline executed successfully.")
except Exception as e:
    logging.error(f"Pipeline execution failed: {str(e)}")

# Display final DataFrame and report
print(campaign_data)
print(json.dumps(report, indent=4))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


config.json:   0%|          | 0.00/1.44k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/53.0k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.45G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

   Campaign ID  Impressions  Clicks  Conversions       Spend      Revenue  \
0   Campaign_1         6078     594          378  307.737714  3359.955223   
1   Campaign_2         2621     503           57  727.828578  2820.793415   
2   Campaign_3         1632      45          176  896.108830  2423.353248   
3   Campaign_4         8274     172          410  357.047661  2879.374106   
4   Campaign_5         6505      48          493  871.891533  2933.608047   
5   Campaign_6         9368     154           94  878.622659  2808.893883   
6   Campaign_7         5887     489          463  812.941751  2261.068076   
7   Campaign_8         2362     915           67  313.366770  3492.012653   
8   Campaign_9         6099     651          134  757.015991  1511.538326   
9  Campaign_10         4235     365          481  385.800806  3671.153433   

   Status                       Date                         Ad Copy  \
0  Active 2024-12-05 06:47:14.668312   Sample ad text for campaign 1   
1  Activ