In [1]:
import requests
import os

# Target folder (your local path)
save_dir = r"C:\Users\hp\Desktop\fyp\mvp\M2-meme-gen\meme-templates-updated"
os.makedirs(save_dir, exist_ok=True)

# Characters Windows rejects in file names. Each is replaced with "_" so a
# template whose title contains e.g. "?" or ":" can still be written to disk.
_ILLEGAL_FILENAME_CHARS = str.maketrans({ch: "_" for ch in '<>:"|?*'})

# Fetch meme templates from Imgflip
print("Fetching meme templates...")
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() surfaces an HTTP error directly instead of a JSON parse
# failure or a silent "Found 0".
response = requests.get("https://api.imgflip.com/get_memes", timeout=10)
response.raise_for_status()
memes = response.json().get("data", {}).get("memes", [])

print(f"Found {len(memes)} meme templates.")

# Download each template
for meme in memes:
    # Spaces and path separators become underscores first, then any remaining
    # character that is illegal in a Windows file name.
    name = meme["name"].replace(" ", "_").replace("/", "_").replace("\\", "_")
    name = name.translate(_ILLEGAL_FILENAME_CHARS)
    url = meme["url"]
    ext = os.path.splitext(url)[-1] or ".jpg"  # default extension
    filename = os.path.join(save_dir, f"{name}{ext}")

    try:
        img_response = requests.get(url, timeout=10)
        # Without this check an HTTP error body would be saved under an image
        # file name and reported as a successful download.
        img_response.raise_for_status()
        img_data = img_response.content
        with open(filename, "wb") as f:
            f.write(img_data)
        print(f"✅ Downloaded: {name}")
    except Exception as e:
        # Best effort: report the failure and carry on with the next template.
        print(f"❌ Failed to download {name}: {e}")

print("Done.")

Fetching meme templates...
Found 100 meme templates.
✅ Downloaded: Drake_Hotline_Bling
✅ Downloaded: Two_Buttons
✅ Downloaded: Distracted_Boyfriend
✅ Downloaded: UNO_Draw_25_Cards
✅ Downloaded: Bernie_I_Am_Once_Again_Asking_For_Your_Support
✅ Downloaded: Left_Exit_12_Off_Ramp
✅ Downloaded: Running_Away_Balloon
✅ Downloaded: Disaster_Girl
✅ Downloaded: Gru's_Plan
✅ Downloaded: Waiting_Skeleton
✅ Downloaded: Always_Has_Been
✅ Downloaded: Change_My_Mind
✅ Downloaded: Sad_Pablo_Escobar
✅ Downloaded: Epic_Handshake
✅ Downloaded: Batman_Slapping_Robin
✅ Downloaded: Anakin_Padme_4_Panel
✅ Downloaded: Marked_Safe_From
✅ Downloaded: Woman_Yelling_At_Cat
✅ Downloaded: X,_X_Everywhere
✅ Downloaded: Buff_Doge_vs._Cheems
✅ Downloaded: Mocking_Spongebob
✅ Downloaded: Trade_Offer
✅ Downloaded: Expanding_Brain
✅ Downloaded: Bike_Fall
✅ Downloaded: I_Bet_He's_Thinking_About_Other_Women
✅ Downloaded: Tuxedo_Winnie_The_Pooh
✅ Downloaded: One_Does_Not_Simply
✅ Downloaded: Is_This_A_Pigeon
✅ Downloaded: Th

In [2]:
import requests
from bs4 import BeautifulSoup
import os
import time

# Listing page the scraper requests (a ?page=N query is appended per page)
base_url = "https://imgflip.com/memetemplates"

# Local folder that receives the downloaded templates; created if missing
save_dir = r"C:\Users\hp\Desktop\fyp\mvp\M2-meme-gen\meme-templates-updated-2"
os.makedirs(save_dir, exist_ok=True)

# Sanitize filenames
def sanitize_filename(name):
    """Turn a template title into a string that is safe to use as a file name.

    Every character that is not alphanumeric, a space or an underscore is
    replaced with an underscore. Surrounding whitespace is then stripped and
    the remaining spaces are converted to underscores.

    Args:
        name: Raw title text.

    Returns:
        The sanitized name, without any file extension.
    """
    cleaned = []
    for ch in name:
        if ch.isalnum() or ch == ' ' or ch == '_':
            cleaned.append(ch)
        else:
            cleaned.append('_')
    trimmed = "".join(cleaned).strip()
    return trimmed.replace(' ', '_')

def scrape_imgflip_templates(max_templates=1000):
    """Download up to ``max_templates`` meme template images from Imgflip.

    Requests ``{base_url}?page=N`` starting at page 1, reads the title and the
    image URL from every ``div.mt-box`` element and stores each image in
    ``save_dir`` as ``<sanitized title>.jpg``. Image URLs already seen in this
    run and files that already exist on disk are skipped and do not count
    towards the limit.

    Scraping stops when the limit is reached, when a page cannot be loaded,
    when a page has no template boxes, or when a page yields no image URL that
    was not seen before (guards against a site that keeps serving the same
    page for out-of-range page numbers, which would otherwise loop forever).

    Args:
        max_templates: Maximum number of images to download in this run.

    Returns:
        None. Progress and the final count are printed.
    """
    # Local import so the block stays self-contained within the notebook cell.
    from urllib.parse import urljoin

    downloaded = 0
    page = 1
    seen_urls = set()

    while downloaded < max_templates:
        url = f"{base_url}?page={page}"
        print(f"🔄 Scraping page {page}...")

        try:
            # The timeout prevents a stalled connection from hanging the cell.
            res = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
        except requests.RequestException as e:
            print(f"❌ Failed to load page {page}: {e}")
            break
        if res.status_code != 200:
            print(f"❌ Failed to load page {page}")
            break

        soup = BeautifulSoup(res.text, "html.parser")
        boxes = soup.select("div.mt-box")

        if not boxes:
            print("✅ No more templates found.")
            break

        new_urls_on_page = 0
        for box in boxes:
            if downloaded >= max_templates:
                break

            # Extract name
            title_tag = box.select_one("h3.mt-title a")
            if not title_tag:
                continue
            name = title_tag.text.strip()

            # Extract image URL; urljoin resolves protocol-relative ("//host/x"),
            # root-relative ("/x") and absolute URLs against the page URL.
            img_tag = box.select_one("img")
            if not img_tag or "src" not in img_tag.attrs:
                continue
            img_url = urljoin(url, img_tag["src"])

            # Skip duplicates
            if img_url in seen_urls:
                continue
            seen_urls.add(img_url)
            new_urls_on_page += 1

            filename = os.path.join(save_dir, sanitize_filename(name) + ".jpg")
            if os.path.exists(filename):
                continue

            try:
                img_res = requests.get(img_url, timeout=10)
                # Do not save an HTTP error body under an image file name.
                img_res.raise_for_status()
                with open(filename, "wb") as f:
                    f.write(img_res.content)
                downloaded += 1
                print(f"✅ ({downloaded}) {name}")
            except Exception as e:
                # Best effort: report the failure and move on to the next box.
                print(f"❌ Failed to download {name}: {e}")

        if new_urls_on_page == 0:
            # Nothing on this page that was not seen before: the listing is
            # exhausted or repeating, so further pages cannot add anything.
            print("✅ No new templates on this page; stopping.")
            break

        page += 1
        time.sleep(1)  # Be polite

    print(f"\n🎉 Finished! Downloaded {downloaded} meme templates.")

# Run the scraper: downloads at most 1000 templates into save_dir
# (performs network requests and writes files).
scrape_imgflip_templates(max_templates=1000)

🔄 Scraping page 1...
✅ (1) Drake Hotline Bling
✅ (2) Two Buttons
✅ (3) Distracted Boyfriend
✅ (4) UNO Draw 25 Cards
✅ (5) Bernie I Am Once Again Asking For Your Support
✅ (6) Left Exit 12 Off Ramp
✅ (7) Running Away Balloon
✅ (8) Disaster Girl
✅ (9) Gru's Plan
✅ (10) Waiting Skeleton
✅ (11) Always Has Been
✅ (12) Sad Pablo Escobar
✅ (13) Change My Mind
✅ (14) Epic Handshake
✅ (15) Batman Slapping Robin
✅ (16) Anakin Padme 4 Panel
✅ (17) Marked Safe From
✅ (18) Woman Yelling At Cat
✅ (19) X, X Everywhere
✅ (20) Buff Doge vs. Cheems
✅ (21) Mocking Spongebob
✅ (22) Trade Offer
✅ (23) Bike Fall
✅ (24) Expanding Brain
✅ (25) I Bet He's Thinking About Other Women
✅ (26) Tuxedo Winnie The Pooh
✅ (27) One Does Not Simply
✅ (28) Is This A Pigeon
✅ (29) They're The Same Picture
✅ (30) Bernie Sanders Once Again Asking
✅ (31) Monkey Puppet
✅ (32) Y'all Got Any More Of That
✅ (33) This Is Fine
✅ (34) Success Kid
✅ (35) Ancient Aliens
✅ (36) Hide the Pain Harold
✅ (37) Mother Ignoring Kid Drowning I

---

In [10]:
import os
import pandas as pd
import json
from dotenv import load_dotenv
from openai import OpenAI

# Load environment variables (python-dotenv) before the API key is read
load_dotenv()

# OpenAI client (v1.x SDK); the key comes from the OPENAI_KEY environment variable
client = OpenAI(api_key=os.getenv("OPENAI_KEY"))

# === CONFIGURATION ===
MODEL = "gpt-4"
BATCH_SIZE = 20  # meme names sent per API call
TEMPLATE_DIR = r"C:\Users\hp\Desktop\fyp\mvp\M2-meme-gen\meme-templates-updated-2"
OUTPUT_FILE = "meme_features_new.csv"

# === ENUMS for Constrained Fields ===
# Allowed values that the prompt offers the model for each constrained field
OPTION_SETS = {
    "Primary Audience": [
        "Gen Z",
        "Millennials",
        "Young Adults",
        "Parents",
        "Professionals",
        "Gamers",
        "Students",
        "General Public",
    ],
    "Humor Style": [
        "Sarcastic",
        "Wholesome",
        "Witty",
        "Relatable",
        "Cringe",
        "Absurd",
    ],
}

# === Prompt Builder ===
def build_prompt(meme_names: list) -> str:
    """Compose the classification prompt for one batch of meme templates.

    The names are rendered as a dash-prefixed list, one per line, and embedded
    in a fixed instruction text together with the allowed values taken from
    OPTION_SETS.

    Args:
        meme_names: Meme template names to classify.

    Returns:
        The complete prompt text.
    """
    bullet_lines = []
    for name in meme_names:
        bullet_lines.append(f"- {name}")
    formatted_memes = "\n".join(bullet_lines)

    prompt = f"""
You are an AI trained to classify meme templates for marketing analysis.

For each meme template, return a JSON object with the following fields:
- Meme Name (string)
- Primary Audience (one from: {OPTION_SETS['Primary Audience']})
- Humor Style (one from: {OPTION_SETS['Humor Style']})
- Keywords (5–10 general descriptive terms, not including the meme name)

❌ Do NOT include the meme title in the keywords.
✅ Keywords should reflect the meme's emotional or situational context.

Meme Templates:
{formatted_memes}

Return only a single JSON object in this format: {{ "memes": [ ... ] }}
"""
    return prompt

# === Call OpenAI API ===
def call_openai_batch(batch_names):
    """Classify one batch of meme names with the chat model and parse the reply.

    Args:
        batch_names: Meme names to send in a single prompt.

    Returns:
        The value stored under the "memes" key of the JSON object in the reply.

    Raises:
        ValueError: If the reply carries no text content.
        json.JSONDecodeError: If no valid JSON can be parsed from the reply.
        KeyError: If the parsed object has no "memes" key.
    """
    prompt = build_prompt(batch_names)
    response = client.chat.completions.create(
        model=MODEL,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.5,
    )
    content = response.choices[0].message.content
    # content can be None or blank; fail with a clear message instead of an
    # AttributeError on .strip() or an opaque JSON error.
    if not content or not content.strip():
        raise ValueError("OpenAI returned an empty message")

    text = content.strip()
    # Replies are sometimes wrapped in markdown fences or surrounded by prose.
    # Keep only the outermost {...} span; a reply that is already a bare JSON
    # object is left unchanged by this slice.
    first, last = text.find("{"), text.rfind("}")
    if first != -1 and last > first:
        text = text[first:last + 1]
    return json.loads(text)["memes"]

# === Main Runner ===
def run_minimal_meme_extraction():
    """Classify every template found in TEMPLATE_DIR and write the rows to OUTPUT_FILE.

    Meme names are derived from the .jpg/.png file names (extension removed,
    underscores turned into spaces) and sent to call_openai_batch in slices of
    BATCH_SIZE. A batch that raises is reported and skipped; its names are
    absent from the output. All collected rows are written as one CSV at the end.
    """
    # Derive meme names from the image file names
    meme_names = []
    for entry in os.listdir(TEMPLATE_DIR):
        if not entry.lower().endswith((".jpg", ".png")):
            continue
        stem = os.path.splitext(entry)[0]
        meme_names.append(stem.replace("_", " "))
    all_results = []

    print(f"🗂 Found {len(meme_names)} meme templates.")

    batch_starts = range(0, len(meme_names), BATCH_SIZE)
    for batch_no, start in enumerate(batch_starts, start=1):
        batch = meme_names[start:start + BATCH_SIZE]
        print(f"🔍 Processing batch {batch_no}...")
        try:
            all_results.extend(call_openai_batch(batch))
        except Exception as e:
            # Best effort: report the failed batch and continue with the next one.
            print(f"❌ Error in batch {batch}: {e}")

    # Save results
    pd.DataFrame(all_results).to_csv(OUTPUT_FILE, index=False)
    print(f"\n✅ Done! Saved to {OUTPUT_FILE}")

# Run
# Entry-point guard: the extraction starts only when this code is executed as
# the main module.
if __name__ == "__main__":
    run_minimal_meme_extraction()


🗂 Found 834 meme templates.
🔍 Processing batch 1...
🔍 Processing batch 2...
🔍 Processing batch 3...
🔍 Processing batch 4...
🔍 Processing batch 5...
🔍 Processing batch 6...
🔍 Processing batch 7...
🔍 Processing batch 8...
🔍 Processing batch 9...
🔍 Processing batch 10...
🔍 Processing batch 11...
🔍 Processing batch 12...
🔍 Processing batch 13...
🔍 Processing batch 14...
🔍 Processing batch 15...
🔍 Processing batch 16...
🔍 Processing batch 17...
🔍 Processing batch 18...
🔍 Processing batch 19...
🔍 Processing batch 20...
🔍 Processing batch 21...
🔍 Processing batch 22...
🔍 Processing batch 23...
🔍 Processing batch 24...
🔍 Processing batch 25...
🔍 Processing batch 26...
🔍 Processing batch 27...
🔍 Processing batch 28...
🔍 Processing batch 29...
🔍 Processing batch 30...
🔍 Processing batch 31...
🔍 Processing batch 32...
🔍 Processing batch 33...
🔍 Processing batch 34...
🔍 Processing batch 35...
🔍 Processing batch 36...
🔍 Processing batch 37...
🔍 Processing batch 38...
🔍 Processing batch 39...
🔍 Proc

In [11]:
import os
import re

# Folder whose files are renamed in place by the normalization below
TEMPLATE_FOLDER = r"C:\Users\hp\Desktop\fyp\mvp\M2-meme-gen\meme-templates-updated-2"

def normalize_filename(name):
    """Return ``name`` as a lower-case, underscore-separated file name.

    The extension is split off first. In the remaining stem, runs of
    whitespace become a single underscore, runs of underscores are collapsed
    to one, the result is lower-cased and leading/trailing underscores are
    removed. The lower-cased extension is then appended again.

    Args:
        name: File name, with or without an extension.

    Returns:
        The normalized file name.
    """
    stem, suffix = os.path.splitext(name)
    # Pass 1 maps whitespace runs to "_", pass 2 squeezes repeated underscores
    for pattern in (r'[\s]+', r'[_]+'):
        stem = re.sub(pattern, '_', stem)
    stem = stem.lower().strip('_')
    return stem + suffix.lower()

def normalize_all_filenames(folder):
    """Rename every regular file in ``folder`` to its normalized name.

    Each directory entry that is a file is passed through normalize_filename
    and renamed when the result differs from the current name. A file whose
    normalized name is already taken by a different file is left untouched
    and reported, so that no existing file is overwritten and the run is not
    aborted half-way.

    Args:
        folder: Path of the directory to process (not recursive).

    Returns:
        A list of ``(old_name, new_name)`` tuples, one per renamed file.
    """
    renamed = []
    for filename in os.listdir(folder):
        old_path = os.path.join(folder, filename)
        if not os.path.isfile(old_path):
            continue

        new_filename = normalize_filename(filename)
        # Only rename if different
        if new_filename == filename:
            continue
        new_path = os.path.join(folder, new_filename)

        # os.rename silently replaces an existing target on POSIX and raises on
        # Windows. On a case-insensitive file system a case-only rename also
        # "exists" but is the same file, so only a different file is a clash.
        if os.path.exists(new_path) and not os.path.samefile(old_path, new_path):
            print(f"⚠️ Skipped {filename}: {new_filename} already exists")
            continue

        os.rename(old_path, new_path)
        renamed.append((filename, new_filename))

    return renamed

# === Run the normalization ===
# Renames the files in TEMPLATE_FOLDER on disk, then prints each old → new pair.
renamed_files = normalize_all_filenames(TEMPLATE_FOLDER)
print("✅ Normalized filenames:")
for old, new in renamed_files:
    print(f"{old} → {new}")


✅ Normalized filenames:
0_days_without__Lenny__Simpsons_.jpg → 0_days_without_lenny_simpsons.jpg
1984_Calendar.jpg → 1984_calendar.jpg
3D_Nerd_Emoji.jpg → 3d_nerd_emoji.jpg
3_Spiderman_Pointing.jpg → 3_spiderman_pointing.jpg
4_Spiderman_pointing_at_each_other.jpg → 4_spiderman_pointing_at_each_other.jpg
Aaaaand_Its_Gone.jpg → aaaaand_its_gone.jpg
Absolute_Cinema.jpg → absolute_cinema.jpg
Adam_Silver_Get_Ready_To_Learn_Buddy.jpg → adam_silver_get_ready_to_learn_buddy.jpg
Afraid_To_Ask_Andy.jpg → afraid_to_ask_andy.jpg
After_all___why_not_.jpg → after_all_why_not.jpg
AJ_Styles___Undertaker.jpg → aj_styles_undertaker.jpg
All_My_Homies_Hate.jpg → all_my_homies_hate.jpg
Always_Has_Been.jpg → always_has_been.jpg
Always_you_three.jpg → always_you_three.jpg
Amateurs.jpg → amateurs.jpg
am_I_a_joke_to_you.jpg → am_i_a_joke_to_you.jpg
Am_I_The_Only_One_Around_Here.jpg → am_i_the_only_one_around_here.jpg
Ancient_Aliens.jpg → ancient_aliens.jpg
ANDY_DROPPING_WOODY.jpg → andy_dropping_woody.jpg
Andy