In [3]:
pip install requests python-dotenv openai


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
#!/usr/bin/env python3
"""
collect_assets_unstructured.py

- Input: a basic user prompt (unstructured).
- Uses OpenAI to create 4-6 similar queries (fallback if OpenAI not available).
- For each query: searches Pexels (photos + videos) and Freesound (audio).
- Saves combined results to assets_unstructured.json.

Notes:
- Prefer the official APIs (we use them here).
- This is intentionally simple and avoids heavy dependencies.
"""

import os
import time
import json
from typing import List, Dict, Any, Optional
from urllib.parse import quote_plus
from dotenv import load_dotenv
import requests

# OpenAI modern client
from openai import OpenAI

# --------------------------
# Config & keys (from .env)
# --------------------------
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PEXELS_API_KEY = os.getenv("PEXELS_API_KEY")
FREESOUND_API_KEY = os.getenv("FREESOUND_API_KEY")

if not PEXELS_API_KEY:
    raise RuntimeError("Please set PEXELS_API_KEY in your environment or .env file")
if not FREESOUND_API_KEY:
    raise RuntimeError("Please set FREESOUND_API_KEY in your environment or .env file")

USE_OPENAI = bool(OPENAI_API_KEY)
client = None
if USE_OPENAI:
    client = OpenAI(api_key=OPENAI_API_KEY)

# Endpoints & constants
PEXELS_PHOTO_SEARCH = "https://api.pexels.com/v1/search"
PEXELS_VIDEO_SEARCH = "https://api.pexels.com/videos/search"
PEXELS_HEADERS = {"Authorization": PEXELS_API_KEY}

FREESOUND_SEARCH = "https://freesound.org/apiv2/search/text/"
FREESOUND_HEADERS = {"Authorization": f"Token {FREESOUND_API_KEY}"}

# Limits - tune as needed
MAX_QUERIES = 6
PEXELS_PER_QUERY_PHOTOS = 4
PEXELS_PER_QUERY_VIDEOS = 3
FREESOUND_PER_QUERY = 3
DELAY_BETWEEN_PROVIDER_CALLS = 0.25  # polite delay


# --------------------------
# Helper: generate similar queries using OpenAI (or fallback)
# --------------------------
def generate_queries_via_openai(prompt: str, n: int = 5) -> List[str]:
    """
    Ask OpenAI to return a JSON array of short search queries similar to the prompt.
    If OpenAI is not configured or fails, use a conservative fallback.
    """
    if not USE_OPENAI or client is None:
        return fallback_generate_queries(prompt, n)

    system = (
        "You are an assistant that MUST return ONLY a JSON array of short search query strings "
        "(no explanation). Each string should be concise (1-6 words) and be a close variant or related "
        "search term that would help find images/videos/audio for the brief. Return exactly the JSON array."
    )
    user = f"Create {n} concise search queries for this brief: \"{prompt}\". Return JSON array only."

    try:
        resp = client.chat.completions.create(
            model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": user},
            ],
            temperature=0.2,
            max_tokens=200,
        )
        # safe access to message content
        try:
            content = resp.choices[0].message["content"]
        except Exception:
            content = resp.choices[0].message.content if hasattr(resp.choices[0].message, "content") else ""
        text = content.strip()
        # strip code fences if present
        if text.startswith("```"):
            lines = text.splitlines()
            if len(lines) >= 3:
                text = "\n".join(lines[1:-1])
        queries = json.loads(text)
        # sanitize and limit
        cleaned = []
        for q in queries:
            if isinstance(q, str):
                s = " ".join(q.split()).strip()
                if s:
                    cleaned.append(s)
            if len(cleaned) >= n:
                break
        if cleaned:
            return cleaned
        else:
            return fallback_generate_queries(prompt, n)
    except Exception as e:
        # fallback
        return fallback_generate_queries(prompt, n)


def fallback_generate_queries(prompt: str, n: int = 5) -> List[str]:
    """
    Conservative deterministic fallback: produce n query variants for the prompt.
    """
    base = prompt.strip()
    tokens = [t for t in base.split() if t]
    queries = []
    # 1: the original brief
    queries.append(base)
    # 2: add synonyms / context phrases
    queries.append(base + " street scene")
    queries.append(base + " crowd")
    queries.append(base + " city life")
    queries.append(base + " people walking at street")
    # dedupe and limit
    final = []
    seen = set()
    for q in queries:
        qn = " ".join(q.split())
        if qn.lower() not in seen:
            seen.add(qn.lower())
            final.append(qn)
        if len(final) >= n:
            break
    return final


# --------------------------
# Pexels API fetch helpers
# --------------------------
def pexels_search_photos(query: str, per_page: int = PEXELS_PER_QUERY_PHOTOS) -> List[Dict[str, Any]]:
    params = {"query": query, "per_page": per_page}
    try:
        r = requests.get(PEXELS_PHOTO_SEARCH, headers=PEXELS_HEADERS, params=params, timeout=12)
        r.raise_for_status()
        data = r.json()
        items = []
        for p in data.get("photos", [])[:per_page]:
            items.append({
                "id": f"photo_{p.get('id')}",
                "url": p.get("src", {}).get("original") or p.get("src", {}).get("large"),
                "width": p.get("width"),
                "height": p.get("height"),
                "photographer": p.get("photographer"),
                "provider": "pexels",
                "raw": p
            })
        return items
    except Exception as e:
        print(f"[PEXELS PHOTOS ERROR] query='{query}' -> {e}")
        return []


def pexels_search_videos(query: str, per_page: int = PEXELS_PER_QUERY_VIDEOS) -> List[Dict[str, Any]]:
    params = {"query": query, "per_page": per_page}
    try:
        r = requests.get(PEXELS_VIDEO_SEARCH, headers=PEXELS_HEADERS, params=params, timeout=12)
        r.raise_for_status()
        data = r.json()
        items = []
        for v in data.get("videos", [])[:per_page]:
            # pick best file (highest width*fps) if available
            files = v.get("video_files", []) or []
            chosen = None
            if files:
                files_sorted = sorted(files, key=lambda f: (f.get("width", 0), f.get("fps", 0)), reverse=True)
                chosen = files_sorted[0]
            items.append({
                "id": f"video_{v.get('id')}",
                "url": (chosen.get("link") if chosen else v.get("url")),
                "duration": v.get("duration"),
                "width": chosen.get("width") if chosen else None,
                "height": chosen.get("height") if chosen else None,
                "provider": "pexels",
                "raw": v
            })
        return items
    except Exception as e:
        print(f"[PEXELS VIDEOS ERROR] query='{query}' -> {e}")
        return []


# --------------------------
# Freesound API fetch helpers
# --------------------------
def freesound_search(query: str, page_size: int = FREESOUND_PER_QUERY) -> List[Dict[str, Any]]:
    params = {"query": query, "page_size": page_size, "fields": "id,name,previews,duration,username,tags,license"}
    try:
        r = requests.get(FREESOUND_SEARCH, headers=FREESOUND_HEADERS, params=params, timeout=12)
        r.raise_for_status()
        data = r.json()
        items = []
        for item in data.get("results", [])[:page_size]:
            previews = item.get("previews", {}) or {}
            preview_url = previews.get("preview-hq-mp3") or previews.get("preview-hq-ogg") or previews.get("preview-lq-mp3")
            items.append({
                "id": f"fs_{item.get('id')}",
                "title": item.get("name"),
                "url": preview_url,
                "duration": item.get("duration"),
                "uploader": item.get("username"),
                "tags": item.get("tags", []),
                "license": item.get("license"),
                "provider": "freesound",
                "raw": item
            })
        return items
    except Exception as e:
        print(f"[FREESOUND ERROR] query='{query}' -> {e}")
        return []


# --------------------------
# Main pipeline
# --------------------------
def collect_assets_for_prompt(prompt: str, num_queries: int = MAX_QUERIES) -> Dict[str, Any]:
    out = {
        "prompt": prompt,
        "generated_queries": [],
        "results": {},  # query -> {pexels: {photos:[], videos:[]}, freesound: {audios:[]}}
        "notes": {}
    }

    # 1) generate queries
    queries = generate_queries_via_openai(prompt, n=num_queries)
    out["generated_queries"] = queries

    # 2) for each query, fetch from Pexels + Freesound
    for q in queries:
        qkey = q
        out["results"][qkey] = {"pexels": {"photos": [], "videos": []}, "freesound": {"audios": []}}
        # Pexels photos
        photos = pexels_search_photos(q)
        time.sleep(DELAY_BETWEEN_PROVIDER_CALLS)
        videos = pexels_search_videos(q)
        time.sleep(DELAY_BETWEEN_PROVIDER_CALLS)
        audios = freesound_search(q)
        time.sleep(DELAY_BETWEEN_PROVIDER_CALLS)
        out["results"][qkey]["pexels"]["photos"] = photos
        out["results"][qkey]["pexels"]["videos"] = videos
        out["results"][qkey]["freesound"]["audios"] = audios

    return out


# --------------------------
# CLI / example usage
# --------------------------
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Collect Pexels + Freesound assets for an unstructured prompt.")
    parser.add_argument("--prompt", "-p", type=str, help="User prompt (e.g. 'bustling city with people')", required=False)
    parser.add_argument("--queries", "-q", type=int, default=5, help="Number of similar queries to generate (default 5)")
    parser.add_argument("--out", "-o", type=str, default="assets_unstructured.json", help="Output JSON filename")
    args = parser.parse_args()

    if not args.prompt:
        # interactive ask
        user_prompt = input("Enter a prompt (e.g. 'bustling city with people'): ").strip()
    else:
        user_prompt = args.prompt.strip()

    print("[RUN] Prompt:", user_prompt)
    plan = collect_assets_for_prompt(user_prompt, num_queries=args.queries)

    # save output
    with open(args.out, "w", encoding="utf-8") as f:
        json.dump(plan, f, indent=2)

    print(f"[DONE] Saved results to {args.out}")
    # print summary
    total_photos = sum(len(plan["results"][q]["pexels"]["photos"]) for q in plan["generated_queries"])
    total_videos = sum(len(plan["results"][q]["pexels"]["videos"]) for q in plan["generated_queries"])
    total_audios = sum(len(plan["results"][q]["freesound"]["audios"]) for q in plan["generated_queries"])
    print(f"Found {total_photos} photos, {total_videos} videos (Pexels), {total_audios} audio previews (Freesound) across {len(plan['generated_queries'])} queries.")


usage: ipykernel_launcher.py [-h] [--prompt PROMPT] [--queries QUERIES]
                             [--out OUT]
ipykernel_launcher.py: error: unrecognized arguments: --f=c:\Users\JENNIFER\AppData\Roaming\jupyter\runtime\kernel-v3306c45ef2c8796af97fc5df342ae7a5d6b0ead35.json


SystemExit: 2

In [None]:
import os
import time
import json
import io
from typing import List, Dict, Any, Optional
from urllib.parse import quote_plus
from dotenv import load_dotenv
import requests
from PIL import Image
import numpy as np
import cv2
from mutagen import File as MutagenFile  # for audio metadata

# --- OpenAI modern client ---
from openai import OpenAI

# Try to import ultralytics YOLO (optional)
try:
    from ultralytics import YOLO
    _YOLO_AVAILABLE = True
except Exception:
    _YOLO_AVAILABLE = False

load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PEXELS_API_KEY = os.getenv("PEXELS_API_KEY")
FREESOUND_API_KEY = os.getenv("FREESOUND_API_KEY")

if not OPENAI_API_KEY:
    raise RuntimeError("Please set OPENAI_API_KEY in your environment or .env file")
if not PEXELS_API_KEY:
    raise RuntimeError("Please set PEXELS_API_KEY in your environment or .env file")
if not FREESOUND_API_KEY:
    raise RuntimeError("Please set FREESOUND_API_KEY in your environment or .env file")

client = OpenAI(api_key=OPENAI_API_KEY)
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")  # change if you prefer another model

# Pexels API endpoints
PEXELS_PHOTO_SEARCH = "https://api.pexels.com/v1/search"
PEXELS_VIDEO_SEARCH = "https://api.pexels.com/videos/search"
PEXELS_HEADERS = {"Authorization": PEXELS_API_KEY}

# Freesound endpoints
FREESOUND_SEARCH = "https://freesound.org/apiv2/search/text/"
FREESOUND_HEADERS = {"Authorization": f"Token {FREESOUND_API_KEY}"}

In [None]:
MAX_VIDEOS_PER_QUERY = 3
MAX_IMAGES_PER_QUERY = 3
MAX_AUDIOS_PER_QUERY = 3
MAX_QUERIES_PER_SHOT = 3
MAX_SHOTS = 6

# ---------------------
# 1) OpenAI shot-splitting (structured JSON)
# ---------------------
def ask_model_for_shots(prompt_text: str, min_shots: int = 1, max_shots: int = 4) -> List[Dict[str, Any]]:
    """
    Use OpenAI to return structured shots JSON (id, text_description, suggested_duration_seconds, keywords, style_tokens, negative_filters)
    """
    system = (
        "You are an assistant that MUST return ONLY valid JSON (no commentary). "
        f"Given a short video brief, return an array of between {min_shots} and {max_shots} shots. "
        "Each shot object must include the following keys: "
        '"id" (string), "text_description" (string), "suggested_duration_seconds" (integer), '
        '"keywords" (array of strings), "style_tokens" (array of strings), "negative_filters" (array of strings).'
    )
    user = f"Convert this brief into shots: \"{prompt_text}\". Return JSON only."

    resp = client.chat.completions.create(
        model=OPENAI_MODEL,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        temperature=0.0,
        max_tokens=400,
    )

    # safe access to content
    try:
        content = resp.choices[0].message["content"]
    except Exception:
        try:
            content = resp.choices[0].message.content
        except Exception:
            content = resp.choices[0].get("message", {}).get("content", "")

    text = content.strip() if content else ""
    # strip backticks if model wraps JSON in fences
    if text.startswith("```"):
        lines = text.splitlines()
        if len(lines) >= 3:
            text = "\n".join(lines[1:-1])

    try:
        shots = json.loads(text)
    except Exception as e:
        raise RuntimeError(f"Failed to parse JSON from model output: {e}\n---\n{text}")

    if not isinstance(shots, list):
        raise RuntimeError("Model did not return a JSON array of shots.")
    return shots


In [None]:
# ---------------------
# 2) Query expansion
# ---------------------
def expand_queries_for_shot(shot: Dict[str, Any], max_queries: int = MAX_QUERIES_PER_SHOT) -> List[str]:
    queries = []
    text = shot.get("text_description", "").strip()
    if text:
        queries.append(text)
    keywords = shot.get("keywords", []) or []
    if keywords:
        queries.append(" ".join(keywords))
        for k in keywords[:2]:
            queries.append(k)
    style_tokens = shot.get("style_tokens", []) or []
    if style_tokens and text:
        queries.append(text + " " + " ".join(style_tokens[:2]))
    seen = set()
    final = []
    for q in queries:
        qclean = " ".join(q.split())
        if qclean and qclean not in seen:
            seen.add(qclean)
            final.append(qclean)
        if len(final) >= max_queries:
            break
    return final

In [None]:
# ---------------------
# 3) Pexels API helpers (fetch)
# ---------------------
def pexels_search_photos(query: str, per_page: int = MAX_IMAGES_PER_QUERY) -> Dict[str, Any]:
    params = {"query": query, "per_page": per_page}
    r = requests.get(PEXELS_PHOTO_SEARCH, headers=PEXELS_HEADERS, params=params, timeout=12)
    r.raise_for_status()
    return r.json()

def pexels_search_videos(query: str, per_page: int = MAX_VIDEOS_PER_QUERY) -> Dict[str, Any]:
    params = {"query": query, "per_page": per_page}
    r = requests.get(PEXELS_VIDEO_SEARCH, headers=PEXELS_HEADERS, params=params, timeout=12)
    r.raise_for_status()
    return r.json()

def normalize_photo_item(item: Dict[str, Any]) -> Dict[str, Any]:
    return {
        "id": f"photo_{item.get('id')}",
        "type": "image",
        "url": item.get("src", {}).get("original") or item.get("src", {}).get("large"),
        "width": item.get("width"),
        "height": item.get("height"),
        "photographer": item.get("photographer"),
        "provider": "pexels",
        "license": "pexels",
        "meta": item,
    }

def normalize_video_item(item: Dict[str, Any]) -> Dict[str, Any]:
    files = item.get("video_files", []) or []
    chosen = None
    if files:
        files_sorted = sorted(files, key=lambda f: (f.get("width", 0), f.get("fps", 0)), reverse=True)
        chosen = files_sorted[0]
    return {
        "id": f"video_{item.get('id')}",
        "type": "video",
        "url": chosen.get("link") if chosen else (item.get("url")),
        "duration": item.get("duration"),
        "width": chosen.get("width") if chosen else None,
        "height": chosen.get("height") if chosen else None,
        "provider": "pexels",
        "license": "pexels",
        "meta": item,
    }

def fetch_pexels_for_query(query: str, top_k_v: int = MAX_VIDEOS_PER_QUERY, top_k_i: int = MAX_IMAGES_PER_QUERY) -> Dict[str, List[Dict[str,Any]]]:
    results = {"videos": [], "images": []}
    try:
        vresp = pexels_search_videos(query, per_page=top_k_v)
        videos = vresp.get("videos", [])
        for v in videos[:top_k_v]:
            norm = normalize_video_item(v)
            results["videos"].append(norm)
    except Exception as e:
        print(f"[warning] video search failed for query '{query}': {e}")
    try:
        presp = pexels_search_photos(query, per_page=top_k_i)
        photos = presp.get("photos", [])
        for p in photos[:top_k_i]:
            norm = normalize_photo_item(p)
            results["images"].append(norm)
    except Exception as e:
        print(f"[warning] photo search failed for query '{query}': {e}")
    return results


In [None]:
# ---------------------
# 4) Freesound API helpers (audio)
# ---------------------
def freesound_search(query: str, page_size: int = MAX_AUDIOS_PER_QUERY) -> Dict[str, Any]:
    """
    Search Freesound using text search.
    Returns the parsed JSON results (the 'results' list contains items).
    """
    params = {"query": query, "page_size": page_size, "fields": "id,name,previews,duration,username,tags,license"}
    r = requests.get(FREESOUND_SEARCH, headers=FREESOUND_HEADERS, params=params, timeout=12)
    r.raise_for_status()
    return r.json()

def normalize_freesound_item(item: Dict[str, Any]) -> Dict[str, Any]:
    """
    Normalize a Freesound search result to our asset dict.
    We'll prefer the high-quality preview URL for prototyping (preview-hq-mp3 or preview-hq-ogg)
    """
    previews = item.get("previews", {}) or {}
    preview_url = previews.get("preview-hq-mp3") or previews.get("preview-hq-ogg") or previews.get("preview-lq-mp3")
    return {
        "id": f"audio_fs_{item.get('id')}",
        "type": "audio",
        "url": preview_url,
        "duration": item.get("duration"),
        "title": item.get("name"),
        "uploader": item.get("username"),
        "tags": item.get("tags", []),
        "provider": "freesound",
        "license": item.get("license"),
        "meta": item,
    }

def fetch_freesound_for_query(query: str, top_k: int = MAX_AUDIOS_PER_QUERY) -> Dict[str, List[Dict[str,Any]]]:
    results = {"audios": []}
    try:
        resp = freesound_search(query, page_size=top_k)
        for item in resp.get("results", [])[:top_k]:
            norm = normalize_freesound_item(item)
            # only include if preview URL exists
            if norm.get("url"):
                results["audios"].append(norm)
    except Exception as e:
        print(f"[warning] Freesound search failed for query '{query}': {e}")
    return results


In [None]:
_face_cascade = None
def _get_face_cascade():
    global _face_cascade
    if _face_cascade is None:
        cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        _face_cascade = cv2.CascadeClassifier(cascade_path)
    return _face_cascade

def download_bytes(url: str, max_bytes: Optional[int] = None, timeout: int = 12) -> Optional[bytes]:
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        if max_bytes:
            r = requests.get(url, stream=True, timeout=timeout, headers=headers)
            r.raise_for_status()
            buf = io.BytesIO()
            for chunk in r.iter_content(chunk_size=8192):
                if not chunk:
                    break
                buf.write(chunk)
                if buf.tell() >= max_bytes:
                    break
            return buf.getvalue()
        else:
            r = requests.get(url, timeout=timeout, headers=headers)
            r.raise_for_status()
            return r.content
    except Exception:
        return None

def sample_frames_from_video(url_or_path: str, n_frames: int = 6) -> List[np.ndarray]:
    cap = cv2.VideoCapture(url_or_path)
    if not cap.isOpened():
        try:
            data = download_bytes(url_or_path)
            if not data:
                return []
            tmp = "tmp_asset_video.bin"
            with open(tmp, "wb") as f:
                f.write(data)
            cap = cv2.VideoCapture(tmp)
            if not cap.isOpened():
                return []
        except Exception:
            return []
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 0
    if frame_count <= 0:
        frames = []
        for _ in range(n_frames):
            ret, f = cap.read()
            if not ret:
                break
            frames.append(cv2.cvtColor(f, cv2.COLOR_BGR2RGB))
        cap.release()
        return frames
    indices = np.linspace(0, max(0, frame_count - 1), num=min(n_frames, frame_count), dtype=int)
    frames = []
    for idx in indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
        ret, f = cap.read()
        if not ret or f is None:
            continue
        frames.append(cv2.cvtColor(f, cv2.COLOR_BGR2RGB))
    cap.release()
    return frames

def sample_frames_from_image(url_or_path: str) -> List[np.ndarray]:
    b = download_bytes(url_or_path)
    if not b:
        return []
    try:
        im = Image.open(io.BytesIO(b)).convert("RGB")
        arr = np.array(im)
        return [arr]
    except Exception:
        return []

def dominant_colors_from_rgb_array(arr: np.ndarray, top_k: int = 3) -> List[str]:
    try:
        im = Image.fromarray(arr)
        small = im.resize((200, 200))
        pal = small.convert("P", palette=Image.ADAPTIVE, colors=top_k)
        palette = pal.getpalette()
        color_counts = pal.getcolors()
        color_counts.sort(reverse=True)
        dominant = []
        for count, idx in color_counts[:top_k]:
            r = palette[idx*3]; g = palette[idx*3+1]; b = palette[idx*3+2]
            dominant.append('#{:02x}{:02x}{:02x}'.format(r,g,b))
        return dominant
    except Exception:
        return []

def compute_motion_intensity(frames: List[np.ndarray]) -> (str, float):
    if not frames or len(frames) < 2:
        return ("none", 0.0)
    mags = []
    for i in range(1, len(frames)):
        a = cv2.cvtColor(frames[i-1], cv2.COLOR_RGB2GRAY).astype(np.float32)
        b = cv2.cvtColor(frames[i], cv2.COLOR_RGB2GRAY).astype(np.float32)
        diff = np.abs(b - a)
        mags.append(diff.mean())
    mean_mag = float(np.mean(mags)) if mags else 0.0
    if mean_mag < 2.5:
        band = "low"
    elif mean_mag < 8.0:
        band = "medium"
    else:
        band = "high"
    return (band, mean_mag)

def estimate_camera_move(frames: List[np.ndarray]) -> (str, float):
    if not frames or len(frames) < 2:
        return ("unknown", 0.0)
    try:
        prev = cv2.cvtColor(frames[0], cv2.COLOR_RGB2GRAY)
        nxt = cv2.cvtColor(frames[-1], cv2.COLOR_RGB2GRAY)
        flow = cv2.calcOpticalFlowFarneback(prev, nxt, None,
                                            pyr_scale=0.5, levels=3, winsize=15,
                                            iterations=3, poly_n=5, poly_sigma=1.2, flags=0)
        mag, ang = cv2.cartToPolar(flow[...,0], flow[...,1])
        avg_mag = float(np.mean(mag))
        avg_dx = float(np.mean(flow[...,0]))
        avg_dy = float(np.mean(flow[...,1]))
        if avg_mag < 0.5:
            return ("static", avg_mag)
        if abs(avg_dx) > abs(avg_dy) * 1.2:
            return ("pan", avg_mag)
        if abs(avg_dy) > abs(avg_dx) * 1.2:
            return ("tilt", avg_mag)
        return ("dolly_or_forward", avg_mag)
    except Exception:
        return ("unknown", 0.0)

# YOLO wrapper
_yolo_model = None
def _get_yolo_model():
    global _yolo_model
    if not _YOLO_AVAILABLE:
        return None
    if _yolo_model is None:
        _yolo_model = YOLO("yolov8n.pt")
    return _yolo_model

def detect_with_yolo_on_frames(frames: List[np.ndarray]) -> Dict[str, Any]:
    model = _get_yolo_model()
    if model is None or not frames:
        return {"objects": [], "contains_people": False, "person_count": 0}
    try:
        frame = frames[0]
        results = model.predict(source=frame, imgsz=640, conf=0.25, verbose=False)
        dets = []
        contains_people = False
        person_count = 0
        if results and len(results) > 0:
            r = results[0]
            boxes = getattr(r, "boxes", []) or []
            for b in boxes:
                try:
                    cls = int(b.cls.cpu().numpy()[0]) if hasattr(b, 'cls') else int(b.cls[0])
                    label = model.names.get(cls, str(cls)) if hasattr(model, "names") else str(cls)
                    conf = float(b.conf.cpu().numpy()[0]) if hasattr(b, 'conf') else float(b.conf[0])
                except Exception:
                    continue
                dets.append({"label": label, "conf": conf})
                if label.lower() in ("person", "people"):
                    contains_people = True
                    person_count += 1
        return {"objects": dets, "contains_people": contains_people, "person_count": person_count}
    except Exception:
        return {"objects": [], "contains_people": False, "person_count": 0}

def detect_faces_on_frames(frames: List[np.ndarray]) -> Dict[str, Any]:
    face_cascade = _get_face_cascade()
    if face_cascade is None or not frames:
        return {"contains_people": False, "person_count": 0, "objects": []}
    count = 0
    for frame in frames:
        try:
            gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
            faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=4, minSize=(30,30))
            count += len(faces)
        except Exception:
            continue
    return {"contains_people": (count > 0), "person_count": count, "objects": []}

def infer_mood_tags(dominant_colors: List[str], motion_band: str, contains_people: bool, objects: List[Dict]) -> (List[str], float, str):
    tags = []; reason_terms = []; temp = 0
    if dominant_colors:
        def hex_to_rgb(h):
            h = h.lstrip('#')
            return tuple(int(h[i:i+2], 16) for i in (0, 2, 4))
        try:
            r,g,b = hex_to_rgb(dominant_colors[0])
            temp = (r - b) / 255.0
        except Exception:
            temp = 0
    if motion_band in ("none","low"):
        if temp > 0.05:
            tags.extend(["calm", "nostalgic"]); reason_terms.append("warm_palette+low_motion")
        else:
            tags.extend(["calm"]); reason_terms.append("cool+low_motion")
    elif motion_band == "medium":
        if temp > 0.05:
            tags.extend(["gentle", "pleasant"]); reason_terms.append("warm+medium_motion")
        else:
            tags.extend(["moving","reflective"]); reason_terms.append("cool+medium_motion")
    else:
        tags.extend(["energetic","urgent"]); reason_terms.append("high_motion")
    obj_labels = [o["label"].lower() for o in objects] if objects else []
    if contains_people and "calm" in tags:
        tags.append("intimate"); reason_terms.append("people+calm")
    if any(x in obj_labels for x in ("ocean","beach","shore","wave","sea","kite")):
        if "calm" in tags or temp > 0.05:
            tags = ["calm","poignant"]; reason_terms.append("ocean+kite+warm")
    tags = list(dict.fromkeys(tags))[:3]
    score = 0.5
    if dominant_colors: score += 0.15
    if motion_band != "none": score += 0.15
    if contains_people or objects: score += 0.15
    score = min(0.99, score)
    reason = ";".join(reason_terms) if reason_terms else ""
    return tags, round(score,2), reason

def classify_asset(url_or_path: str, asset_type: str = None, sample_frames_count: int = 6) -> Dict[str, Any]:
    if asset_type is None:
        l = url_or_path.lower()
        if any(l.endswith(x) for x in (".mp4",".mov",".webm",".mkv",".avi")):
            asset_type = "video"
        elif any(l.endswith(x) for x in (".jpg",".jpeg",".png",".webp")):
            asset_type = "image"
        else:
            cap = cv2.VideoCapture(url_or_path)
            if cap and cap.isOpened():
                asset_type = "video"; cap.release()
            else:
                asset_type = "image"
    frames = sample_frames_from_video(url_or_path, n_frames=sample_frames_count) if asset_type=="video" else sample_frames_from_image(url_or_path)
    classification = {
        "contains_people": False, "person_count": 0, "objects": [],
        "motion_intensity": "none", "motion_metric": 0.0,
        "camera_move": "unknown", "camera_move_metric": 0.0,
        "dominant_colors": [], "mood_tags": [], "mood_confidence": 0.0, "mood_reason": "", "notes": ""
    }
    if not frames:
        classification["notes"]="no_frames_obtained"
        return {"url":url_or_path, "type":asset_type, "classification":classification}
    try:
        dom = dominant_colors_from_rgb_array(frames[0], top_k=3)
        classification["dominant_colors"]=dom
    except Exception:
        classification["dominant_colors"]=[]
    if _YOLO_AVAILABLE:
        try:
            det = detect_with_yolo_on_frames(frames)
            classification["objects"]=det.get("objects",[])
            classification["contains_people"]=bool(det.get("contains_people",False))
            classification["person_count"]=int(det.get("person_count",0))
        except Exception as e:
            classification["notes"]+=f"yolo_failed:{e};"
    else:
        try:
            det = detect_faces_on_frames(frames)
            classification["contains_people"]=det.get("contains_people",False)
            classification["person_count"]=det.get("person_count",0)
            classification["objects"]=[]
        except Exception as e:
            classification["notes"]+=f"face_failed:{e};"
    try:
        band, metric = compute_motion_intensity(frames)
        classification["motion_intensity"]=band; classification["motion_metric"]=round(metric,3)
    except Exception as e:
        classification["notes"]+=f"motion_failed:{e};"
    try:
        cam, cam_metric = estimate_camera_move(frames)
        classification["camera_move"]=cam; classification["camera_move_metric"]=round(cam_metric,3)
    except Exception as e:
        classification["notes"]+=f"camera_move_failed:{e};"
    try:
        tags, conf, reason = infer_mood_tags(classification["dominant_colors"], classification["motion_intensity"], classification["contains_people"], classification["objects"])
        classification["mood_tags"]=tags; classification["mood_confidence"]=conf; classification["mood_reason"]=reason
    except Exception as e:
        classification["notes"]+=f"mood_failed:{e};"
    return {"url":url_or_path, "type":asset_type, "classification":classification}



In [6]:
#!/usr/bin/env python3
"""
collect_assets_unstructured.py

- Input: a basic user prompt (unstructured).
- Uses OpenAI to create 4-6 similar queries (fallback if OpenAI not available).
- For each query: searches Pexels (photos + videos) and Freesound (audio).
- Saves combined results to assets_unstructured.json.

Notes:
- Prefer the official APIs (we use them here).
- This is intentionally simple and avoids heavy dependencies.
"""

import os
import time
import json
from typing import List, Dict, Any, Optional
from urllib.parse import quote_plus
from dotenv import load_dotenv
import requests

# OpenAI modern client
from openai import OpenAI

# --------------------------
# Config & keys (from .env)
# --------------------------
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PEXELS_API_KEY = os.getenv("PEXELS_API_KEY")
FREESOUND_API_KEY = os.getenv("FREESOUND_API_KEY")

if not PEXELS_API_KEY:
    raise RuntimeError("Please set PEXELS_API_KEY in your environment or .env file")
if not FREESOUND_API_KEY:
    raise RuntimeError("Please set FREESOUND_API_KEY in your environment or .env file")

USE_OPENAI = bool(OPENAI_API_KEY)
client = None
if USE_OPENAI:
    client = OpenAI(api_key=OPENAI_API_KEY)

# Endpoints & constants
PEXELS_PHOTO_SEARCH = "https://api.pexels.com/v1/search"
PEXELS_VIDEO_SEARCH = "https://api.pexels.com/videos/search"
PEXELS_HEADERS = {"Authorization": PEXELS_API_KEY}

FREESOUND_SEARCH = "https://freesound.org/apiv2/search/text/"
FREESOUND_HEADERS = {"Authorization": f"Token {FREESOUND_API_KEY}"}

# Limits - tune as needed
MAX_QUERIES = 6
PEXELS_PER_QUERY_PHOTOS = 4
PEXELS_PER_QUERY_VIDEOS = 3
FREESOUND_PER_QUERY = 3
DELAY_BETWEEN_PROVIDER_CALLS = 0.25  # polite delay


# --------------------------
# Helper: generate similar queries using OpenAI (or fallback)
# --------------------------
def generate_queries_via_openai(prompt: str, n: int = 5) -> List[str]:
    """
    Ask OpenAI to return a JSON array of short search queries similar to the prompt.
    If OpenAI is not configured or fails, use a conservative fallback.
    """
    if not USE_OPENAI or client is None:
        return fallback_generate_queries(prompt, n)

    system = (
        "You are an assistant that MUST return ONLY a JSON array of short search query strings "
        "(no explanation). Each string should be concise (1-6 words) and be a close variant or related "
        "search term that would help find images/videos/audio for the brief. Return exactly the JSON array."
    )
    user = f"Create {n} concise search queries for this brief: \"{prompt}\". Return JSON array only."

    try:
        resp = client.chat.completions.create(
            model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": user},
            ],
            temperature=0.2,
            max_tokens=200,
        )
        # safe access to message content
        try:
            content = resp.choices[0].message["content"]
        except Exception:
            content = resp.choices[0].message.content if hasattr(resp.choices[0].message, "content") else ""
        text = content.strip()
        # strip code fences if present
        if text.startswith("```"):
            lines = text.splitlines()
            if len(lines) >= 3:
                text = "\n".join(lines[1:-1])
        queries = json.loads(text)
        # sanitize and limit
        cleaned = []
        for q in queries:
            if isinstance(q, str):
                s = " ".join(q.split()).strip()
                if s:
                    cleaned.append(s)
            if len(cleaned) >= n:
                break
        if cleaned:
            return cleaned
        else:
            return fallback_generate_queries(prompt, n)
    except Exception as e:
        # fallback
        return fallback_generate_queries(prompt, n)


def fallback_generate_queries(prompt: str, n: int = 5) -> List[str]:
    """
    Conservative deterministic fallback: produce n query variants for the prompt.
    """
    base = prompt.strip()
    tokens = [t for t in base.split() if t]
    queries = []
    # 1: the original brief
    queries.append(base)
    # 2: add synonyms / context phrases
    queries.append(base + " street scene")
    queries.append(base + " crowd")
    queries.append(base + " city life")
    queries.append(base + " people walking at street")
    # dedupe and limit
    final = []
    seen = set()
    for q in queries:
        qn = " ".join(q.split())
        if qn.lower() not in seen:
            seen.add(qn.lower())
            final.append(qn)
        if len(final) >= n:
            break
    return final


# --------------------------
# Pexels API fetch helpers
# --------------------------
def pexels_search_photos(query: str, per_page: int = PEXELS_PER_QUERY_PHOTOS) -> List[Dict[str, Any]]:
    params = {"query": query, "per_page": per_page}
    try:
        r = requests.get(PEXELS_PHOTO_SEARCH, headers=PEXELS_HEADERS, params=params, timeout=12)
        r.raise_for_status()
        data = r.json()
        items = []
        for p in data.get("photos", [])[:per_page]:
            items.append({
                "id": f"photo_{p.get('id')}",
                "url": p.get("src", {}).get("original") or p.get("src", {}).get("large"),
                "width": p.get("width"),
                "height": p.get("height"),
                "photographer": p.get("photographer"),
                "provider": "pexels",
                "raw": p
            })
        return items
    except Exception as e:
        print(f"[PEXELS PHOTOS ERROR] query='{query}' -> {e}")
        return []


def pexels_search_videos(query: str, per_page: int = PEXELS_PER_QUERY_VIDEOS) -> List[Dict[str, Any]]:
    params = {"query": query, "per_page": per_page}
    try:
        r = requests.get(PEXELS_VIDEO_SEARCH, headers=PEXELS_HEADERS, params=params, timeout=12)
        r.raise_for_status()
        data = r.json()
        items = []
        for v in data.get("videos", [])[:per_page]:
            # pick best file (highest width*fps) if available
            files = v.get("video_files", []) or []
            chosen = None
            if files:
                files_sorted = sorted(files, key=lambda f: (f.get("width", 0), f.get("fps", 0)), reverse=True)
                chosen = files_sorted[0]
            items.append({
                "id": f"video_{v.get('id')}",
                "url": (chosen.get("link") if chosen else v.get("url")),
                "duration": v.get("duration"),
                "width": chosen.get("width") if chosen else None,
                "height": chosen.get("height") if chosen else None,
                "provider": "pexels",
                "raw": v
            })
        return items
    except Exception as e:
        print(f"[PEXELS VIDEOS ERROR] query='{query}' -> {e}")
        return []


# --------------------------
# Freesound API fetch helpers
# --------------------------
def freesound_search(query: str, page_size: int = FREESOUND_PER_QUERY) -> List[Dict[str, Any]]:
    params = {"query": query, "page_size": page_size, "fields": "id,name,previews,duration,username,tags,license"}
    try:
        r = requests.get(FREESOUND_SEARCH, headers=FREESOUND_HEADERS, params=params, timeout=12)
        r.raise_for_status()
        data = r.json()
        items = []
        for item in data.get("results", [])[:page_size]:
            previews = item.get("previews", {}) or {}
            preview_url = previews.get("preview-hq-mp3") or previews.get("preview-hq-ogg") or previews.get("preview-lq-mp3")
            items.append({
                "id": f"fs_{item.get('id')}",
                "title": item.get("name"),
                "url": preview_url,
                "duration": item.get("duration"),
                "uploader": item.get("username"),
                "tags": item.get("tags", []),
                "license": item.get("license"),
                "provider": "freesound",
                "raw": item
            })
        return items
    except Exception as e:
        print(f"[FREESOUND ERROR] query='{query}' -> {e}")
        return []


# --------------------------
# Main pipeline
# --------------------------
def collect_assets_for_prompt(prompt: str, num_queries: int = MAX_QUERIES) -> Dict[str, Any]:
    out = {
        "prompt": prompt,
        "generated_queries": [],
        "results": {},  # query -> {pexels: {photos:[], videos:[]}, freesound: {audios:[]}}
        "notes": {}
    }

    # 1) generate queries
    queries = generate_queries_via_openai(prompt, n=num_queries)
    out["generated_queries"] = queries

    # 2) for each query, fetch from Pexels + Freesound
    for q in queries:
        qkey = q
        out["results"][qkey] = {"pexels": {"photos": [], "videos": []}, "freesound": {"audios": []}}
        # Pexels photos
        photos = pexels_search_photos(q)
        time.sleep(DELAY_BETWEEN_PROVIDER_CALLS)
        videos = pexels_search_videos(q)
        time.sleep(DELAY_BETWEEN_PROVIDER_CALLS)
        audios = freesound_search(q)
        time.sleep(DELAY_BETWEEN_PROVIDER_CALLS)
        out["results"][qkey]["pexels"]["photos"] = photos
        out["results"][qkey]["pexels"]["videos"] = videos
        out["results"][qkey]["freesound"]["audios"] = audios

    return out


# --------------------------
# CLI / example usage
# --------------------------
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Collect Pexels + Freesound assets for an unstructured prompt.")
    parser.add_argument("--prompt", "-p", type=str, help="User prompt (e.g. 'bustling city with people')", required=False)
    parser.add_argument("--queries", "-q", type=int, default=5, help="Number of similar queries to generate (default 5)")
    parser.add_argument("--out", "-o", type=str, default="assets_unstructured.json", help="Output JSON filename")
    args, unknown = parser.parse_known_args()

    if not args.prompt:
        # interactive ask
        user_prompt = input("Enter a prompt (e.g. 'bustling city with people'): ").strip()
    else:
        user_prompt = args.prompt.strip()

    print("[RUN] Prompt:", user_prompt)
    plan = collect_assets_for_prompt(user_prompt, num_queries=args.queries)

    # save output
    with open(args.out, "w", encoding="utf-8") as f:
        json.dump(plan, f, indent=2)

    print(f"[DONE] Saved results to {args.out}")
    # print summary
    total_photos = sum(len(plan["results"][q]["pexels"]["photos"]) for q in plan["generated_queries"])
    total_videos = sum(len(plan["results"][q]["pexels"]["videos"]) for q in plan["generated_queries"])
    total_audios = sum(len(plan["results"][q]["freesound"]["audios"]) for q in plan["generated_queries"])
    print(f"Found {total_photos} photos, {total_videos} videos (Pexels), {total_audios} audio previews (Freesound) across {len(plan['generated_queries'])} queries.")


[RUN] Prompt: Calm winds near seashore
[DONE] Saved results to assets_unstructured.json
Found 20 photos, 15 videos (Pexels), 6 audio previews (Freesound) across 5 queries.
