In [None]:
# Install all required dependencies (run in Colab cell)
!pip install ultralytics
!pip install git+https://github.com/openai/CLIP.git
!pip install faiss-cpu
!pip install transformers
!pip install opencv-python
!pip install pandas
!pip install numpy
!pip install Pillow
!pip install requests
!pip install scikit-learn
!pip install openai-whisper

# Import libraries
import os
import cv2
import numpy as np
import pandas as pd
import json
import torch
import clip
import faiss
from PIL import Image
import requests
from io import BytesIO
from ultralytics import YOLO
from transformers import pipeline
import warnings
warnings.filterwarnings('ignore')
print("Environment setup complete!")


Collecting ultralytics
  Downloading ultralytics-8.3.152-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

In [None]:
# Mount Google Drive (if using Colab). The import is guarded so the notebook
# can also run outside Colab, where the google.colab package does not exist.
try:
    from google.colab import drive
except ImportError:
    drive = None
if drive is not None:
    drive.mount('/content/drive')

# Define base paths. Set the FLICKD_BASE_PATH environment variable to point the
# notebook at a different data folder; the default is the original Drive location.
BASE_PATH = os.environ.get('FLICKD_BASE_PATH', '/content/drive/MyDrive/FlickdAI')
VIDEOS_PATH = f'{BASE_PATH}/videos'
CATALOG_PATH = f'{BASE_PATH}/product_data[1].xlsx'
VIBES_PATH = f'{BASE_PATH}/vibeslist[1].json'
OUTPUTS_PATH = f'{BASE_PATH}/outputs'
os.makedirs(OUTPUTS_PATH, exist_ok=True)
print("Paths set.")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Paths set.


In [None]:
import glob

def inspect_and_prepare_catalog():
    """Normalise the product catalog and save it as a CSV.

    Reads the Excel workbook at ``CATALOG_PATH`` and renames its columns to the
    names the rest of the notebook uses (``product_id``, ``title``,
    ``shopify_cdn_url``, ``category``, ``color``). Columns are recognised by
    case-insensitive keyword matching. At most one source column is mapped to
    each target: a column whose name already equals the target wins over a
    keyword match, otherwise the first matching column is used. Expected
    columns that cannot be found are filled with the string ``'unknown'``.

    Returns:
        str: Path of the CSV that was written (``{BASE_PATH}/catalog.csv``).
    """
    expected_cols = ['product_id', 'title', 'shopify_cdn_url', 'category', 'color']
    # Keyword rules, checked in this order; the first rule with a keyword
    # contained in the lower-cased column name decides the target.
    keyword_rules = [
        (('id',), 'product_id'),
        (('title', 'name'), 'title'),
        (('url', 'image'), 'shopify_cdn_url'),
        (('category', 'type'), 'category'),
        (('color',), 'color'),
    ]

    catalog_df = pd.read_excel(CATALOG_PATH)

    # target name -> (source column, whether the source name matched exactly)
    chosen = {}
    for col in catalog_df.columns:
        cl = str(col).strip().lower()  # str(): column labels are not always strings
        if cl in expected_cols:
            target, exact = cl, True
        else:
            target = next(
                (name for words, name in keyword_rules if any(w in cl for w in words)),
                None,
            )
            exact = False
        if target is None:
            continue
        # Substring matching is loose ('id' also matches e.g. 'width'), so never
        # let a second column silently claim an already-mapped target unless it
        # is the exact name and the current holder is only a keyword match.
        if target not in chosen or (exact and not chosen[target][1]):
            chosen[target] = (col, exact)

    column_mapping = {col: target for target, (col, _) in chosen.items()}
    catalog_df = catalog_df.rename(columns=column_mapping)
    # Guard against any remaining duplicate labels (keeps the first occurrence);
    # duplicates would otherwise be written as repeated CSV columns.
    catalog_df = catalog_df.loc[:, ~catalog_df.columns.duplicated()].copy()

    for col in expected_cols:
        if col not in catalog_df:
            catalog_df[col] = 'unknown'

    catalog_csv = f'{BASE_PATH}/catalog.csv'
    catalog_df[expected_cols].to_csv(catalog_csv, index=False)
    print(f"Catalog saved as {catalog_csv}")
    return catalog_csv

# Build the cleaned catalog CSV and keep its path; it is passed to
# FlickdPipeline when the pipeline is run.
CATALOG_CSV = inspect_and_prepare_catalog()


Catalog saved as /content/drive/MyDrive/FlickdAI/catalog.csv


In [None]:
class FrameExtractor:
    """Extracts keyframes from a video file at a given interval.

    Args:
        interval: Spacing between sampled frames, in seconds of video
            (converted to a frame step using the FPS the video reports).
    """
    def __init__(self, interval=1.0):
        self.interval = interval

    def extract(self, video_path):
        """Sample frames from ``video_path`` and keep those passing ``is_good_frame``.

        Args:
            video_path: Path of the video file to open with OpenCV.

        Returns:
            list[dict]: ``{'frame': <image>, 'frame_number': <int>}`` for every
            kept frame, in playback order. Empty when no frame can be read.
        """
        cap = cv2.VideoCapture(video_path)
        try:
            fps = cap.get(cv2.CAP_PROP_FPS)
            # A zero/NaN FPS or a very small interval would give a step of 0 and
            # crash on the modulo below; fall back to sampling every frame.
            step = int(fps * self.interval) if fps and fps > 0 else 0
            frame_interval = max(1, step)
            frames = []
            frame_count = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                if frame_count % frame_interval == 0 and self.is_good_frame(frame):
                    frames.append({'frame': frame, 'frame_number': frame_count})
                frame_count += 1
            return frames
        finally:
            # Always free the capture handle, even if a frame check raises.
            cap.release()

    def is_good_frame(self, frame):
        """Return True when ``frame`` is neither too dark/bright nor too blurry.

        A frame is rejected when it is ``None``, when its mean grayscale value
        is below 50 or above 200, or when the variance of its Laplacian is
        below 100.
        """
        if frame is None:
            return False
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        brightness = np.mean(gray)
        if brightness < 50 or brightness > 200:
            return False
        if cv2.Laplacian(gray, cv2.CV_64F).var() < 100:
            return False
        return True


In [None]:
class FashionDetector:
    """YOLOv8 detector that only reports a fixed set of fashion-related classes."""

    def __init__(self):
        self.model = YOLO('yolov8n.pt')
        # Class names (as listed in the model's ``names`` table) that are kept.
        self.fashion_classes = ['person', 'handbag', 'tie', 'suitcase']

    def detect(self, frame, conf=0.5):
        """Run the model on ``frame`` and return the kept detections.

        Args:
            frame: Image passed straight to the model.
            conf: Confidence threshold forwarded to the model call.

        Returns:
            list[dict]: One dict per kept box with ``class_name``, ``bbox``
            (``[x1, y1, x2, y2]`` truncated to ints) and ``confidence``.
        """
        found = []
        for result in self.model(frame, conf=conf, verbose=False):
            boxes = result.boxes
            if boxes is None:
                continue
            for box in boxes:
                label = self.model.names[int(box.cls[0])]
                if label not in self.fashion_classes:
                    continue
                left, top, right, bottom = map(int, box.xyxy[0].tolist())
                found.append({
                    'class_name': label,
                    'bbox': [left, top, right, bottom],
                    'confidence': float(box.conf[0]),
                })
        return found

    def extract_regions(self, frame, detections):
        """Cut the detected boxes out of ``frame``.

        Box coordinates are clamped to the frame bounds; boxes that end up
        empty are dropped.

        Returns:
            list[dict]: ``{'crop': <sub-image>, 'detection': <input dict>}``
            for every non-empty region.
        """
        height, width = frame.shape[:2]
        regions = []
        for detection in detections:
            left, top, right, bottom = detection['bbox']
            region = frame[max(0, top):min(height, bottom),
                           max(0, left):min(width, right)]
            if region.size > 0:
                regions.append({'crop': region, 'detection': detection})
        return regions


In [None]:
class FallbackProductMatcher:
    """Matches detected items to catalog using CLIP text embeddings.

    Each catalog row is embedded once from the text
    ``"<title> <category> <color>"``; a crop is matched by embedding the image
    with the same CLIP model and looking up its nearest text embedding in a
    FAISS inner-product index over L2-normalised vectors.

    Args:
        catalog_csv: Path of the catalog CSV (needs ``product_id``, ``title``,
            ``category`` and ``color`` columns).
        cache_dir: Folder for the cached embeddings/index. Defaults to
            ``{BASE_PATH}/cache``.
        match_threshold: Minimum similarity for any match (default 0.6).
        exact_threshold: Similarity above which a match is labelled
            ``"exact"`` instead of ``"similar"`` (default 0.8).
    """
    # NOTE(review): CLIP image-to-text cosine similarities are often well below
    # 0.6 -- confirm these defaults against real matches before relying on them.
    match_threshold = 0.6
    exact_threshold = 0.8

    def __init__(self, catalog_csv, cache_dir=None, match_threshold=None, exact_threshold=None):
        # Resolve the default here rather than in the signature so that it
        # follows the current value of BASE_PATH.
        if cache_dir is None:
            cache_dir = f'{BASE_PATH}/cache'
        if match_threshold is not None:
            self.match_threshold = match_threshold
        if exact_threshold is not None:
            self.exact_threshold = exact_threshold
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model, self.preprocess = clip.load("ViT-B/32", device=self.device)
        self.catalog = pd.read_csv(catalog_csv)
        os.makedirs(cache_dir, exist_ok=True)
        self.embeddings, self.index = self._build_index(cache_dir)

    def _build_index(self, cache_dir):
        """Load the cached text embeddings and FAISS index, or build and cache them.

        Returns:
            tuple: ``(embeddings, index)``.

        Raises:
            ValueError: If the catalog has no rows and nothing usable is cached.
        """
        emb_path, idx_path = f'{cache_dir}/text_emb.npy', f'{cache_dir}/faiss_idx.bin'
        n_products = len(self.catalog)
        if os.path.exists(emb_path) and os.path.exists(idx_path):
            embs = np.load(emb_path)
            idx = faiss.read_index(idx_path)
            # Only trust the cache when it holds one vector per catalog row; a
            # cache built from a different catalog would map search hits to the
            # wrong rows. (Same-size content changes are not detected -- delete
            # the cache folder after editing the catalog.)
            if embs.shape[0] == n_products and idx.ntotal == n_products:
                return embs, idx
        if n_products == 0:
            raise ValueError("Catalog is empty; cannot build a product index.")
        embs = []
        for _, row in self.catalog.iterrows():
            desc = f"{row['title']} {row['category']} {row['color']}"
            # truncate=True: long titles would otherwise raise once they exceed
            # CLIP's fixed context length.
            tokens = clip.tokenize([desc], truncate=True).to(self.device)
            with torch.no_grad():
                emb = self.model.encode_text(tokens).cpu().numpy().flatten()
            embs.append(emb.astype('float32'))
        embs = np.array(embs).astype('float32')
        idx = faiss.IndexFlatIP(embs.shape[1])
        # Normalising in place makes the inner product equal cosine similarity.
        faiss.normalize_L2(embs)
        idx.add(embs)
        np.save(emb_path, embs)
        faiss.write_index(idx, idx_path)
        return embs, idx

    def get_image_embedding(self, image):
        """Return the CLIP image embedding of ``image`` as a flat float32 vector."""
        # Convert BGR to RGB before handing the array to PIL.
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(image_rgb)
        image_input = self.preprocess(pil_image).unsqueeze(0).to(self.device)
        with torch.no_grad():
            emb = self.model.encode_image(image_input).cpu().numpy().flatten()
        return emb.astype('float32')

    @staticmethod
    def _to_native(value):
        """Turn a NumPy scalar into the equivalent Python scalar (JSON-serialisable)."""
        return value.item() if isinstance(value, np.generic) else value

    def match(self, crop_data):
        """Find the catalog product closest to a crop.

        Args:
            crop_data: Dict with the image array under ``'crop'``.

        Returns:
            dict | None: ``type``, ``color``, ``matched_product_id``,
            ``match_type`` (``"exact"``/``"similar"``) and ``confidence`` for
            the best hit, or ``None`` when the crop is empty or the best
            similarity is below ``match_threshold``.
        """
        crop = crop_data['crop']
        if crop.size == 0:
            return None
        query_emb = self.get_image_embedding(crop).reshape(1, -1)
        faiss.normalize_L2(query_emb)
        sim, idx = self.index.search(query_emb, k=1)
        sim, idx = float(sim[0][0]), int(idx[0][0])
        # A negative index means the search produced no neighbour.
        if idx < 0 or sim < self.match_threshold:
            return None
        match_type = "exact" if sim > self.exact_threshold else "similar"
        row = self.catalog.iloc[idx]
        # Row values can be NumPy scalars, which json.dump rejects; convert them.
        return {
            'type': self._to_native(row['category']),
            'color': self._to_native(row['color']),
            'matched_product_id': self._to_native(row['product_id']),
            'match_type': match_type,
            'confidence': sim
        }


In [None]:
class VibeClassifier:
    """Classifies video vibe using hybrid NLP.

    Combines a zero-shot classifier score with a keyword-hit score for every
    vibe loaded from ``vibes_list_path``.

    Args:
        vibes_list_path: Path of a JSON file holding the candidate vibe labels.
    """
    def __init__(self, vibes_list_path):
        # Imported locally so this class keeps working even if a notebook
        # variable named `pipeline` has replaced the transformers function.
        from transformers import pipeline as hf_pipeline

        with open(vibes_list_path, 'r') as f:
            self.vibes = json.load(f)
        self.classifier = hf_pipeline(
            "zero-shot-classification",
            model="facebook/bart-large-mnli",
            device=0 if torch.cuda.is_available() else -1
        )
        self.keywords = {
            "Coquette": ["bow", "ribbon", "pink", "feminine", "delicate", "romantic", "cute", "sweet"],
            "Clean Girl": ["minimal", "natural", "effortless", "simple", "dewy", "fresh", "clean"],
            "Cottagecore": ["floral", "vintage", "pastoral", "romantic", "countryside", "cottage", "nature"],
            "Streetcore": ["urban", "edgy", "street", "casual", "sneakers", "hip", "cool"],
            "Y2K": ["metallic", "futuristic", "cyber", "2000s", "tech", "digital", "retro"],
            "Boho": ["bohemian", "free-spirited", "earthy", "flowing", "ethnic", "boho", "hippie"],
            "Party Glam": ["sequins", "sparkle", "glamorous", "evening", "dressy", "party", "glam"]
        }

    def classify(self, text):
        """Return up to three vibes for ``text``, best first.

        The score of a vibe is ``0.6 * zero-shot score + 0.4 * fraction of its
        keywords found in the lower-cased text`` (keywords are matched as
        substrings). Vibes scoring above 0.2 are returned; if none does, the
        single top-ranked vibe is returned instead.

        Args:
            text: Caption text; ``None`` is treated as an empty string.

        Returns:
            list[str]: Selected vibe labels; empty only when no vibes are loaded.
        """
        text = (text or "").lower()
        if not self.vibes:
            return []

        rule_scores = {}
        for v in self.vibes:
            # Vibes from the JSON file that have no keyword list score 0 on the
            # rule side instead of raising KeyError.
            words = self.keywords.get(v, [])
            rule_scores[v] = sum(1 for k in words if k in text) / len(words) if words else 0.0

        ml_scores = {v: 0 for v in self.vibes}
        if text.strip():
            try:
                result = self.classifier(text, self.vibes)
                for label, score in zip(result['labels'], result['scores']):
                    ml_scores[label] = float(score)
            except Exception as exc:
                # Best effort: keep going on keyword scores, but say so.
                print(f"Zero-shot vibe classification failed ({exc}); using keyword scores only.")

        scores = {v: 0.6*ml_scores[v] + 0.4*rule_scores[v] for v in self.vibes}
        sorted_vibes = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        return [v for v, s in sorted_vibes[:3] if s > 0.2] or [sorted_vibes[0][0]]


In [None]:
class FlickdPipeline:
    """Main pipeline to process all videos and save outputs.

    Args:
        videos_dir: Folder containing the ``.mp4`` files and optional
            ``<video_id>.txt`` caption files.
        catalog_csv: Catalog CSV path handed to ``FallbackProductMatcher``.
        vibes_json: Vibes list path handed to ``VibeClassifier``.
        outputs_dir: Folder receiving one ``<video_id>.json`` per video
            (created if missing).
    """
    def __init__(self, videos_dir, catalog_csv, vibes_json, outputs_dir):
        self.extractor = FrameExtractor()
        self.detector = FashionDetector()
        self.matcher = FallbackProductMatcher(catalog_csv)
        self.vibe_classifier = VibeClassifier(vibes_json)
        self.videos_dir = videos_dir
        self.outputs_dir = outputs_dir
        os.makedirs(outputs_dir, exist_ok=True)

    @staticmethod
    def _json_default(value):
        """json.dump fallback: unwrap NumPy-style scalars, stringify anything else."""
        return value.item() if hasattr(value, 'item') else str(value)

    def process_video(self, video_path):
        """Process one video and write ``<video_id>.json`` to the outputs folder.

        The JSON holds the video id, the vibes predicted from the caption file
        (empty caption when the file is missing) and up to four matched
        products, de-duplicated by product id and ordered by confidence.
        """
        video_id = os.path.splitext(os.path.basename(video_path))[0]
        txt_path = os.path.join(self.videos_dir, f"{video_id}.txt")
        caption = ""
        if os.path.exists(txt_path):
            with open(txt_path) as caption_file:  # closed deterministically
                caption = caption_file.read().strip()

        frames = self.extractor.extract(video_path)
        all_crops = []
        for f in frames:
            dets = self.detector.detect(f['frame'])
            crops = self.detector.extract_regions(f['frame'], dets)
            all_crops.extend(crops)

        # Match every crop exactly once and drop the misses.
        all_matches = [m for m in map(self.matcher.match, all_crops) if m]

        # Deduplicate by product_id, keep highest confidence
        unique = {}
        for m in all_matches:
            pid = m['matched_product_id']
            if pid not in unique or m['confidence'] > unique[pid]['confidence']:
                unique[pid] = m
        # When more than four products matched, keep the most confident ones
        # rather than whichever happened to be detected first.
        products = sorted(unique.values(), key=lambda m: m['confidence'], reverse=True)[:4]

        vibes = self.vibe_classifier.classify(caption)
        output = {
            "video_id": video_id,
            "vibes": vibes,
            "products": products
        }
        with open(os.path.join(self.outputs_dir, f"{video_id}.json"), 'w') as f:
            # default=...: catalog values may be NumPy scalars, which the json
            # module cannot serialise on its own.
            json.dump(output, f, indent=2, default=self._json_default)
        print(f"Processed {video_id}")

    def process_all(self):
        """Process every ``*.mp4`` in the videos folder, in sorted filename order."""
        mp4s = sorted(glob.glob(f"{self.videos_dir}/*.mp4"))
        for v in mp4s:
            self.process_video(v)
        print("All videos processed.")

# Run the pipeline.
# The instance is deliberately not called `pipeline`: that name is the function
# imported from transformers at the top of the notebook, and rebinding it makes
# any later re-run of code that calls transformers' pipeline(...) fail.
flickd_pipeline = FlickdPipeline(VIDEOS_PATH, CATALOG_CSV, VIBES_PATH, OUTPUTS_PATH)
flickd_pipeline.process_all()


Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 114MB/s]
100%|████████████████████████████████████████| 338M/338M [00:02<00:00, 122MiB/s]


config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cuda:0


Processed 2025-05-28_13-42-32_UTC
Processed 2025-05-27_13-46-16_UTC
Processed 2025-06-02_11-31-19_UTC
Processed 2025-05-31_14-01-37_UTC
Processed 2025-05-22_08-25-12_UTC
Processed 2025-05-28_13-40-09_UTC
All videos processed.


In [None]:
import os
import shutil

import glob

# Root of the folder that is assembled for submission.
base = '/content/submission'


def _copy_matching(pattern, dest_dir, limit=None):
    """Copy files matching ``pattern`` into ``dest_dir`` (at most ``limit`` when given)."""
    for src in glob.glob(pattern)[:limit]:
        shutil.copy(src, f"{dest_dir}/{os.path.basename(src)}")


# Create structure: the root first, then each sub-folder.
for target_dir in [base] + [os.path.join(base, sub) for sub in ['videos', 'outputs', 'models', 'src', 'data']]:
    os.makedirs(target_dir, exist_ok=True)

# Move/copy your files (adjust paths as needed)
for source_file, target_file in (
    ('/content/drive/MyDrive/FlickdAI/catalog.csv', f'{base}/catalog.csv'),
    ('/content/drive/MyDrive/FlickdAI/vibeslist[1].json', f'{base}/vibes_list.json'),
):
    shutil.copy(source_file, target_file)

# Example: move 2-3 sample videos for demo (not all, due to repo size limits)
_copy_matching('/content/drive/MyDrive/FlickdAI/videos/*.mp4', f"{base}/videos", limit=3)

# Copy your best outputs
_copy_matching('/content/drive/MyDrive/FlickdAI/outputs/*.json', f"{base}/outputs")

print("Directory structure ready!")


Directory structure ready!


In [None]:
# NOTE(review): this writes the literal placeholder text below into
# src/frame_extractor.py, not real source code -- replace the string with the
# FrameExtractor class source before committing.
with open(f"{base}/src/frame_extractor.py", "w") as f:
    f.write("""<your FrameExtractor class code here>""")


In [None]:
# Record the packages installed in the current environment as the submission's
# requirements file (shell redirect overwrites any existing file).
!pip freeze > /content/submission/requirements.txt


In [None]:
# Install git if needed
!apt-get install git

# Configure your GitHub identity
!git config --global user.email "avinashgohite17@gmailcom.com"
!git config --global user.name "insanemate033-gif"


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git is already the newest version (1:2.34.1-1ubuntu1.12).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [None]:
%cd /content/submission

# Initialize git
!git init
!git remote add origin https://github.com/insanemate033-gif/flickd-ai-hackathon.git

# Add all files
!git add .

# Commit
!git commit -m "Initial hackathon submission"

# Push (use a Personal Access Token for authentication if prompted)
!git branch -M main
!git push https://insanemate033-gif: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@github.com/insanemate033-gif/flickd-ai-hackathon.git main


/content/submission
[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /content/submission/.git/
[master (root-commit) ca3b430] Initial hackathon submission
 13 files changed, 1663 insertions(+)
 create mode 100644 catalog.csv
 create mode 100644 outputs/2025-05-22_08-25-12_UTC.json
 create mode 100644 outputs/2025-05-27_13-46-16_UTC.json
 create mode 100644 outputs/2025-05-28_13-40-09_UTC.json
 create mode 100644 outputs/2025-05-28_13-42-32_UTC.json
 create mode 100644 outputs/2025-05-31_14-01-37_UTC.json
 create mode 10064