# Pull the LightVision repository

In [1]:
try:
    import google.colab
    !git clone -b branch_name https://github.com/erenyavuz02/LightVision.git
    print("Repository cloned in Google Colab")
except ImportError:
    print("Running locally - repository clone skipped")

Running locally - repository clone skipped


# Set the config manager to use the local config file

In [2]:
import os
import sys

# Add project root to path
try:
    # google.colab is importable only inside a Colab runtime, so a successful
    # import doubles as environment detection.
    import google.colab  # noqa: F401
except ImportError:
    # Running locally. Notebooks do not define __file__, so the project root is
    # taken to be the current working directory (the original code reached the
    # same value through the literal string "__file__"). Start the notebook
    # from the repository root for the project imports to resolve.
    project_root = os.path.abspath(os.getcwd())
    print("Running locally")
else:
    # In Colab the project root must be set by hand: adjust this path to match
    # where the project was cloned/uploaded in the Colab filesystem.
    project_root = '/content/LightVision'
    print("Running in Google Colab")

# Put the project root first on sys.path so project packages can be imported;
# the membership check keeps re-runs of this cell from adding duplicates.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

print(f"Project root: {project_root}")

# Import ConfigManager
from utils.config import ConfigManager


Running locally
Project root: /Users/erenyavuz/Desktop/KU/25 Spring/COMP447/Project/Repo/FlightVision


In [3]:

# Location of the YAML configuration file inside the project checkout.
config_path = os.path.join(project_root, 'config', 'config.yaml')

# Build the configuration manager from that file, then record the resolved
# project root under the 'project.root' key so later cells can read it back
# with config.get('project.root').
config = ConfigManager(config_path=config_path)
config.update('project.root', project_root)

# Download the Flickr8k dataset

In [4]:
# Download Flickr8k dataset
from functions.dataset import DatasetDownloader
from pathlib import Path

# Downloader configured from the shared `config` object.
datasetDownloader = DatasetDownloader(config)

# download_dataset() is used as a success flag: a falsy result means failure.
if not datasetDownloader.download_dataset(verbose=True):
    print("❌ Dataset download failed")
else:
    # Sanity check: count the JPEG files under <project.root>/data/Images.
    images_dir = Path(config.get('project.root')).joinpath('data', 'Images')
    image_count = sum(1 for _ in images_dir.glob('*.jpg'))
    print(f"✅ Flickr8k Dataset: {image_count:,} images ready")

Starting download process for Flickr8k...
Dataset already exists: 8091 files found
✅ Flickr8k Dataset: 8,091 images ready


# Download the base model

# Install Dependencies

In [None]:
# Install required dependencies
print("📦 Installing required dependencies...")
!pip install -q open-clip-torch faiss-cpu transformers timm
print("✅ Dependencies installed!")

In [5]:
# Download base MobileCLIP model
from functions.model import download_base_model

# download_base_model() is used as a success flag; report the outcome either way.
print(
    "🚀 Base model ready for training!"
    if download_base_model(config, verbose=True)
    else "❌ Base model download failed"
)



✅ Base model already exists.
🚀 Base model ready for training!


# Load the base model

In [6]:
from functions.model import load_model

# load_model() returns three objects: the model, `preprocess` (passed later as
# the dataset `transform`) and `tokenizer` (passed later to evaluate_dataset).
(base_model, preprocess, tokenizer) = load_model(config, verbose=True)

💻 CUDA not available, using CPU
Loading mobileclip_s0 model...
Loading from checkpoint: /Users/erenyavuz/Desktop/KU/25 Spring/COMP447/Project/Repo/FlightVision/checkpoints/mobileclip_s0.pt
✅ Model loaded successfully on cpu


# Test Custom Dataset with Train/Test Split

Test the CustomDataset class to ensure proper train/test splitting without data leakage.

In [9]:
# Test CustomDataset class with single instance
from functions.dataset import CustomDataset

# Create a single dataset instance
print("Creating dataset instance...")

# One dataset object is shared by the later cells; test_ratio=0.125 requests
# one eighth of the data for the test split, and `preprocess` (returned by
# load_model) is supplied as the transform.
dataset = CustomDataset(
    config,
    test_ratio=0.125,
    transform=preprocess,
)


Creating dataset instance...


# Evaluate the base model

In [None]:
from functions.evaluate import evaluate_dataset

# Evaluate the base (not yet fine-tuned) model on the dataset.
print("Evaluating base model performance...")

evaluation_results = evaluate_dataset(
    model=base_model,
    testDataset=dataset,
    config=config,
    tokenizer=tokenizer,            # tokenizer returned by load_model
    k_values=[1, 5, 10, 20],        # cut-offs to evaluate at; customize as needed
    force_rebuild_index=False,      # set to True to force a FAISS index rebuild
    verbose=True,
)

# Per the original note, evaluate_dataset saves and prints the results itself.
print("Evaluation complete!")

Evaluating base model performance...
Starting dataset evaluation...
Model: mobileclip_s0
Device: cpu
Dataset split: 5110 train, 730 test

Evaluating train split
Loaded index with 5110 vectors
Using cached FAISS index
Evaluating retrieval on train split...
Processing 5110 queries...
Processing query 1/5110


KeyboardInterrupt: 

# Train the model

# Evaluate the model