# Pull the LightVision repository

In [1]:
try:
    import google.colab
    !git clone https://github.com/erenyavuz02/LightVision.git
    print("Repository cloned in Google Colab")
except ImportError:
    print("Running locally - repository clone skipped")

Running locally - repository clone skipped


# Set the config manager to use the local config file

In [2]:
import os
import sys

# Resolve the project root and make it importable.
try:
    # google.colab is only importable inside a Colab runtime
    import google.colab
    # In Colab the root must be set by hand; adjust this path to match where
    # the project was cloned/uploaded.
    project_root = '/content/LightVision'
    print("Running in Google Colab")
except ImportError:
    # A notebook has no __file__; the previous code resolved the *string*
    # "__file__" against the working directory and took its parent, which is
    # simply the working directory. Ask for it directly instead.
    script_dir = os.getcwd()
    project_root = script_dir
    print("Running locally")

# Put the project root first on sys.path so the `utils` / `functions`
# packages resolve; the membership test keeps re-runs from adding duplicates.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

print(f"Project root: {project_root}")

# Import ConfigManager
from utils.config import ConfigManager


Running locally
Project root: /Users/erenyavuz/Desktop/KU/25 Spring/COMP447/Project/Repo/FlightVision


In [3]:
# Build the configuration manager from config/config.yaml under the detected project root
config = ConfigManager(config_path = os.path.join(project_root, 'config/config.yaml'))

# Store the detected project root under 'project.root'; later cells read it
# back with config.get('project.root') to locate the data directory.
config.update('project.root', project_root)

# Download the Flickr8k dataset

In [4]:
# Fetch the Flickr8k dataset and report how many images are on disk
from functions.dataset import DatasetDownloader
from pathlib import Path

datasetDownloader = DatasetDownloader(config)

# Handle the failure case first; on success, count the .jpg files under
# <project root>/data/Images as a quick sanity check.
if not datasetDownloader.download_dataset(verbose=True):
    print("❌ Dataset download failed")
else:
    images_dir = Path(config.get('project.root')).joinpath('data', 'Images')
    image_count = sum(1 for _ in images_dir.glob('*.jpg'))
    print(f"✅ Flickr8k Dataset: {image_count:,} images ready")
    

Starting download process for Flickr8k...
Dataset already exists: 8091 files found
✅ Flickr8k Dataset: 8,091 images ready


# Download the base model

In [5]:
# Make sure the base MobileCLIP checkpoint is available and report the outcome
from functions.model import download_base_model

# download_base_model's truthy/falsy return selects which status line is shown
print(
    "🚀 Base model ready for training!"
    if download_base_model(config, verbose=True)
    else "❌ Base model download failed"
)



✅ Base model already exists.
🚀 Base model ready for training!


# Load the base model

In [6]:
from functions.model import load_model

# load_model hands back three objects that the rest of the notebook reuses:
# the model itself, `preprocess` (passed to the dataset as its transform) and
# `tokenizer` (passed to evaluation and training).
base_model, preprocess, tokenizer = load_model(config, verbose=True)

💻 CUDA not available, using CPU
Loading mobileclip_s0 model...
Loading from checkpoint: /Users/erenyavuz/Desktop/KU/25 Spring/COMP447/Project/Repo/FlightVision/checkpoints/mobileclip_s0.pt
✅ Model loaded successfully on cpu


# Test Custom Dataset with Train/Test Split

Test the CustomDataset class to ensure proper train/test splitting without data leakage.

In [7]:
# Build the single CustomDataset instance shared by the evaluation and training cells
from functions.dataset import CustomDataset

# Select which captions file the dataset should read
config.update('dataset.captions_file', 'all_captions.json')  # Use all captions for testing

# Create a single dataset instance.
# NOTE(review): test_ratio=0.125 presumably holds out 1/8 of the samples for
# the test split -- confirm against CustomDataset.
print("Creating dataset instance...")
dataset = CustomDataset(config, test_ratio=0.125, transform=preprocess)


Creating dataset instance...


# Evaluate the base model

In [None]:
import importlib

import functions.train

# Reload first, import second. importlib.reload() re-executes the module but
# does not rebind names that were already copied out with `from ... import`,
# so importing train_model before the reload would leave it pointing at the
# stale, pre-edit function.
importlib.reload(functions.train)
from functions.train import train_model

In [None]:
from functions.evaluate import evaluate_dataset

# Baseline: score the base model before the training cells below run
print("Evaluating base model performance...")

# Run evaluation
evaluation_results = evaluate_dataset(
    model=base_model,
    testDataset=dataset,
    config=config,
    tokenizer = tokenizer,
    k_values=[1, 5, 10, 20],  # cutoffs k to report -- presumably recall@k; confirm in evaluate_dataset
    force_rebuild_index=True,  # Set to True to force rebuild FAISS index
    verbose=True,
    only_test=True  # Set to True to evaluate only on the test set
)

# NOTE(review): evaluate_dataset is expected to save and print the results
# itself -- not verifiable from this notebook.
print("Evaluation complete!")

# Model Training with Custom Dataset

Now we'll train our model using the custom dataset with positional embedding modifications. This will:
1. Apply the positional embedding strategy to improve text encoding
2. Train the modified model on our custom dataset
3. Track training progress with real-time visualizations

In [8]:
# Import training functions
from functions.train import train_model

# Hyperparameters for the fine-tuning run; the training cell reads them by key
training_config = dict(
    num_epochs=15,
    batch_size=8,
    learning_rate=1e-4,
)

# Echo the settings so they are recorded in the notebook output
print("Starting training process...")
print(f"Configuration: {training_config}")
print("\n" + "="*60)

Starting training process...
Configuration: {'num_epochs': 15, 'batch_size': 8, 'learning_rate': 0.0001}



In [46]:
import importlib

import functions.mod_77_token_training
import functions.train

# Pick up on-disk edits to the training code without restarting the kernel.
# Order matters:
#   1. Reload the helper module first. The training log shows the mod-77 token
#      path being used by training, so functions.train presumably imports from
#      it -- reloading the dependency first lets the dependent see fresh code.
#   2. Reload functions.train.
#   3. Only then re-import train_model. importlib.reload() does not rebind
#      names previously copied out with `from ... import`, so importing before
#      the reload would keep calling the stale function.
importlib.reload(functions.mod_77_token_training)
importlib.reload(functions.train)

from functions.train import train_model

<module 'functions.mod_77_token_training' from '/Users/erenyavuz/Desktop/KU/25 Spring/COMP447/Project/Repo/FlightVision/functions/mod_77_token_training.py'>

In [47]:
# Train the model with positional embedding modifications.
# NOTE(review): the captured training log reports a deep copy of the model, so
# base_model is presumably left untouched and the fine-tuned weights live in
# trained_model -- confirm in functions.train.
trained_model = train_model(
    model= base_model,  # starting point: the base model loaded earlier
    config=config,  # shared project configuration
    dataset = dataset,  # the CustomDataset instance created above
    num_epochs=training_config['num_epochs'],
    batch_size=training_config['batch_size'],
    learning_rate=training_config['learning_rate'],
    tokenizer=tokenizer,  # Tokenizer for text processing
)

# Summary banner; only reached if train_model returns without raising
print("\n🎉 Training completed successfully!")
print("The model has been trained with:")
print("✓ Modified positional embeddings")
print("✓ Contrastive learning approach")

✓ Model successfully deep copied
Applying positional embedding modification...
Original positional embedding: LearnablePositionalEmbedding(num_embeddings=77, embedding_dim=512, padding_idx=None)
Modified Positional Embedding shape: torch.Size([1, 1, 77, 512])
Positional embedding successfully modified!
Using mod 77 token training with subsections...


Epoch 1/15:   0%|          | 2/885 [00:08<1:00:23,  4.10s/it, loss=84, avg_loss=72.9, mode=mod_77]  


KeyboardInterrupt: 

In [None]:
# Load the best checkpoint for further evaluation
import torch

# map_location='cpu' lets a checkpoint that was saved from a GPU runtime load
# on a CPU-only machine (torch.load would otherwise try to restore tensors to
# the device they were saved from). load_state_dict then copies the values
# into the model's existing parameters, wherever those live.
# NOTE(review): 'best_model.pth' is resolved against the current working
# directory -- confirm this matches where train_model writes it.
checkpoint = torch.load('best_model.pth', map_location='cpu')

# NOTE(review): this overwrites base_model's weights in place, so the original
# baseline weights are no longer available after this cell runs.
base_model.load_state_dict(checkpoint['model_state_dict'])

print("Best model loaded!")
print(f"Best validation loss achieved: {checkpoint['val_loss']:.4f}")
# The +1 suggests the stored epoch index is zero-based; shown one-based here
print(f"Best model was saved at epoch: {checkpoint['epoch'] + 1}")

# You can now use this trained model for inference
#base_model.eval()

# Evaluate the trained model

In [None]:
# Evaluate the trained model with the same settings as the base-model
# evaluation so the two result sets are directly comparable.
# (evaluate_dataset is imported in the base-model evaluation cell.)
evaluation_results = evaluate_dataset(
    model=base_model,  # now holds the weights loaded from the best checkpoint
    testDataset=dataset,
    config=config,
    tokenizer=tokenizer,  # passed here to match the base-model evaluation call
    k_values=[1, 5, 10, 20],  # same cutoffs as the baseline run
    force_rebuild_index=True,  # Set to True to force rebuild FAISS index
    verbose=True,
    only_test=True  # Set to True to evaluate only on the test set
)
# NOTE(review): evaluate_dataset is expected to save and print the results
# itself -- not verifiable from this notebook.
print("Evaluation of the trained model complete!")

