<a href="https://colab.research.google.com/github/divyaprabhakaran7/Code-Mixed-Spanish-VLM-Study/blob/main/CLIP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers torchvision



In [None]:
pip install tqdm



In [None]:
from PIL import Image
import requests
import torch

from transformers import CLIPProcessor, CLIPModel

# Instantiate the pretrained CLIP weights and the matching pre-processor
# from a single checkpoint identifier.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(CLIP_CHECKPOINT)
processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)

print("✅ CLIP model and processor loaded successfully.")

# Smoke test: pair one short caption with a blank white 224x224 RGB image
# and push both through the model once, without tracking gradients.
text = "A happy tweet"
image = Image.new(mode="RGB", size=(224, 224), color="white")

inputs = processor(
    text=[text],
    images=image,
    return_tensors="pt",
)
with torch.no_grad():
    outputs = model(**inputs)

print("✅ Model forward pass completed.")


✅ CLIP model and processor loaded successfully.
✅ Model forward pass completed.


In [None]:
import pandas as pd
import torch
import zipfile
from io import BytesIO
from PIL import Image
from IPython.display import display
from transformers import CLIPProcessor, CLIPModel
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from tqdm import tqdm

# ---- Step 1: Manual File Uploads ----
print("⬆️ Please upload your training CSV, test CSV, and ZIP of images using the upload buttons below.")

import ipywidgets as widgets


def _upload_button(extension, label):
    """Create a single-file upload widget that only accepts `extension`."""
    return widgets.FileUpload(accept=extension, multiple=False, description=label)


# One button per artefact; later cells read the uploads from these widgets.
upload_train_csv = _upload_button('.csv', 'Upload Train CSV')
upload_test_csv = _upload_button('.csv', 'Upload Test CSV')
upload_zip = _upload_button('.zip', 'Upload Image ZIP')
display(upload_train_csv, upload_test_csv, upload_zip)


⬆️ Please upload your training CSV, test CSV, and ZIP of images using the upload buttons below.


FileUpload(value={}, accept='.csv', description='Upload Train CSV')

FileUpload(value={}, accept='.csv', description='Upload Test CSV')

FileUpload(value={}, accept='.zip', description='Upload Image ZIP')

In [None]:
# --- Read CSV ---
def get_dataframe(upload_widget):
    """Parse the first file held by an upload widget as CSV.

    Both layouts of ``upload_widget.value`` are accepted: the ipywidgets 7
    mapping ``{filename: {'content': bytes, ...}}`` and the ipywidgets 8
    tuple of ``{'name': ..., 'content': memoryview, ...}`` records.

    Args:
        upload_widget: Object whose ``value`` attribute holds the uploads.

    Returns:
        A ``pandas.DataFrame`` read from the first upload, or ``None`` when
        the widget holds no file.
    """
    uploads = upload_widget.value
    # ipywidgets 7 keys the uploads by filename; only the records matter here.
    if hasattr(uploads, "values"):
        uploads = uploads.values()

    for file in uploads:
        # BytesIO accepts any bytes-like object, so bytes and memoryview both work.
        return pd.read_csv(BytesIO(file['content']))
    return None

# --- Extract images from zip ---
image_dict = {}

def extract_images(upload_widget):
    """Decode the images inside the uploaded ZIP archive(s) into ``image_dict``.

    Archive members are visited at any folder depth. macOS metadata entries
    (``__MACOSX`` folders and ``._*`` files) and members that are not
    ``.jpg``/``.jpeg``/``.png`` are skipped. Each remaining member is decoded
    to an RGB image and stored in the module-level ``image_dict`` under its
    stripped, lower-cased base name, replacing any entry with the same key.
    A member that fails to decode is reported and skipped.

    Both layouts of ``upload_widget.value`` are accepted: the ipywidgets 7
    mapping keyed by filename and the ipywidgets 8 tuple of records.

    Args:
        upload_widget: Object whose ``value`` attribute holds the uploads;
            each upload exposes its raw data under ``'content'``.

    Returns:
        Number of images decoded by this call (0 when nothing was uploaded).
    """
    print("📦 Extracting images from ZIP (with nested folders)...")
    uploads = upload_widget.value
    if hasattr(uploads, "values"):
        uploads = uploads.values()

    loaded = 0
    for file in uploads:
        # The context manager closes the archive even if decoding fails.
        with zipfile.ZipFile(BytesIO(file['content'])) as zf:
            all_files = zf.namelist()
            print(f"Total files in ZIP: {len(all_files)}")
            for full_path in all_files:
                # Skip junk files from macOS
                if '__MACOSX' in full_path or '/._' in full_path or full_path.startswith('._'):
                    continue
                if not full_path.lower().endswith(('.jpg', '.jpeg', '.png')):
                    continue

                # Key by base name only, so nested folders do not matter.
                filename = full_path.split('/')[-1].strip().lower()
                try:
                    with zf.open(full_path) as img_file:
                        # convert() forces a full decode while the member is still open.
                        image_dict[filename] = Image.open(img_file).convert("RGB")
                    loaded += 1
                except Exception as e:
                    print(f"❌ Could not load {filename}: {e}")

    print(f"✅ Loaded {loaded} images.")
    return loaded


# --- Load CLIP ---
# Model and processor come from the same checkpoint, so name it once.
print("📥 Loading CLIP model...")
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(CLIP_CHECKPOINT)
processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)
print("✅ CLIP model and processor loaded.")


📥 Loading CLIP model...
✅ CLIP model and processor loaded.


In [None]:
# --- Extract CLIP features ---
def extract_features(df, text_col='Original English', file_col='file name'):
    """Build one concatenated CLIP text+image feature vector per row of ``df``.

    For every row the caption in ``text_col`` is embedded with
    ``model.get_text_features`` and the image stored in ``image_dict`` under
    the row's ``file_col`` value is embedded with
    ``model.get_image_features``. Each embedding is divided by its L2 norm
    and the two are concatenated text-first. When no image is found, zeros
    shaped like the text embedding stand in for the image embedding.

    Uses the module-level ``model``, ``processor`` and ``image_dict``.

    Args:
        df: DataFrame with one sample per row.
        text_col: Column holding the caption text.
        file_col: Column holding the image file name.

    Returns:
        A list with one 1-D numpy array per row, in row order.
    """
    features = []
    missing = 0
    missing_text = 0

    for _, row in tqdm(df.iterrows(), total=len(df), desc="Extracting features"):
        text = row[text_col]
        fname = row[file_col]

        # pandas reads an empty CSV cell as NaN, which the tokenizer rejects;
        # embed an empty caption instead of aborting the whole run.
        if pd.isna(text):
            text = ""
            missing_text += 1
        else:
            text = str(text)

        # Text embedding
        text_inputs = processor(text=[text], return_tensors="pt", padding=True, truncation=True)
        with torch.no_grad():
            text_feat = model.get_text_features(**text_inputs)
            text_feat /= text_feat.norm(p=2, dim=-1, keepdim=True)

        # Image embedding: try the raw name first, then the stripped,
        # lower-cased form that the notebook's ZIP loader uses for its keys.
        key = fname if fname in image_dict else str(fname).strip().lower()
        if key in image_dict:
            image = image_dict[key]
            image_inputs = processor(images=image, return_tensors="pt")
            with torch.no_grad():
                image_feat = model.get_image_features(**image_inputs)
                image_feat /= image_feat.norm(p=2, dim=-1, keepdim=True)
        else:
            image_feat = torch.zeros_like(text_feat)
            missing += 1

        combined = torch.cat([text_feat, image_feat], dim=1).squeeze().numpy()
        features.append(combined)

    if missing > 0:
        print(f"⚠️ {missing} images were missing.")
    if missing_text > 0:
        print(f"⚠️ {missing_text} rows had no text.")
    return features


In [None]:
from google.colab import files

# Upload training CSV, test CSV, and ZIP of images
# The recorded output of this cell shows only "Images.zip" being saved to the
# working directory; the next cell opens that file from disk by name.
# NOTE(review): `uploaded` is not referenced again in the visible cells;
# presumably it maps each uploaded filename to its bytes — confirm against the
# google.colab documentation.
uploaded = files.upload()

Saving Images.zip to Images.zip


In [43]:
import zipfile
from PIL import Image

# Rebuild the base-name -> RGB image lookup from the archive saved on disk.
image_dict = {}
zip_path = "Images.zip"
IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png')


def _is_macos_junk(member):
    """Return True for macOS metadata entries (``__MACOSX`` folders, ``._*`` files)."""
    return '__MACOSX' in member or '/._' in member or member.startswith('._')


with zipfile.ZipFile(zip_path, 'r') as archive:
    for member in archive.namelist():
        if _is_macos_junk(member):
            continue
        # Anything that does not end in an image suffix (directories included) is ignored.
        if not member.lower().endswith(IMAGE_SUFFIXES):
            continue

        # Key by base name only, normalised to stripped lower case.
        filename = member.rsplit('/', 1)[-1].strip().lower()
        try:
            with archive.open(member) as handle:
                image_dict[filename] = Image.open(handle).convert("RGB")
        except Exception as e:
            print(f"❌ Could not load {filename}: {e}")

print(f"✅ Loaded {len(image_dict)} images.")
print("📂 Sample image keys:", list(image_dict)[:5])


✅ Loaded 4869 images.
📂 Sample image keys: ['63.jpg', '4217.jpg', '2100.jpg', '4565.jpg', '77.jpg']


In [44]:
# Run everything
train_df = get_dataframe(upload_train_csv)
test_df = get_dataframe(upload_test_csv)

# get_dataframe returns None when a widget holds no upload; stop here with an
# actionable message instead of failing later on a None subscript.
if train_df is None or test_df is None:
    raise ValueError(
        "Upload both the training CSV and the test CSV with the upload "
        "widgets before running this cell."
    )

print("Sample filenames from CSV:", train_df['file name'].unique()[:10])

extract_images(upload_zip)
print("Sample image filenames loaded:", list(image_dict.keys())[:10])

# Normalize filenames in the CSV so they match the stripped, lower-cased
# keys under which the images were stored in image_dict.
train_df['file name'] = train_df['file name'].str.strip().str.lower()
test_df['file name'] = test_df['file name'].str.strip().str.lower()

print("🧠 Extracting features from training data...")
X_train = extract_features(train_df)
y_train = train_df['label'].tolist()

print("🧪 Extracting features from test data...")
X_test = extract_features(test_df)
y_test = test_df['label'].tolist()

# Linear classifier on top of the frozen CLIP features.
print("🔧 Training classifier...")
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

print("📈 Predicting...")
y_pred = clf.predict(X_test)

print("\n📊 Sentiment Classification Report:")
print(classification_report(y_test, y_pred))


Sample filenames from CSV: ['100.jpg' '1001.jpg' '1002.jpg' '1009.jpg' '1018.jpg' '102.jpg'
 '1020.jpg' '1023.jpg' '1024.jpg' '1030.jpg']
📦 Extracting images from ZIP (with nested folders)...
Sample image filenames loaded: ['63.jpg', '4217.jpg', '2100.jpg', '4565.jpg', '77.jpg', '1353.jpg', '1421.jpg', '1390.jpg', '2841.jpg', '162.jpg']
🧠 Extracting features from training data...


Extracting features: 100%|██████████| 3213/3213 [17:56<00:00,  2.99it/s]


🧪 Extracting features from test data...


Extracting features: 100%|██████████| 804/804 [04:38<00:00,  2.89it/s]


🔧 Training classifier...
📈 Predicting...

📊 Sentiment Classification Report:
              precision    recall  f1-score   support

    negative       0.60      0.73      0.66       268
     neutral       0.50      0.37      0.42       268
    positive       0.61      0.64      0.62       268

    accuracy                           0.58       804
   macro avg       0.57      0.58      0.57       804
weighted avg       0.57      0.58      0.57       804

