<a href="https://colab.research.google.com/github/divyaprabhakaran7/Code-Mixed-Spanish-VLM-Study/blob/main/CLIP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers torchvision



In [None]:
pip install tqdm



In [None]:
from PIL import Image
import requests
import torch

from transformers import CLIPProcessor, CLIPModel

# Smoke test: load the CLIP checkpoint and push one text/image pair through it
# to confirm that the installation works end to end.

# Load CLIP model and processor
# NOTE: `model` and `processor` are notebook globals; a later cell rebinds them
# to fresh instances loaded from the same checkpoint name.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

print("✅ CLIP model and processor loaded successfully.")

# Test text and dummy image
# The image is a blank 224x224 white RGB canvas; it is only a placeholder input.
text = "A happy tweet"
image = Image.new("RGB", (224, 224), color="white")

# The processor prepares both modalities and returns PyTorch tensors ("pt").
inputs = processor(text=[text], images=image, return_tensors="pt")
# Inference only, so gradient tracking is disabled for the forward pass.
with torch.no_grad():
    outputs = model(**inputs)  # result is not inspected in this cell; success == no exception

print("✅ Model forward pass completed.")


✅ CLIP model and processor loaded successfully.
✅ Model forward pass completed.


In [None]:
import pandas as pd
import torch
import zipfile
from io import BytesIO
from PIL import Image
from IPython.display import display
from transformers import CLIPProcessor, CLIPModel
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from tqdm import tqdm

# ---- Step 1: Manual File Uploads ----
# Creates three single-file upload widgets (train CSV, test CSV, image ZIP) and
# renders them. Later cells read the `.value` of these widgets, so the files
# have to be uploaded through the rendered buttons before those cells are run.
print("⬆️ Please upload your training CSV, test CSV, and ZIP of images using the upload buttons below.")

import ipywidgets as widgets

# `accept` restricts the file picker to the given extension; `multiple=False`
# limits each widget to a single file.
upload_train_csv = widgets.FileUpload(accept='.csv', multiple=False, description='Upload Train CSV')
upload_test_csv = widgets.FileUpload(accept='.csv', multiple=False, description='Upload Test CSV')
upload_zip = widgets.FileUpload(accept='.zip', multiple=False, description='Upload Image ZIP')
# Render all three widgets in the cell output.
display(upload_train_csv, upload_test_csv, upload_zip)


⬆️ Please upload your training CSV, test CSV, and ZIP of images using the upload buttons below.


FileUpload(value={}, accept='.csv', description='Upload Train CSV')

FileUpload(value={}, accept='.csv', description='Upload Test CSV')

FileUpload(value={}, accept='.zip', description='Upload Image ZIP')

In [20]:
# --- Read CSV ---
def get_dataframe(upload_widget):
    """Parse the first file held by an upload widget as a CSV table.

    Both layouts of ``FileUpload.value`` are accepted: the ipywidgets 7 dict
    that maps file name -> file record, and the ipywidgets 8 tuple of file
    records (whose ``'content'`` is a ``memoryview`` rather than ``bytes``).

    Args:
        upload_widget: Object whose ``value`` attribute holds the uploaded file
            records; each record exposes the raw file bytes under ``'content'``.

    Returns:
        A ``pandas.DataFrame`` parsed from the first uploaded file, or ``None``
        when the widget holds no file.
    """
    uploads = upload_widget.value
    # ipywidgets 7 keys the records by file name; ipywidgets 8 is a plain tuple.
    records = list(uploads.values()) if isinstance(uploads, dict) else list(uploads)
    if not records:
        return None
    # BytesIO accepts any bytes-like object, so bytes and memoryview both work.
    return pd.read_csv(BytesIO(records[0]['content']))

# --- Extract images from zip ---
# Decoded images keyed by bare file name (directories inside the ZIP are dropped).
image_dict = {}

def extract_images(upload_widget):
    """Decode every JPEG/PNG found in the uploaded ZIP archive(s) into ``image_dict``.

    Archive members are selected by extension (``.jpg``, ``.jpeg``, ``.png``,
    case-insensitive). Each one is opened with PIL, converted to RGB and stored
    in the module-level ``image_dict`` under the last path component of its
    archive name, so two members with the same base name overwrite each other.
    Members that cannot be decoded are reported and skipped.

    Both layouts of ``FileUpload.value`` are accepted: the ipywidgets 7 dict
    (file name -> record) and the ipywidgets 8 tuple of records.

    Args:
        upload_widget: Object whose ``value`` attribute holds the uploaded file
            records; each record exposes the raw ZIP bytes under ``'content'``.

    Returns:
        None. Results are written into ``image_dict``.
    """
    print("📦 Extracting images from ZIP...")
    uploads = upload_widget.value
    # ipywidgets 7 keys the records by file name; ipywidgets 8 is a plain tuple.
    records = list(uploads.values()) if isinstance(uploads, dict) else list(uploads)
    if not records:
        # Make the "0 images" outcome explainable instead of silent.
        print("⚠️ No ZIP file has been uploaded yet.")

    for record in records:
        # Context manager guarantees the archive handle is released.
        with zipfile.ZipFile(BytesIO(record['content'])) as archive:
            for member in archive.namelist():
                # Directory entries end with '/', so the extension test skips them too.
                if not member.lower().endswith(('.jpg', '.jpeg', '.png')):
                    continue
                filename = member.split('/')[-1]
                try:
                    with archive.open(member) as img_file:
                        # convert() forces the pixel data to be read while the
                        # archive member is still open.
                        image_dict[filename] = Image.open(img_file).convert("RGB")
                except Exception as exc:
                    # Best effort: report the reason and continue with the rest.
                    # (Unlike a bare except, this lets KeyboardInterrupt through.)
                    print(f"❌ Could not load image: {filename} ({exc})")
    print(f"✅ Loaded {len(image_dict)} images.")

# --- Load CLIP ---
# Binds the notebook-level `model` and `processor` globals that
# extract_features() reads. This is the same checkpoint name that the
# smoke-test cell earlier in the notebook loads, so running both cells loads
# it twice.
print("📥 Loading CLIP model...")
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
print("✅ CLIP model and processor loaded.")


📥 Loading CLIP model...
✅ CLIP model and processor loaded.


In [23]:
# --- Extract CLIP features ---
def extract_features(df, text_col='Original English', file_col='file name'):
    """Build one concatenated CLIP text+image embedding per DataFrame row.

    For every row, the text in ``text_col`` is embedded with
    ``model.get_text_features`` and the image stored in ``image_dict`` under the
    name found in ``file_col`` is embedded with ``model.get_image_features``.
    Each embedding is L2-normalised and the two are concatenated, text first.
    A row whose file name is not a key of ``image_dict`` gets an all-zero image
    half and is counted as missing.

    Relies on the notebook globals ``model``, ``processor`` and ``image_dict``.

    Args:
        df: DataFrame with one sample per row.
        text_col: Name of the column holding the text to embed.
        file_col: Name of the column holding the image file name.

    Returns:
        A list with one 1-D NumPy array per row, in row order.
    """
    features = []
    missing = 0

    # Inference only: a single no_grad scope covers every forward pass.
    with torch.no_grad():
        for _, row in tqdm(df.iterrows(), total=len(df), desc="Extracting features"):
            raw_text = row[text_col]
            fname = row[file_col]

            # Empty CSV cells are read as NaN and numeric-looking cells as
            # numbers; the tokenizer only accepts str, so coerce here instead
            # of crashing part-way through a long extraction run.
            text = "" if pd.isna(raw_text) else str(raw_text)

            # Text embedding
            text_inputs = processor(text=[text], return_tensors="pt", padding=True, truncation=True)
            text_feat = model.get_text_features(**text_inputs)
            text_feat = text_feat / text_feat.norm(p=2, dim=-1, keepdim=True)

            # Image embedding
            if fname in image_dict:
                image_inputs = processor(images=image_dict[fname], return_tensors="pt")
                image_feat = model.get_image_features(**image_inputs)
                image_feat = image_feat / image_feat.norm(p=2, dim=-1, keepdim=True)
            else:
                # No image available: keep the feature width constant with zeros.
                image_feat = torch.zeros_like(text_feat)
                missing += 1

            # Concatenate along the feature axis, then drop the batch axis of size 1.
            combined = torch.cat([text_feat, image_feat], dim=1).squeeze(0).numpy()
            features.append(combined)

    if missing > 0:
        print(f"⚠️ {missing} images were missing.")
    return features


In [24]:
# Run everything: load the uploads, embed both splits with CLIP, fit a logistic
# regression on the embeddings and report test-set metrics.
train_df = get_dataframe(upload_train_csv)
test_df = get_dataframe(upload_test_csv)

# get_dataframe() returns None when a widget holds no file; stop here with a
# clear message instead of failing later inside extract_features().
if train_df is None or test_df is None:
    raise RuntimeError("Upload both the training CSV and the test CSV before running this cell.")

# Check the schema up front so a missing column is reported before the
# multi-minute feature extraction rather than after it. The first two names are
# the default columns read by extract_features(); 'label' is the target.
for split_name, split_df in (("training", train_df), ("test", test_df)):
    absent = [col for col in ('Original English', 'file name', 'label') if col not in split_df.columns]
    if absent:
        raise KeyError(f"The {split_name} CSV is missing required column(s): {absent}")

extract_images(upload_zip)
if not image_dict:
    # extract_features() substitutes zeros for every missing image, so with no
    # images the classifier is trained on the text embedding only.
    print("⚠️ No images were loaded: every image embedding will be all zeros, "
          "so the results below reflect text features only.")

print("🧠 Extracting features from training data...")
X_train = extract_features(train_df)
y_train = train_df['label'].tolist()

print("🧪 Extracting features from test data...")
X_test = extract_features(test_df)
y_test = test_df['label'].tolist()

print("🔧 Training classifier...")
# max_iter is raised above scikit-learn's default to give the solver room to converge.
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

print("📈 Predicting...")
y_pred = clf.predict(X_test)

print("\n📊 Sentiment Classification Report:")
print(classification_report(y_test, y_pred))


📦 Extracting images from ZIP...
✅ Loaded 0 images.
🧠 Extracting features from training data...


Extracting features: 100%|██████████| 3213/3213 [04:22<00:00, 12.25it/s]


⚠️ 3213 images were missing.
🧪 Extracting features from test data...


Extracting features: 100%|██████████| 804/804 [01:10<00:00, 11.37it/s]


⚠️ 804 images were missing.
🔧 Training classifier...
📈 Predicting...

📊 Sentiment Classification Report:
              precision    recall  f1-score   support

    negative       0.59      0.72      0.65       268
     neutral       0.51      0.37      0.43       268
    positive       0.65      0.69      0.67       268

    accuracy                           0.59       804
   macro avg       0.58      0.59      0.58       804
weighted avg       0.58      0.59      0.58       804

