# Fine-Tuning CLIP Models

In [7]:
!pip install git+https://github.com/openai/CLIP.git


Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to c:\users\gfrag\appdata\local\temp\pip-req-build-nhye12s1
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: clip
  Building wheel for clip (pyproject.toml): started
  Building wheel for clip (pyproject.toml): finished with status 'done'
  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369594 sha256=e44edeca6f2f45c29e3dd872afae3cfbae499facdcccc6dd8308bfab6e6884ce
  Stored in directory: C:\Users\gfrag\AppData\Local\Temp\pip-ephem-wheel-cache-m_sshree\wheels\35\3e\df

  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git 'C:\Users\gfrag\AppData\Local\Temp\pip-req-build-nhye12s1'


In [8]:
import json
from PIL import Image
import os
import torch
import clip
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import zipfile
from tqdm import tqdm
import torch.nn as nn
from torch.utils.data import DataLoader
from transformers import CLIPProcessor, CLIPModel

  from .autonotebook import tqdm as notebook_tqdm


## Step 1: Define Dataset Path

In [10]:
# Root folder of the dataset on disk; a later cell reads its "train" sub-folder.
local_data_dir = "New Plant Diseases Dataset(Augmented)"  # Update with your local path

def get_image_paths_and_labels(base_dir, extensions=None):
    """Collect ``(image_path, label)`` pairs from a folder-per-class dataset.

    Every immediate sub-directory of ``base_dir`` is treated as one class and
    its name is used as the label for the files it contains.

    Args:
        base_dir: Directory whose sub-directories are the class folders.
        extensions: Optional iterable of file suffixes (e.g. ``(".jpg", ".png")``)
            to keep, compared case-insensitively. ``None`` (the default) keeps
            every regular file.

    Returns:
        A list of ``(path, label)`` tuples, ordered by class name and then by
        file name so the result is identical across runs and platforms.

    Raises:
        FileNotFoundError: If ``base_dir`` does not exist.
    """
    suffixes = None
    if extensions is not None:
        suffixes = tuple(ext.lower() for ext in extensions)

    data = []
    # os.listdir() returns entries in arbitrary order; sort for reproducibility.
    for label in sorted(os.listdir(base_dir)):
        class_dir = os.path.join(base_dir, label)
        if not os.path.isdir(class_dir):
            continue  # stray files next to the class folders are not classes
        for img_name in sorted(os.listdir(class_dir)):
            img_path = os.path.join(class_dir, img_name)
            # Nested directories cannot be opened as images; skip them.
            if not os.path.isfile(img_path):
                continue
            if suffixes is not None and not img_name.lower().endswith(suffixes):
                continue
            data.append((img_path, label))
    return data

## Step 2: Define Custom Dataset

In [11]:
class PlantDiseaseDataset(Dataset):
    """Map-style dataset over ``(image_path, label)`` pairs.

    Each item is loaded from disk on access, converted to RGB and passed
    through the supplied ``preprocess`` callable.
    """

    def __init__(self, image_label_list, preprocess):
        """Store the samples and the transform.

        Args:
            image_label_list: Sequence of ``(image_path, label)`` tuples.
            preprocess: Callable applied to the RGB PIL image of every sample.
        """
        self.data = image_label_list
        self.preprocess = preprocess

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(preprocessed_image, label)``."""
        img_path, label = self.data[idx]
        # Use a context manager so the underlying file handle is released
        # deterministically; convert() yields an independent in-memory image,
        # so it stays valid after the source file is closed.
        with Image.open(img_path) as source:
            image = source.convert("RGB")
        image = self.preprocess(image)
        return image, label

## Step 3: Load CLIP Model

In [12]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load the "ViT-B/32" CLIP model onto `device`. The second return value,
# `preprocess`, is what PlantDiseaseDataset later applies to every image.
model, preprocess = clip.load("ViT-B/32", device=device)
# Bare expression: the notebook echoes the selected device as the cell output.
device

'cuda'

In [14]:
# Gather (image_path, class_folder_name) pairs from the dataset's "train" folder.
data = get_image_paths_and_labels(os.path.join(local_data_dir, "train"))
# Wrap the pairs so that each item is (preprocessed image, label string).
dataset = PlantDiseaseDataset(data, preprocess)
# Shuffled mini-batches of 32 samples, consumed by the fine-tuning loop.
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

## Step 4: Fine-tune CLIP Model

In [None]:
NUM_EPOCHS = 5          # number of passes over the training set
LEARNING_RATE = 1e-5    # small step size: we are only nudging pretrained weights

# clip.load() returns half-precision weights when loading onto CUDA. Adam updates
# on fp16 parameters easily overflow to NaN, so fine-tune in full precision.
model.float()
model.train()

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
loss_fn = torch.nn.CrossEntropyLoss()

for epoch in range(NUM_EPOCHS):
    epoch_loss = 0.0
    num_batches = 0

    for images, labels in tqdm(dataloader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}"):
        images = images.to(device)

        # A batch normally holds several images of the same class. Pairing image i
        # with caption i (targets = arange) would treat identical captions as
        # negatives of each other, so encode every distinct label once and point
        # each image at the index of its own label instead.
        unique_labels = sorted(set(labels))
        label_to_index = {name: i for i, name in enumerate(unique_labels)}
        targets = torch.tensor([label_to_index[name] for name in labels], device=device)
        text_inputs = clip.tokenize(unique_labels).to(device)

        # A single forward pass yields both similarity matrices:
        #   logits_per_image: (batch, n_unique)   logits_per_text: (n_unique, batch)
        logits_per_image, logits_per_text = model(images, text_inputs)

        # Text -> image direction: every image carrying that label is a valid
        # match, so use a uniform distribution over those images as soft target.
        label_ids = torch.arange(len(unique_labels), device=device)
        positives = (targets.unsqueeze(0) == label_ids.unsqueeze(1)).to(logits_per_text.dtype)
        text_targets = positives / positives.sum(dim=1, keepdim=True)

        loss = (loss_fn(logits_per_image, targets) + loss_fn(logits_per_text, text_targets)) / 2
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        num_batches += 1

    # Report the mean loss over the epoch (not just the last batch); the guard
    # keeps an empty dataloader from raising.
    print(f"Epoch {epoch+1}, Loss: {epoch_loss / max(num_batches, 1)}")

print("Fine-tuning complete!")


  attn_output = scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal)


## Step 5: Save the Fine-Tuned Model

In [None]:

# Persist only the learned parameters (state dict), not the whole module object.
fine_tuned_weights = model.state_dict()
torch.save(fine_tuned_weights, "fine_tuned_clip.pth")
print("Fine-tuning complete! Model saved as fine_tuned_clip.pth")
