In [1]:
import torch
from torchvision import models

model = models.efficientnet_b4(weights=models.EfficientNet_B4_Weights.DEFAULT)

model.eval()

weights = models.EfficientNet_B4_Weights.DEFAULT.get_state_dict(progress=True)

Downloading: "https://download.pytorch.org/models/efficientnet_b4_rwightman-23ab8bcd.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b4_rwightman-23ab8bcd.pth
100%|██████████| 74.5M/74.5M [00:00<00:00, 146MB/s] 


In [2]:
import torch
from torch import nn
from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim

model = models.efficientnet_b4(weights=models.EfficientNet_B4_Weights.DEFAULT)

model.classifier[1] = nn.Linear(model.classifier[1].in_features, 10) 

for param in model.features.parameters():
    param.requires_grad = False

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

optimizer = optim.AdamW(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

epochs = 3
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()

    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Test Accuracy on CIFAR-10: {accuracy:.2f}%')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 78097727.47it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Epoch 1, Loss: 1.665056896987666
Epoch 2, Loss: 1.060906616411984
Epoch 3, Loss: 0.8677804567115245
Test Accuracy on CIFAR-10: 80.07%


In [6]:
import torch
from torchvision import models

model = models.vit_b_16(weights=models.ViT_B_16_Weights.IMAGENET1K_SWAG_E2E_V1)

model.eval()

weights = models.ViT_B_16_Weights.IMAGENET1K_SWAG_E2E_V1.get_state_dict(progress=True)

In [10]:
import torch
from torch import nn
from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim

model = models.vit_b_16(weights=models.ViT_B_16_Weights.IMAGENET1K_SWAG_E2E_V1)

model.heads.head = nn.Linear(model.heads.head.in_features, 10) 

for param in model.parameters():
    param.requires_grad = False

for param in model.heads.parameters():
    param.requires_grad = True

transform = transforms.Compose([
    transforms.Resize((384, 384)),  
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

optimizer = optim.AdamW(model.heads.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

epochs = 3
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()

    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Test Accuracy on CIFAR-10: {accuracy:.2f}%')

Files already downloaded and verified
Files already downloaded and verified
Epoch 1, Loss: 0.3539914568386834
Epoch 2, Loss: 0.15147645264191842
Epoch 3, Loss: 0.13129001925893066
Test Accuracy on CIFAR-10: 95.22%


In [11]:
from PIL import Image
import requests
from transformers import CLIPProcessor, CLIPModel
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
inputs = processor(text=["a photo of a cat", "a photo of a dog"], images=image, return_tensors="pt", padding=True)
outputs = model(**inputs)
logits_per_image = outputs.logits_per_image 
probs = logits_per_image.softmax(dim=1) 

config.json:   0%|          | 0.00/4.10k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/599M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/905 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/961k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.22M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]



In [12]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from transformers import CLIPProcessor, CLIPModel
from PIL import Image

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

transform = transforms.Compose([
    transforms.Resize((224, 224)),  
    transforms.ToTensor(),         
])

test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

cifar10_labels = [
    "airplane", "automobile", "bird", "cat", "deer", 
    "dog", "frog", "horse", "ship", "truck"
]


model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        pil_images = [Image.fromarray((image.permute(1, 2, 0).cpu().numpy() * 255).astype('uint8')) for image in images]
        image_inputs = processor(images=pil_images, return_tensors="pt", padding=True).to(device)
        
        text_inputs = processor(text=cifar10_labels, return_tensors="pt", padding=True).to(device)
        
        outputs = model(**image_inputs, **text_inputs)
        
        logits_per_image = outputs.logits_per_image
        
        probs = logits_per_image.softmax(dim=1)
        
        _, predicted = torch.max(probs, dim=1)
        
        correct += (predicted == labels.to(device)).sum().item()
        total += labels.size(0)

accuracy = 100 * correct / total
print(f'CLIP model accuracy on CIFAR-10 test set: {accuracy:.2f}%')

Files already downloaded and verified
CLIP model accuracy on CIFAR-10 test set: 87.30%
