In [68]:
from google.colab import drive

# Mount Google Drive to the default mount point
drive.mount('/content/drive')

# Import the os module
import os

# Create the project folder if it doesn't exist.
# exist_ok=True makes this a single idempotent call (safe to re-run the cell)
# instead of a separate exists-check followed by a create.
project_folder = '/content/drive/MyDrive/FinalProject'
os.makedirs(project_folder, exist_ok=True)

# Now you can access files within your project folder
# Example:
# with open(os.path.join(project_folder, 'my_file.txt'), 'r') as f:
#     # Process the file contents

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [69]:
# Ensure the project folder exists using Python instead of `!mkdir -p $project_folder`:
# the shell form interpolates the path unquoted, so a path containing spaces
# would be split into several directory names.
import os
os.makedirs(project_folder, exist_ok=True)

In [70]:
from torchvision.datasets import EuroSAT
from torchvision import transforms

import os

# Root directory under the project folder where the EuroSAT files are stored.
eurosat_dir = f'{project_folder}/eurosat'

# Create the dataset root in Python rather than through an unquoted shell
# interpolation (`!mkdir -p $eurosat_dir`), which breaks on paths with spaces.
os.makedirs(eurosat_dir, exist_ok=True)

# download=True asks torchvision to fetch the dataset into eurosat_dir.
dataset = EuroSAT(root=eurosat_dir, download=True)

In [2]:
!pip install torch torchvision



In [3]:
!pip install git+https://github.com/openai/CLIP.git

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-viyov35t
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-viyov35t
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting ftfy (from clip==1.0)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: clip
  Building wheel for clip (setup.py) ... [?25l[?25hdone
  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369489 sha256=bf023113804061a04c8ca5cec896f7a607df79e5c071b0f67146f1599b825abc
  Stored in directory: /tmp/pip-ephem-wheel-cache-aww87ii0/wheels/da/2b/4c/d6691fa9597aac8bb

In [66]:
import clip
import torch
from torchvision.datasets import EuroSAT
from torchvision import transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
import numpy as np

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the 'RN50' CLIP model on that device; the second return value is later
# used as the image transform for the dataset.
model, preprocess = clip.load('RN50', device=device)

In [67]:
# Print the layer structure of the model's visual encoder as loaded, so it can
# be compared with the printouts made after each activation swap further down.
print(model.visual)

ModifiedResNet(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU(inplace=True)
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu2): ReLU(inplace=True)
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): ReLU(inplace=True)
  (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), str

In [56]:
import random

eurosat_dir = "/content/drive/MyDrive/FinalProject/eurosat"

# Evaluation-subset configuration. A fixed seed makes the sampled subset
# identical across kernel restarts, so the reported accuracies are reproducible.
SUBSET_SEED = 42
SUBSET_SIZE = 1000

# CLIP's own preprocessing is applied to every image.
transform = preprocess

dataset = EuroSAT(root=eurosat_dir, download=False, transform=transform)

total_samples = len(dataset)

# A dedicated Random instance keeps the draw deterministic without touching the
# global random state. The size is clamped because random.sample raises
# ValueError when asked for more items than the population holds.
subset_rng = random.Random(SUBSET_SEED)
sample_indices = subset_rng.sample(range(total_samples), min(SUBSET_SIZE, total_samples))
subset_dataset = torch.utils.data.Subset(dataset, sample_indices)

# shuffle=False: batch order follows sample_indices, which is already random.
dataloader = DataLoader(subset_dataset, batch_size=128, shuffle=False)

In [57]:
# EuroSAT class labels.
# The position of each name is what the predicted index is compared against in
# the evaluation loops, so the order matters.
# NOTE(review): confirm this order matches the dataset's integer labels.
euro_classes = [
    "Annual Crop",
    "Forest",
    "Herbaceous Vegetation",
    "Highway",
    "Industrial Area",
    "Pasture",
    "Permanent Crop",
    "Residential Area",
    "River",
    "Sea/Lake",
]

# Build one CLIP text prompt per class, then tokenize them and move the tokens
# to the same device as the model.
text_prompts = []
for label in euro_classes:
    text_prompts.append(f"a photo of {label}")
text_inputs = clip.tokenize(text_prompts)
text_inputs = text_inputs.to(device)

In [58]:
# Baseline zero-shot evaluation, run before any activation swap.
# Encode the class prompts and L2-normalise them so that the matrix product
# below is a cosine similarity.
with torch.no_grad():
    text_features = model.encode_text(text_inputs)
    text_features /= text_features.norm(dim=-1, keepdim=True)

correct, total = 0, 0

print("Running Zero-shot Eval on EuroSAT...")
with torch.no_grad():
    for images, target in tqdm(dataloader):
        images, target = images.to(device), target.to(device)

        image_features = model.encode_image(images)
        image_features /= image_features.norm(dim=-1, keepdim=True)

        # Scaled similarities -> per-image distribution over the class prompts;
        # the predicted class is the prompt with the highest probability.
        similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
        predictions = similarity.argmax(dim=1)

        correct += (predictions == target).sum().item()
        total += target.size(0)

# Percentage of subset images whose top-scoring prompt matches the label.
accuracy_original = 100.0 * correct / total
print()
print(f"Accuracy: {accuracy_original:.2f}%")

Running Zero-shot Eval on EuroSAT...


100%|██████████| 8/8 [02:45<00:00, 20.63s/it]


Accuracy: 20.20%





In [59]:
import torch.nn as nn

def replace_af(module, old_act=nn.ReLU, new_act=nn.Tanh):
    """Recursively swap activation layers inside ``module``, in place.

    Every direct or nested child that is an instance of ``old_act`` is replaced
    by a freshly constructed ``new_act()`` under the same attribute name.
    Children that are not ``old_act`` instances are searched recursively.

    Args:
        module: The ``nn.Module`` whose sub-modules are rewritten. The module
            itself is never replaced, only its descendants.
        old_act: Module class to look for.
        new_act: Zero-argument callable (typically a module class) that builds
            the replacement layer.

    Returns:
        None. ``module`` is mutated.
    """
    for child_name, submodule in module.named_children():
        if not isinstance(submodule, old_act):
            # Not a target layer: descend and keep looking.
            replace_af(submodule, old_act, new_act)
            continue
        # Re-registering under the same name substitutes the layer in place.
        setattr(module, child_name, new_act())

# First variant: replace every ReLU in the visual encoder with Tanh (in place),
# then print the encoder to confirm the swap.
replace_af(model.visual, old_act=nn.ReLU, new_act=nn.Tanh)
print(model.visual)

ModifiedResNet(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): Tanh()
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu2): Tanh()
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): Tanh()
  (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1): Tanh()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (b

In [60]:
# Zero-shot evaluation with the visual encoder's activations swapped to Tanh.
# Encode the class prompts and L2-normalise them so that the matrix product
# below is a cosine similarity.
with torch.no_grad():
    text_features = model.encode_text(text_inputs)
    text_features /= text_features.norm(dim=-1, keepdim=True)

correct, total = 0, 0

print("Running Zero-shot Eval on EuroSAT...(with Tanh AF)")
with torch.no_grad():
    for images, target in tqdm(dataloader):
        images, target = images.to(device), target.to(device)

        image_features = model.encode_image(images)
        image_features /= image_features.norm(dim=-1, keepdim=True)

        # Scaled similarities -> per-image distribution over the class prompts;
        # the predicted class is the prompt with the highest probability.
        similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
        predictions = similarity.argmax(dim=1)

        correct += (predictions == target).sum().item()
        total += target.size(0)

# Percentage of subset images whose top-scoring prompt matches the label.
accuracy_tanh = 100.0 * correct / total
print()
print(f"Accuracy_Tanh: {accuracy_tanh:.2f}%")

Running Zero-shot Eval on EuroSAT...(with Tanh AF)


100%|██████████| 8/8 [00:04<00:00,  1.95it/s]


Accuracy_Tanh: 9.50%





In [61]:
# Second variant: replace every Tanh currently in the visual encoder with
# LeakyReLU (in place), then print the encoder to confirm the swap.
replace_af(model.visual, old_act=nn.Tanh, new_act=nn.LeakyReLU)
print(model.visual)

ModifiedResNet(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): LeakyReLU(negative_slope=0.01)
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu2): LeakyReLU(negative_slope=0.01)
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): LeakyReLU(negative_slope=0.01)
  (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1): LeakyReLU(negative_slope=0.01)
     

In [62]:
# Zero-shot evaluation with the visual encoder's activations swapped to LeakyReLU.
# Encode the class prompts and L2-normalise them so that the matrix product
# below is a cosine similarity.
with torch.no_grad():
    text_features = model.encode_text(text_inputs)
    text_features /= text_features.norm(dim=-1, keepdim=True)

correct, total = 0, 0

print("Running Zero-shot Eval on EuroSAT...(with LeakyReLU AF)")
with torch.no_grad():
    for images, target in tqdm(dataloader):
        images, target = images.to(device), target.to(device)

        image_features = model.encode_image(images)
        image_features /= image_features.norm(dim=-1, keepdim=True)

        # Scaled similarities -> per-image distribution over the class prompts;
        # the predicted class is the prompt with the highest probability.
        similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
        predictions = similarity.argmax(dim=1)

        correct += (predictions == target).sum().item()
        total += target.size(0)

# Percentage of subset images whose top-scoring prompt matches the label.
accuracy_LReLU = 100.0 * correct / total
print()
print(f"Accuracy_LeakyReLU: {accuracy_LReLU:.2f}%")

Running Zero-shot Eval on EuroSAT...(with LeakyReLU AF)


100%|██████████| 8/8 [00:04<00:00,  1.90it/s]


Accuracy_LeakyReLU: 11.90%





In [63]:
# Third variant: replace every LeakyReLU currently in the visual encoder with
# SiLU (in place), then print the encoder to confirm the swap.
replace_af(model.visual, old_act=nn.LeakyReLU, new_act=nn.SiLU)
print(model.visual)

ModifiedResNet(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): SiLU()
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu2): SiLU()
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): SiLU()
  (avgpool): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1): SiLU()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (b

In [64]:
# Zero-shot evaluation with the visual encoder's activations swapped to SiLU.
# Encode the class prompts and L2-normalise them so that the matrix product
# below is a cosine similarity.
with torch.no_grad():
    text_features = model.encode_text(text_inputs)
    text_features /= text_features.norm(dim=-1, keepdim=True)

correct = 0
total = 0

# Fixed banner: it used to read "with LeakyReLU AF" (copy-paste from the
# previous experiment) although this run evaluates the SiLU variant.
print("Running Zero-shot Eval on EuroSAT...(with SiLU AF)")
for images, target in tqdm(dataloader):
    images = images.to(device)
    target = target.to(device)

    with torch.no_grad():
        image_features = model.encode_image(images)
        image_features /= image_features.norm(dim=-1, keepdim=True)

    # Scaled similarities -> per-image distribution over the class prompts;
    # the predicted class is the prompt with the highest probability.
    similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
    predictions = similarity.argmax(dim=1)

    correct += (predictions == target).sum().item()
    total += target.size(0)

# Percentage of subset images whose top-scoring prompt matches the label.
accuracy_SiLU = 100.0 * correct / total
print()
print(f"Accuracy_SiLU: {accuracy_SiLU:.2f}%")

Running Zero-shot Eval on EuroSAT...(with LeakyReLU AF)


100%|██████████| 8/8 [00:04<00:00,  1.86it/s]


Accuracy_SiLU: 10.90%





In [65]:
# Side-by-side summary of the accuracy obtained with each activation function.
for variant, score in (
    ("Original", accuracy_original),
    ("Tanh", accuracy_tanh),
    ("LeakyReLU", accuracy_LReLU),
    ("SiLU", accuracy_SiLU),
):
    print(f"Accuracy_{variant}: {score:.2f}%")

Accuracy_Original: 20.20%
Accuracy_Tanh: 9.50%
Accuracy_LeakyReLU: 11.90%
Accuracy_SiLU: 10.90%
