In [1]:
import clip
import torch
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Inputs for the 5k-photo run: the folder of compressed images and the
# per-photo table that is read from CSV.
img_folder = '5k-compressed'
photos = pd.read_csv('photos-5k-pred.csv')

In [3]:
# Drop photos that have no ISO value. dropna returns a new frame, so the
# result is rebound to `photos` rather than mutating the frame in place.
iso_is_null = photos['exif_iso'].isnull()
print(f'rows with iso null values: {iso_is_null.sum()}')
photos = photos.dropna(subset=['exif_iso'])
print('remove iso null value')
print(f'rows with iso null values: {photos.exif_iso.isnull().sum()}')

rows with iso null values: 665
remove iso null value
rows with iso null values: 0


In [4]:
# function to get a new derived column for iso
def get_iso_level(row, threshold=600):
    """Bucket a photo's ISO value into a 'high' / 'low' label.

    Args:
        row: Any object exposing an ``exif_iso`` attribute, e.g. a DataFrame
            row passed by ``DataFrame.apply(..., axis=1)``.
        threshold: Inclusive lower bound of the 'high' bucket. Defaults to
            600, the cutoff this notebook originally hard-coded.

    Returns:
        'high' when ``row.exif_iso >= threshold``, otherwise 'low'. A NaN ISO
        compares False against the threshold and therefore yields 'low'.
    """
    return 'high' if row.exif_iso >= threshold else 'low'

# Label every photo row-by-row, then display the resulting class shares.
photos['iso_level'] = photos.apply(get_iso_level, axis=1)
photos['iso_level'].value_counts(normalize=True)

low     0.785698
high    0.214302
Name: iso_level, dtype: float64

In [5]:
# Stratified 80/20 split so train and test keep the same high/low ISO ratio.
# A fixed random_state makes the split, and every number computed from it,
# reproducible across kernel restarts.
X = photos.drop(columns='iso_level')
y = photos['iso_level']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [6]:
# Stratification check: print the class shares of the train split, then the
# test split, so the two can be compared.
print(
    y_train.value_counts(normalize=True),
    y_test.value_counts(normalize=True),
    sep='\n',
)

low     0.785755
high    0.214245
Name: iso_level, dtype: float64
low     0.785467
high    0.214533
Name: iso_level, dtype: float64


In [7]:
# Pick the first available backend: CUDA GPU, then Apple-silicon MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(device)

# Load the CLIP model and its matching image preprocessor onto that device.
# ViT-B/32 is the first released checkpoint; ViT-L/14 and ViT-L/14@336px are
# later alternatives that can be swapped in here.
model, preprocess = clip.load("ViT-B/32", device=device)

mps


In [8]:
def get_features(df, img_folder, batch_size=500):
    """Encode the photos listed in ``df`` with the CLIP image encoder.

    Uses the notebook-level ``model``, ``preprocess`` and ``device`` objects.

    Args:
        df: DataFrame with a ``photo_id`` column; each id is read from
            ``./<img_folder>/<photo_id>.jpg``. Must contain at least one row,
            otherwise there are no batches for ``torch.cat`` to concatenate.
        img_folder: Folder, relative to the working directory, that holds the
            JPEG files.
        batch_size: Number of images encoded per forward pass. Defaults to
            500, the value this function originally hard-coded.

    Returns:
        A NumPy array with one row of image features per row of ``df``, in
        the same order as ``df``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f'batch_size must be >= 1, got {batch_size}')

    photo_ids = df['photo_id']
    all_features = []

    for start in tqdm(range(0, len(photo_ids), batch_size)):
        # preprocess the images of this batch one by one
        images = []
        for img_id in photo_ids.iloc[start:start + batch_size]:
            # the context manager closes the file once preprocessing has
            # turned the image into a tensor
            with Image.open(f'./{img_folder}/{img_id}.jpg') as img:
                images.append(preprocess(img))

        # stack the per-image tensors into one batch tensor on the device
        image_input = torch.stack(images).to(device)

        with torch.no_grad():
            image_features = model.encode_image(image_input)  # encode image

        # move each batch off the device right away so only one batch of
        # features is held there at a time
        all_features.append(image_features.cpu())

    return torch.cat(all_features).numpy()  # concatenate all batches

In [9]:
# Encode both splits with the same image folder; train first, then test.
train_features = get_features(df=X_train, img_folder=img_folder)
test_features = get_features(df=X_test, img_folder=img_folder)

100%|██████████| 7/7 [01:15<00:00, 10.85s/it]
100%|██████████| 2/2 [00:18<00:00,  9.41s/it]


In [10]:
# Evaluate the CLIP features with a random forest classifier.
# random_state is fixed so the fitted forest, and the accuracy it reports,
# are reproducible from run to run.
# NOTE: roughly 78.6% of the photos are labelled 'low', so a model that always
# predicts 'low' already scores about 0.786; read the accuracy against that.
rf_cls = RandomForestClassifier(random_state=42)
rf_cls.fit(train_features, y_train)

y_pred = rf_cls.predict(test_features)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy = {accuracy:.3f}")

Accuracy = 0.818
