In [None]:
import os
from PIL import Image, ImageEnhance, ImageFilter
import pandas as pd
from tqdm import tqdm
import random
import numpy as np
import torch
from torchvision import transforms
from torchvision.models import resnet50, ResNet50_Weights

In [None]:
# Define paths
csv_file = 'train.csv'  # metadata table read at the start of the notebook
img_dir = 'train'  # directory the image paths are resolved against
augmented_csv_file = 'augmented_train.csv'  # CSV file to save augmented metadata
features_csv_file = 'extracted_features.csv'  # CSV file to save extracted features

# Seed every random number generator up front so that a Restart & Run All
# reproduces the same augmentations: the augmentation helpers draw from both
# `random` and `np.random`; torch is seeded as well for completeness.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Read the metadata table located at `csv_file` into a DataFrame.
train_df = pd.read_csv(filepath_or_buffer=csv_file)

In [None]:
# Quick look at the loaded table: first rows, then the column summary.
print("Initial DataFrame:")
print(train_df.head())
print("\nDataFrame Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would append a stray "None" line).
train_df.info()

# CSV

Preprocessing

In [None]:
# Report missing values: either a per-column count of nulls or an all-clear.
if not train_df.isnull().any().any():
    print("No missing values")
else:
    print("Missing values found:")
    print(train_df.isnull().sum())

In [None]:
# Count fully duplicated rows and drop them from train_df when any exist.
duplicates = train_df.duplicated().sum()
if duplicates == 0:
    print("No duplicates found")
else:
    print(f"Duplicates found: {duplicates}")
    train_df.drop_duplicates(inplace=True)

In [None]:
# Cast the target column to float and divide it by 100 in one step
# (this maps a 0-100 score onto the [0, 1] interval).
# NOTE: the column is overwritten, so running this cell twice divides twice.
train_df['Pawpularity'] = train_df['Pawpularity'].astype(float).div(100.0)


# Images

Augmentation Functions

In [None]:
def resize_image(img, size=(224, 224)):
    """Return the result of ``img.resize(size)``.

    Args:
        img: Image-like object exposing a ``resize`` method (a PIL image).
        size: Target size passed straight through to ``resize``.
    """
    resized = img.resize(size)
    return resized

def normalize_image(img):
    """Scale pixel values to [0, 1] and back, returning an 8-bit image.

    The pixel data is divided by 255, multiplied by 255 again and cast to
    ``uint8`` before the image is rebuilt, so the returned image holds 8-bit
    data rather than floats in [0, 1].
    """
    unit_range = np.divide(np.array(img), 255.0)
    eight_bit = np.multiply(unit_range, 255).astype(np.uint8)
    return Image.fromarray(eight_bit)  # Convert back to image if needed

def random_flip(img):
    """Mirror ``img`` left-to-right when a uniform draw exceeds 0.3.

    One value is drawn from ``random.random()``; if it is at most 0.3 the
    input is returned untouched, otherwise the horizontally flipped image.
    """
    draw = random.random()
    if draw <= 0.3:
        return img
    return img.transpose(Image.FLIP_LEFT_RIGHT)

def random_vertical_flip(img):
    """Flip ``img`` top-to-bottom when a uniform draw exceeds 0.3.

    One value is drawn from ``random.random()``; if it is at most 0.3 the
    input is returned untouched, otherwise the vertically flipped image.
    """
    draw = random.random()
    if draw <= 0.3:
        return img
    return img.transpose(Image.FLIP_TOP_BOTTOM)

def random_rotation(img, max_angle=10):
    """Rotate ``img`` by an angle drawn uniformly from [-max_angle, max_angle].

    The drawn angle is handed directly to ``img.rotate``.
    """
    lower, upper = -max_angle, max_angle
    return img.rotate(random.uniform(lower, upper))

def color_jitter(img, brightness=0.1, contrast=0.1, saturation=0.1):
    """Randomly perturb brightness, contrast and colour saturation, in that order.

    For each property an enhancement factor of ``1 + u`` is used, where ``u``
    is drawn uniformly from ``[-strength, strength]`` and ``strength`` is the
    corresponding argument.
    """
    stages = (
        (ImageEnhance.Brightness, brightness),
        (ImageEnhance.Contrast, contrast),
        (ImageEnhance.Color, saturation),
    )
    for enhancer_cls, strength in stages:
        factor = 1 + random.uniform(-strength, strength)
        img = enhancer_cls(img).enhance(factor)
    return img

def gaussian_blur(img, radius=0.5):
    """Apply a Gaussian blur filter of the given ``radius`` to ``img``."""
    blur_filter = ImageFilter.GaussianBlur(radius)
    return img.filter(blur_filter)

def random_grayscale(img, p=0.1):
    """With probability ``p``, drop the colour information from ``img``.

    When the uniform draw is below ``p`` the image is converted to mode "L"
    and then back to "RGB"; otherwise the input is returned as-is.
    """
    if random.random() >= p:
        return img
    gray = img.convert("L")
    return gray.convert("RGB")  # back to three channels

def random_noise(img, noise_factor=0.01):
    """Add zero-mean Gaussian noise to ``img`` and return a new 8-bit image.

    Args:
        img: Image (or array-like) holding 8-bit pixel data.
        noise_factor: Standard deviation of the noise as a fraction of the
            full 0-255 range.

    Returns:
        A PIL image built from the noisy pixels, clipped to [0, 255].
    """
    # Work in floating point: casting the noise to uint8 before the addition
    # would wrap negative samples around to ~255, and a uint8 + uint8 sum
    # overflows modulo 256 before any clipping can take effect.
    pixels = np.asarray(img, dtype=np.float64)
    noise = np.random.normal(0, noise_factor * 255, pixels.shape)
    noisy = np.clip(np.rint(pixels + noise), 0, 255)
    return Image.fromarray(noisy.astype(np.uint8))

def random_perspective(img, distortion=0.1):
    """Map an inset quadrilateral of ``img`` back onto the full image size.

    The source quadrilateral is the rectangle obtained by moving every edge
    inwards by ``distortion`` times the corresponding image dimension. Despite
    the name, no random draw is involved: the result depends only on the
    arguments.

    Args:
        img: PIL image to transform.
        distortion: Fraction of width/height to inset on each side.

    Returns:
        A new image of the same size as ``img``.
    """
    width, height = img.size
    x_shift = distortion * width
    y_shift = distortion * height

    left, right = x_shift, width - x_shift
    top, bottom = y_shift, height - y_shift

    # Image.QUAD expects the source corners in the order upper-left,
    # lower-left, lower-right, upper-right. Listing them clockwise instead
    # swaps two corners and transposes the picture.
    coeffs = [
        left, top,
        left, bottom,
        right, bottom,
        right, top,
    ]
    return img.transform(img.size, Image.QUAD, coeffs)


In [None]:
def augment_image(img, num_augmentations=5):
    """Create ``num_augmentations`` randomly augmented tensor versions of ``img``.

    Each version starts from the original image and applies a random subset
    of the augmentation helpers (at least 3, at most all of them) in the
    order returned by ``random.sample``. The result is resized to 224x224
    and converted to a tensor with ``transforms.ToTensor``.

    Returns:
        A list of ``num_augmentations`` tensors.
    """
    augmentations = [
        random_flip,
        random_vertical_flip,
        random_rotation,
        color_jitter,
        gaussian_blur,
        random_grayscale,
        random_noise,
        random_perspective,
    ]
    to_model_size = transforms.Resize((224, 224))
    to_tensor = transforms.ToTensor()  # Convert to tensor for deep learning models

    def _one_variant():
        # Draw how many helpers to chain first, then which ones.
        how_many = random.randint(3, len(augmentations))
        variant = img
        for step in random.sample(augmentations, k=how_many):
            variant = step(variant)
        return to_tensor(to_model_size(variant))

    return [_one_variant() for _ in range(num_augmentations)]

In [None]:
# Pick the compute device: CUDA when torch reports it as available, else CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

In [None]:
# Build ResNet-50 with its default pre-trained weights and move it to `device`.
model = resnet50(weights=ResNet50_Weights.DEFAULT)
model = model.to(device)
# NOTE(review): the network is used unmodified, so `model(...)` presumably
# returns the classification-head outputs rather than pooled features --
# confirm whether the final layer should be replaced for feature extraction.
model.eval()  # Set to evaluation mode

In [None]:
# Preprocessing applied before an image is fed to the network:
# resize to 224x224, convert to a tensor, then normalise each channel
# with the mean/std values listed below.
feature_transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
def extract_features(img):
    """Run ``img`` through the pre-trained model and return its output.

    Args:
        img: Either a PIL image or an image tensor that has already been
            through ``transforms.ToTensor`` (which is what ``augment_image``
            returns).

    Returns:
        The model output for a batch of one, as a NumPy array on the CPU.
    """
    if isinstance(img, torch.Tensor):
        # ToTensor only accepts PIL images / ndarrays and raises TypeError on
        # a tensor, so for tensor input every other step of the pipeline is
        # applied and the conversion step is skipped.
        img_tensor = img
        for step in feature_transform.transforms:
            if not isinstance(step, transforms.ToTensor):
                img_tensor = step(img_tensor)
    else:
        img_tensor = feature_transform(img)  # Apply feature extraction transformations

    with torch.no_grad():
        batch = img_tensor.unsqueeze(0).to(device)  # add the batch dimension
        features = model(batch)
    return features.cpu().numpy()

In [None]:
# Accumulators filled by the augmentation loop: one metadata row and one
# (feature vector, target) pair per augmented image.
augmented_data, features_list = [], []

In [None]:
# For every row of the metadata table: load the image, create 5 augmented
# versions, and record a metadata row plus a feature vector for each of them.
for _, row in tqdm(train_df.iterrows(), total=len(train_df)):
    img_path = os.path.join(img_dir, row['Image'])  # Adjust column name if needed

    # Open inside a context manager so the underlying file handle is released
    # as soon as the RGB copy exists, instead of lingering until garbage
    # collection (one leaked handle per image otherwise).
    with Image.open(img_path) as source_img:
        img = source_img.convert("RGB")

    # Generate 5 augmented images
    augmented_images = augment_image(img, num_augmentations=5)

    for i, aug_img in enumerate(augmented_images):
        augmented_data.append({'Image': f"{row['Image']}_aug_{i+1}", 'Pawpularity': row['Pawpularity']})

        # Extract features for each augmented image
        features = extract_features(aug_img)
        features_list.append((features.flatten(), row['Pawpularity']))

In [None]:
# Turn the collected metadata rows into a table and write it to
# `augmented_csv_file` without the index column.
augmented_df = pd.DataFrame(data=augmented_data)
augmented_df.to_csv(path_or_buf=augmented_csv_file, index=False)

In [None]:
# Build the feature table and write it to `features_csv_file`.
features_df = pd.DataFrame(features_list, columns=['Features', 'Pawpularity'])
# Each vector is stored in a single CSV cell as a comma-separated string.
features_df['Features'] = features_df['Features'].apply(
    lambda vector: ','.join(str(value) for value in vector)
)
features_df.to_csv(features_csv_file, index=False)