In [10]:
import os
import pandas as pd
import torch
from PIL import Image
from transformers import ViTModel, ViTFeatureExtractor, DeiTFeatureExtractor, DeiTModel, AutoFeatureExtractor, AutoModel
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from torchvision import models, transforms

# Load data
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
data = pd.read_csv('./data/train_preprocessed_qt+2.csv')

# Define columns
traits_columns = data.columns[-6:]
features_columns = data.columns.difference(traits_columns)

# Construct image paths
data['image_path'] = data['id'].apply(lambda x: os.path.join('./data/train_images', f"{x}.jpeg"))

# feature_extractor = AutoFeatureExtractor.from_pretrained('microsoft/swin-base-patch4-window7-224')
# model = AutoModel.from_pretrained('microsoft/swin-base-patch4-window7-224').to(device)  # not as good as VIT

feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')
model = ViTModel.from_pretrained('google/vit-base-patch16-224').to(device)

# feature_extractor = DeiTFeatureExtractor.from_pretrained('facebook/deit-base-distilled-patch16-224')
# model = DeiTModel.from_pretrained('facebook/deit-base-distilled-patch16-224').to(device)  # not as good as VIT

# feature_extractor = AutoFeatureExtractor.from_pretrained('facebook/dino-vitb16')
# model = AutoModel.from_pretrained('facebook/dino-vitb16').to(device)  # not as good as VIT

# model = models.resnet50(weights='DEFAULT').to(device)
# model.eval()  # Set the model to evaluation mode
# feature_extractor = transforms.Compose([
#     transforms.Resize((224, 224)),  # Resize images to 224x224 pixels
#     transforms.ToTensor(),  # Convert image to PyTorch tensor
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize based on ImageNet stats
# ])
# def extract_features(image_path):
#     image = Image.open(image_path).convert('RGB')
#     inputs = feature_extractor(image).unsqueeze(0).to(device)  # Add batch dimension and move to device
#     with torch.no_grad():
#         outputs = model(inputs)
#     return outputs.squeeze().cpu().numpy()

# Function to extract image features
def extract_features(image_path):
    image = Image.open(image_path).convert('RGB')
    inputs = feature_extractor(images=image, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state[:, 0, :].squeeze().cpu().numpy()

print("Encoding image features...")

# Extract features for each image
data['image_encodes'] = [extract_features(img) for img in tqdm(data['image_path'], desc="Encoding Features")]

print("Image feature encoding completed")


Using device: cuda


Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Extracting image features...


Extracting Features: 100%|██████████| 39277/39277 [15:48<00:00, 41.40it/s]

Image feature extraction completed





In [11]:
import numpy as np

image_features_array = np.vstack(data['image_encodes'].values)

image_features_df = pd.DataFrame(image_features_array, index=data.index)

data = pd.concat([data, image_features_df], axis=1)


In [12]:
data = data.drop(columns=['id', 'image_path', 'image_encodes'])
data.columns = data.columns.astype(str)

X = data.drop(columns=traits_columns)

In [17]:
from autogluon.tabular import TabularPredictor

# Define the target feature and split the data
target_feature = 5
target_column = traits_columns[target_feature]
y = data[target_column]
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42)

train_df = pd.concat([X_train, y_train], axis=1)
val_df = pd.concat([X_val, y_val], axis=1)

# Initialize the TabularPredictor
predictor = TabularPredictor(
    label=target_column,
    problem_type='regression',
    eval_metric='r2',
    path=f'./vit-model-qt+2-{target_feature}'  # Path to save the model
)

# Train the model
predictor.fit(
    train_data=train_df,
    tuning_data=val_df,
    time_limit=7200  # Time limit for training
)

# Evaluate model performance
performance = predictor.evaluate(val_df)
print(performance)


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.1.1
Python Version:     3.10.1
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.19045
CPU Count:          16
Memory Avail:       9.26 GB / 31.77 GB (29.1%)
Disk Space Avail:   11.31 GB / 300.00 GB (3.8%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets.
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='best_quality'   : Maximize accuracy. Default time_limit=3600.
	presets='high_quality'   : Strong accuracy with fast inference speed. Default time_limit=3600.
	presets='good_quality'   : Good accuracy with very fast inference speed. Default time_limit=3600.
	presets='medium_quality' : Fast training time, ideal for initial prototyping.
Beginning AutoGluon training ... Time limit = 7200s
AutoGluon will save models to "./vit-model-qt+2-5"
Train Data Rows:    35

[1000]	valid_set's l2: 0.0198599	valid_set's r2: 0.366548
[2000]	valid_set's l2: 0.0196857	valid_set's r2: 0.372103
[3000]	valid_set's l2: 0.0195937	valid_set's r2: 0.375041
[4000]	valid_set's l2: 0.0195312	valid_set's r2: 0.377033
[5000]	valid_set's l2: 0.0195118	valid_set's r2: 0.377652
[6000]	valid_set's l2: 0.0195004	valid_set's r2: 0.378014
[7000]	valid_set's l2: 0.0195029	valid_set's r2: 0.377936


	0.3782	 = Validation score   (r2)
	178.26s	 = Training   runtime
	0.18s	 = Validation runtime
Fitting model: LightGBM ... Training model for up to 7013.4s of the 7013.4s of remaining time.


[1000]	valid_set's l2: 0.0197672	valid_set's r2: 0.369504


	0.37	 = Validation score   (r2)
	47.26s	 = Training   runtime
	0.06s	 = Validation runtime
Fitting model: RandomForestMSE ... Training model for up to 6966.03s of the 6966.02s of remaining time.
	0.3267	 = Validation score   (r2)
	1909.61s	 = Training   runtime
	0.14s	 = Validation runtime
Fitting model: CatBoost ... Training model for up to 5055.84s of the 5055.83s of remaining time.
	0.3793	 = Validation score   (r2)
	783.01s	 = Training   runtime
	0.02s	 = Validation runtime
Fitting model: ExtraTreesMSE ... Training model for up to 4272.74s of the 4272.74s of remaining time.
	0.3299	 = Validation score   (r2)
	400.72s	 = Training   runtime
	0.12s	 = Validation runtime
Fitting model: NeuralNetFastAI ... Training model for up to 3871.44s of the 3871.44s of remaining time.
No improvement since epoch 4: early stopping
	0.3409	 = Validation score   (r2)
	49.46s	 = Training   runtime
	0.14s	 = Validation runtime
Fitting model: XGBoost ... Training model for up to 3821.77s of the 3821.76s

[1000]	valid_set's l2: 0.0195251	valid_set's r2: 0.377226
[2000]	valid_set's l2: 0.0194795	valid_set's r2: 0.378681
[3000]	valid_set's l2: 0.0194731	valid_set's r2: 0.378885
[4000]	valid_set's l2: 0.0194705	valid_set's r2: 0.37897
[5000]	valid_set's l2: 0.0194701	valid_set's r2: 0.378983
[6000]	valid_set's l2: 0.0194699	valid_set's r2: 0.378989
[7000]	valid_set's l2: 0.0194698	valid_set's r2: 0.378991
[8000]	valid_set's l2: 0.0194698	valid_set's r2: 0.378991
[9000]	valid_set's l2: 0.0194698	valid_set's r2: 0.378991


	0.379	 = Validation score   (r2)
	835.27s	 = Training   runtime
	0.76s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ... Training model for up to 719.7s of the 2890.31s of remaining time.
	Ensemble Weights: {'KNeighborsDist': 0.304, 'CatBoost': 0.174, 'NeuralNetFastAI': 0.174, 'NeuralNetTorch': 0.13, 'LightGBMLarge': 0.13, 'LightGBMXT': 0.087}
	0.4177	 = Validation score   (r2)
	0.08s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 4309.96s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 1727.8 rows/s (3928 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("./vit-model-qt+2-5")


{'r2': 0.4177366071672023, 'root_mean_squared_error': -0.1351112483705678, 'mean_squared_error': -0.018255049436253255, 'mean_absolute_error': -0.08730440231539532, 'pearsonr': 0.6466097318357973, 'median_absolute_error': -0.05204206057729309}
