In [1]:
# Function to extract latitude and longitude from file name

def extract_lat_lon(file_name):
    """Parse the latitude and longitude encoded in an image file name.

    File names follow the pattern ``<lat>-<lon>.<ext>``, where either
    coordinate may itself carry a minus sign. The four layouts handled are:

    * ``12.5-34.2.png``    -> ( 12.5,  34.2)
    * ``-12.5-34.2.png``   -> (-12.5,  34.2)
    * ``12.5--34.2.png``   -> ( 12.5, -34.2)
    * ``-12.5--34.2.png``  -> (-12.5, -34.2)

    Args:
        file_name (str): Bare file name of the image (no directory part).

    Returns:
        tuple[float, float]: ``(lat, lon)`` parsed as floats.

    Raises:
        ValueError: If ``file_name`` does not match the expected pattern
            (for example no "-" separator, too many "-" characters, or a
            missing extension).
    """
    import re  # local import: the defining cell has no import section

    # A coordinate is an optionally signed decimal number. Because the sign is
    # part of the number, the single "-" left over between the two numbers is
    # unambiguously the separator, whatever the signs are.
    number = r"[+-]?(?:\d+(?:\.\d*)?|\.\d+)"
    # The extension must start with a letter so that the fractional part of
    # the longitude (".2" in "34.2") can never be mistaken for it.
    match = re.fullmatch(rf"({number})-({number})\.[A-Za-z]\w*", file_name)
    if match is None:
        raise ValueError(
            f"File name {file_name!r} is not in the expected '<lat>-<lon>.<ext>' format"
        )

    lat, long = match.groups()
    return float(lat), float(long)

In [2]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
from torchvision.transforms import Compose, Resize, Normalize, ToTensor
import pandas as pd
from PIL import Image
import os

class PovertyDataset(Dataset):
    """Dataset of images whose labels are looked up by the coordinates in their file names.

    Each file in ``img_dir`` is one sample. The latitude/longitude parsed from
    the file name is matched against the ``lat`` and ``lon`` columns of the
    annotations CSV, and the value in the CSV's second column (position 1) of
    the first matching row is used as the label.
    """

    def __init__(self, annotations_file, img_dir, transform=None):
        """
        Args:
            annotations_file (string): Path to the csv file with annotations.
            img_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        # Snapshot the directory listing once. os.listdir returns entries in
        # arbitrary order, so sort to make the index -> file mapping stable.
        self.file_names = sorted(os.listdir(self.img_dir))
        self.transform = transform

    def __len__(self):
        """Return the number of files captured from ``img_dir`` at construction time."""
        # Use the cached listing so the length always agrees with the indices
        # that __getitem__ can serve, even if the directory changes later.
        return len(self.file_names)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and look up its label.

        Args:
            idx (int): Position in the cached file listing.

        Returns:
            tuple: ``(image, label)`` where ``image`` is the RGB image (after
            ``transform`` if one was given) and ``label`` is the value from
            column position 1 of the matching annotations row. When no row
            matches, a warning is printed and a random float in [0, 1) is
            returned as a placeholder label.
        """
        file_name = self.file_names[idx]
        img_path = os.path.join(self.img_dir, file_name)
        lat, long = extract_lat_lon(file_name)

        # Close the underlying file as soon as the pixels are copied by
        # convert(), instead of leaving the handle open until garbage collection.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")  # Ensure image is RGB

        # NOTE(review): this is an exact float comparison between the parsed
        # file name and the CSV values; confirm both always round to the same
        # float, otherwise a tolerance-based match may be needed.
        matches = self.img_labels[(self.img_labels['lat'] == lat) & (self.img_labels['lon'] == long)]
        if matches.empty:
            print("---------------------")
            print("No label for this image")
            print(f"Lat: {lat}, Long: {long}")
            print("File Name:", file_name)
            # Placeholder label. Returned as a plain scalar so it has the same
            # shape as real labels when samples are batched together.
            # NOTE(review): random labels add noise to training; consider
            # dropping unlabeled files instead.
            label = torch.rand(1).item()
        else:
            # Take the label straight from the first matching row rather than
            # re-indexing the full frame by an index label used as a position.
            label = matches.iloc[0, 1]

        if self.transform:
            image = self.transform(image)
        return image, label

# Preprocessing applied to every image before it reaches the model:
# resize to 224x224, convert to a tensor, then normalise each channel.
# NOTE(review): the mean/std below are the usual ImageNet statistics; confirm
# they match the preprocessing expected by the checkpoint used for the model.
transform = Compose(
    [
        Resize((224, 224)),
        ToTensor(),
        Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Build the dataset from the 2014 annotations file and image directory
dataset = PovertyDataset(
    annotations_file='dhs_clusters_2014.csv',
    img_dir='images-2014',
    transform=transform,
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Replace classification layer with regression

from transformers import ViTForImageClassification, ViTConfig

PRETRAINED_CHECKPOINT = 'google/vit-base-patch16-224'

# Load pre-trained model configuration
config = ViTConfig.from_pretrained(PRETRAINED_CHECKPOINT)

# Modify the configuration for 1 output (regression)
config.num_labels = 1

# Load the pre-trained weights together with the modified configuration.
# Constructing ViTForImageClassification(config) directly would only build the
# architecture and leave every weight randomly initialised.
# ignore_mismatched_sizes lets the load proceed even though the checkpoint's
# classifier head was trained for a different number of labels than the one
# requested here; that head is simply not restored.
model = ViTForImageClassification.from_pretrained(
    PRETRAINED_CHECKPOINT,
    config=config,
    ignore_mismatched_sizes=True,
)

# Replace the classifier head with a new, untrained single-output regression layer
model.classifier = torch.nn.Linear(model.config.hidden_size, 1)

In [4]:
# Prepare dataloader

from torch.utils.data import DataLoader, random_split

# Seed for the train/test split so the same indices land in each subset on
# every run instead of changing with the global RNG state.
SPLIT_SEED = 42

# 80% of the samples go to training, the remainder to testing
total_count = len(dataset)
train_count = int(0.8 * total_count)
test_count = total_count - train_count

# Split the dataset with a dedicated, seeded generator
train_dataset, test_dataset = random_split(
    dataset,
    [train_count, test_count],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Initialize DataLoaders for each dataset; only the training data is shuffled
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)



In [5]:
# Sanity check: pull a single batch from train_dataloader and show the shapes
# of its image tensor and label tensor. Nothing is printed if the loader is empty.
first_batch = next(iter(train_dataloader), None)
if first_batch is not None:
    data, label = first_batch
    print(data.shape)
    print(label.shape)

torch.Size([32, 3, 224, 224])
torch.Size([32])


In [6]:
import torch.optim as optim
from tqdm import tqdm

# Set the model to training mode
print("Got to model train")
model.train()

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU, and
# report the device that was actually selected.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
model.to(device)

print("Setting up optimizer")
# Setup the optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-5)

print("Setting up Loss")
# Loss function: mean squared error between prediction and label
criterion = torch.nn.MSELoss()

print("Setting up Training loop")
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}")

    # Accumulate the loss over the whole epoch so the summary line reflects
    # every batch rather than only the last one.
    running_loss = 0.0
    samples_seen = 0

    for images, labels in tqdm(train_dataloader, total=len(train_dataloader), desc="Training"):
        # Add a trailing dimension to the labels and cast to float so they can
        # be compared element-wise with the model's single-output logits.
        images, labels = images.to(device), labels.to(device).unsqueeze(1).float()

        # Forward pass
        outputs = model(images).logits

        # Compute loss
        loss = criterion(outputs, labels)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight each batch's mean loss by its size so a smaller final batch
        # does not skew the epoch average.
        n_in_batch = images.size(0)
        running_loss += loss.item() * n_in_batch
        samples_seen += n_in_batch

    # Guard against an empty dataloader, where no loss was ever computed
    epoch_loss = running_loss / samples_seen if samples_seen else float("nan")
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

Got to model train
Using GPU
Setting up optimizer
Setting up Loss
Setting up Training loop
Epoch 1


Training:   0%|          | 0/427 [00:00<?, ?it/s]

: 

In [None]:
# Switch the model from training mode to evaluation mode before running inference
model.eval()