In [13]:
import requests
from PIL import Image
import io
# import tensorflow as tf
import torch
import datetime
from pathlib import Path
from datasets import DatasetDict
from datasets import Dataset
import time
from tqdm import tqdm

In [14]:
from datasets import load_dataset

# this line is just for quick testin
dataset = load_dataset("stochastic/random_streetview_images_pano_v0.0.2", split="train[:200]")
# dataset = load_dataset("stochastic/random_streetview_images_pano_v0.0.2", split="train")

In [15]:
# preprocessing - this took 6min 48 seconds on my computer... jk 12 min second time
from torchvision.transforms import v2
import torchvision.transforms as transforms

# data preprocessing
image_transforms = v2.Compose([
    v2.CenterCrop((561, 1010)),  # crops the middle image from the panorama
    v2.Resize((224, 224)),  # resizing the image to 224x224 for easier processing
    transforms.ToTensor()  # converting the image to a tensor
])

def transform(batch):
    # Transform each image in the batch and ensure it has 3 channels
    batch["image"] = [image_transforms(image.convert("RGB")) for image in batch["image"]]
    del batch["latitude"]
    del batch["longitude"]
    del batch["address"]
    return batch

# Apply the transformations to the dataset
dataset = dataset.map(transform, batched=True, batch_size=8)


In [16]:
# check what the name of the country thing is
print(dataset[0])

{'image': [[[0.5647059082984924, 0.6000000238418579, 0.6666666865348816, 0.4470588266849518, 0.3803921639919281, 0.45490196347236633, 0.4156862795352936, 0.5137255191802979, 0.5843137502670288, 0.5843137502670288, 0.48235294222831726, 0.5137255191802979, 0.5764706134796143, 0.5607843399047852, 0.5176470875740051, 0.5058823823928833, 0.5882353186607361, 0.7098039388656616, 0.729411780834198, 0.6039215922355652, 0.6941176652908325, 0.658823549747467, 0.6941176652908325, 0.7607843279838562, 0.7450980544090271, 0.8078431487083435, 0.843137264251709, 0.8509804010391235, 0.6980392336845398, 0.772549033164978, 0.7647058963775635, 0.7882353067398071, 0.7882353067398071, 0.6196078658103943, 0.6509804129600525, 0.6392157077789307, 0.6078431606292725, 0.6980392336845398, 0.6941176652908325, 0.6784313917160034, 0.658823549747467, 0.6431372761726379, 0.6274510025978088, 0.6235294342041016, 0.6196078658103943, 0.6078431606292725, 0.5921568870544434, 0.5686274766921997, 0.43921568989753723, 0.5215686

In [17]:
# Make split and save
train_testvalid = dataset.train_test_split(test_size=0.4)
test_valid = train_testvalid['test'].train_test_split(test_size=0.5)

datasets = DatasetDict({
    'train': train_testvalid['train'],
    'test': test_valid['test'],
    'valid': test_valid['train']
})
datasets.save_to_disk("./data")

Saving the dataset (0/1 shards):   0%|          | 0/120 [00:00<?, ? examples/s]

Saving the dataset (1/1 shards): 100%|██████████| 120/120 [00:00<00:00, 216.51 examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 40/40 [00:00<00:00, 156.01 examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 40/40 [00:00<00:00, 163.81 examples/s]


In [18]:
country_codes = ["ZA","KR","AR","BW","GR","SK","HK","NL","PE","AU","KH","LT","NZ","RO","MY","SG","AE","FR","ES","IT","IE","LV","IL","JP","CH","AD","CA","RU","NO","SE","PL","TW","CO","BD","HU","CL","IS","BG","GB","US","SI","BT","FI","BE","EE","SZ","UA","CZ","BR","DK","ID","MX","DE","HR","PT","TH"]
country_dict = {}
# TODO: these might need to be tensor arrays but thats easy enough to change if needed
for i in range(len(country_codes)):
    country_dict[country_codes[i]] = [0]*len(country_codes)
    country_dict[country_codes[i]][i] = 1
# print(country_dict)


In [32]:
# referenced: https://blog.paperspace.com/convolutional-autoencoder/
# autoencoder classes (CREDIT: LARGELY TAKEN FROM 6_AUTOENCODER NOTEBOOK, but encoder and decoder architectures modified to be convolutional)
# should only have Encoder that has a latent dimension of 50 - corresponding to country weights
import torch.nn as nn
import torch.nn.functional as F

class MLPEncoder(torch.nn.Module):

    def __init__(self,
                 number_of_hidden_layers: int,
                 latent_size: int,
                 hidden_size: int,
                 input_size: int,
                 activation: torch.nn.Module):

        super().__init__()

        self.latent_size = latent_size
        assert number_of_hidden_layers >= 0, "Decoder number_of_hidden_layers must be at least 0"

       # First convolutional layer
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=2, padding=1)
        # Second convolutional layer
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=2, padding=1)
        
        # Calculate the size of the feature maps after the second convolutional layer
        # Assuming a 224x224 input, the feature map size will be 56x56 after the first layer (stride=2)
        # and 28x28 after the second layer (also stride=2)
        self.feature_map_size = 32 * 28 * 28  # 32 channels, 28x28 spatial size
        
        # Fully connected layer to produce the latent representation of size 55
        self.fc = nn.Linear(self.feature_map_size, latent_size)

    def forward(self, x):
        x = F.relu(self.conv1(x))  # Apply the first convolutional layer and ReLU activation
        x = F.relu(self.conv2(x))  # Apply the second convolutional layer and ReLU activation
        x = x.view(-1, self.feature_map_size)  # Flatten the output
        x = self.fc(x)  # Apply the fully connected layer
        return x
        print(x)


In [33]:
# define our training parameters and model
hidden_layers = 4
hidden_size = 30

latent_size = 55
## this might need to change
input_size = 224
lr = 0.001
# lambda weight for classifier's loss
lamb = 1

# fix random seed
torch.manual_seed(0)

# select device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MLPEncoder( number_of_hidden_layers=hidden_layers,
                 latent_size=latent_size,
                 hidden_size=hidden_size,
                 input_size=input_size,
                 activation=torch.nn.ReLU()).to(device)

# use an optimizer to handle parameter updates
opt = torch.optim.Adam(model.parameters(), lr=lr)

# save all log data to a local directory
run_dir = "logs"

# to clear out TensorBoard and start totally fresh, we'll need to
# remove old logs by deleting them from the directory
!rm -rf ./logs/

# timestamp the logs for each run so we can sort through them
run_time = datetime.datetime.now().strftime("%I%M%p on %B %d, %Y")

# initialize a SummaryWriter object to handle all logging actions
from torch.utils.tensorboard import SummaryWriter
logger = SummaryWriter(log_dir=Path(run_dir) / run_time, flush_secs=20)

In [34]:
# load data from disk
data = datasets.load_from_disk("./data")
data = data.with_format("torch")

In [35]:
# test the datatype of the dataset - the datasets['train']['image'] should be a tensor
# type(datasets['train'][0]['image'])
# datasets['train'][0]['image']

datasets['train'][0]
# print(len(datasets['train'][0]['image'][0]))
# print(len(datasets['train'][0]['image'][0][0]))


{'image': [[[0.7254902124404907,
    0.7254902124404907,
    0.7254902124404907,
    0.7215686440467834,
    0.7176470756530762,
    0.7098039388656616,
    0.7098039388656616,
    0.7098039388656616,
    0.7176470756530762,
    0.7215686440467834,
    0.7215686440467834,
    0.7215686440467834,
    0.7137255072593689,
    0.7176470756530762,
    0.7176470756530762,
    0.7176470756530762,
    0.7137255072593689,
    0.7176470756530762,
    0.7176470756530762,
    0.7098039388656616,
    0.7098039388656616,
    0.7176470756530762,
    0.7176470756530762,
    0.7137255072593689,
    0.7098039388656616,
    0.7058823704719543,
    0.7019608020782471,
    0.7019608020782471,
    0.6980392336845398,
    0.7019608020782471,
    0.7019608020782471,
    0.6980392336845398,
    0.6980392336845398,
    0.6941176652908325,
    0.7019608020782471,
    0.7058823704719543,
    0.7098039388656616,
    0.7058823704719543,
    0.7019608020782471,
    0.6980392336845398,
    0.6980392336845398,
    0.6

In [36]:
# (***credit***: mostly taken from provided notebook )
# training

epochs = 100
start_time = time.time()
loss_history = []
valid_history = []
acc_history = []
valid_acc_history = []
report_every = 5
Loss = torch.nn.BCELoss()



for epoch in range(epochs):

    # weight batch losses/scores proportional to batch size
    iter_count = 0
    valid_iter_count = 0
    loss_epoch = 0
    class_accuracy_epoch = 0
    valid_loss_epoch = 0
    valid_accuracy_epoch = 0
    ###
    ### IMAGE_DATA_TRAIN is training data, shape is 610 x 3 x 64 x 64
    ### 
    
   # print(batched_image_data_train[0][0].shape)

    # test with literally only one image
    #batched_image_data_train = [batched_image_data_train[0][0].unsqueeze(0)]
    for idx, img in enumerate(datasets['train']):
        
        x = img['image']
        x = torch.tensor(x)
        # print(x)
        # flatten input images and move to device\
        # *****
        x = x / 255
        # plot x_real later to see if this is correct
        x = x.to(device)
        model.zero_grad()

        # train on a batch of inputs
        pred_labels = model(x)

        # get the true label
        label = torch.tensor(country_dict[img['country_iso_alpha2']], dtype=torch.float).to(device)
        loss = Loss(label, pred_labels)
        loss.backward()
        opt.step()

        # log loss
        loss_epoch += loss.detach().item()    

        # classification accuracy
        # add 1 to class_accuracy_epoch if the classification is correct, else 0
        # find index of max probability from pred_labels
        c = torch.argmax(pred_labels)
        # find index of 1 from label
        true_class = torch.argmax(label)

        acc = (true_class == c)
        class_accuracy_epoch += acc
        iter_count += 1
        # print(f"true: {true_class}")
        # print(f"pred: {classification.mean()}")
        # print(f"acc: {acc}")

    # plot loss
    loss_epoch 
    class_accuracy_epoch /= iter_count #accuracy as a percent
   # print(iter_count)
    logger.add_scalar("mse_loss", loss_epoch, epoch)
    loss_history.append(loss_epoch)
    acc_history.append(class_accuracy_epoch) 
 
            
            # logger.add_scalar("mse_loss_valid", valid_loss_epoch, epoch)
    # # plot example generated images
    # with torch.no_grad():
    #     reconstructed_batch = model(example_batch.reshape(batch_size, -1)).reshape(batch_size, 1, image_size, image_size)
    #     logger.add_image("reconstructed_images", make_grid(reconstructed_batch, math.floor(math.sqrt(batch_size)), title="Reconstructed Images"), epoch)
        # calculate validation loss

    with torch.no_grad():
        for valid_idx, valid_data in enumerate(datasets['valid']):
            x_valid = valid_data['image']#.float()
            x_valid = x_valid / 255
            x_valid = x_valid.to(device)
            pred_labels = model(x_valid)

            label_valid = country_dict[valid_data['country_code']]
            valid_loss = Loss(label_valid, pred_labels)
            valid_loss_epoch += valid_loss.detach().item()

            
            # classification accuracy
            # add 1 to class_accuracy_epoch if the classification is correct, else 0
            c = torch.argmax(pred_labels) # find index of max probability from pred_labels
            true_class = torch.argmax(label) # find index of 1 from label
            
            valid_acc = (true_class == c) 
            valid_accuracy_epoch += valid_acc
            valid_iter_count += 1
            # print(f"true: {true_class}")
            # print(f"pred: {classification.mean()}")
            # print(f"acc: {valid_acc}")
        valid_loss_epoch 
        valid_history.append(valid_loss_epoch)
        valid_accuracy_epoch /= valid_iter_count
        valid_acc_history.append(valid_accuracy_epoch)

    if (epoch + 1) % report_every == 0:
        mins = (time.time() - start_time) / 60
        print(f"Epoch: {epoch + 1:5d}\tMSE Loss: {loss_epoch :6.4f}\t in {mins:5.1f}min")
        print()

ValueError: Using a target size (torch.Size([4, 55])) that is different to the input size (torch.Size([56])) is deprecated. Please ensure they have the same size.