# Pre-compute statistics of data

These statistics are used to normalize the data. Computing them can be quite expensive, so it is easier to pre-compute them once here.

In [1]:
import torch
import torchvision
from glob import glob

## Provided dataset

In [2]:
# Read every provided PNG as an RGB tensor, add a leading batch axis,
# and rescale the 8-bit pixel values from [0, 255] to floats in [0, 1].
imgs = []
for filename in glob("../data/*/images/*.png"):
    img = torchvision.io.read_image(
        filename, mode=torchvision.io.ImageReadMode.RGB
    )
    imgs.append(img[None].to(torch.float32) / 255)

# Join the per-image tensors along the batch axis into a single tensor.
imgs = torch.cat(imgs, dim=0)

In [3]:
# Sanity check: dimensions of the stacked provided-dataset tensor
# (images are concatenated along axis 0).
imgs.size()

torch.Size([288, 3, 400, 400])

## External Google Maps dataset

In [5]:
# Same loading procedure as for the provided dataset, applied to the
# scraped images: RGB read, leading batch axis, values scaled to [0, 1].
google_imgs = []
for filename in glob("../scraped_data/images/*.png"):
    img = torchvision.io.read_image(
        filename, mode=torchvision.io.ImageReadMode.RGB
    )
    google_imgs.append(img[None].to(torch.float32) / 255)

# Join the per-image tensors along the batch axis into a single tensor.
google_imgs = torch.cat(google_imgs, dim=0)

In [6]:
# Sanity check: dimensions of the stacked scraped-dataset tensor
# (images are concatenated along axis 0).
google_imgs.size()

torch.Size([1980, 3, 400, 400])

In [7]:
# Append the scraped images to the provided ones along the batch axis.
# NOTE: this rebinds `imgs`, so re-running this cell on its own appends
# the scraped images a second time; re-run the loading cells first.
imgs = torch.cat((imgs, google_imgs), dim=0)

In [8]:
# Sanity check: dimensions of the combined tensor; axis 0 should equal the
# sum of the two datasets' image counts.
imgs.size()

torch.Size([2268, 3, 400, 400])

In [10]:
# Per-channel statistics: reduce over the batch axis (0) and both spatial
# axes (2, 3), leaving one value per colour channel (axis 1).
mean = torch.mean(imgs, dim=(0, 2, 3))
std = torch.std(imgs, dim=(0, 2, 3))

# Print the values so they can be copied into the normalization step.
print("Mean:", mean)
print("Std:", std)

Mean: tensor([0.4731, 0.4785, 0.4654])
Std: tensor([0.2268, 0.2076, 0.1975])
