## Exploring WILDS datasets and models
### FMoW
#### Imports

In [17]:
from wilds import get_dataset
from wilds.common.data_loaders import get_train_loader
import torchvision.transforms as transforms
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch

#### Explore dataset

In [30]:
dataset = get_dataset(dataset="fmow", download=False, root_dir='/mfsnic/u/apouget/data/')
dataset[0]

(<PIL.Image.Image image mode=RGB size=224x224>,
 tensor(57),
 tensor([ 0, 15, 57,  0]))

In [23]:
torch.bincount(dataset.metadata_array[:,9])

tensor([ 73220, 129809])

In [31]:
dataset.source_domain_splits

[0, 1, 2]

In [22]:
dataset[0]

(<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=796x448>,
 tensor(1),
 tensor([   2, 2124, 2013,    6,    1,    6,   55,    0,    1,    0]))

#### Load dataset

To make the following work, a small change is needed in the `wilds` package code. In `<conda_env>/lib/python3.11/site-packages/wilds/datasets/fmow_dataset.py`, add the `format='ISO8601'` argument to each `pd.to_datetime()` function call (3 in total).

In [7]:
dataset = get_dataset(dataset="fmow", download=False, root_dir='/mfsnic/u/apouget/data/')
train_data = dataset.get_subset(
    "train",
    transform=transforms.Compose(
        [transforms.Resize((448, 448)), transforms.ToTensor()]
    ),
)
train_loader = get_train_loader("standard", train_data, batch_size=16) # get_eval_loader if we don't want to shuffle, "group" to sample from groups
for x, y, metadata in train_loader:
    print(x.shape, y.shape, metadata.shape)
    break

torch.Size([16, 3, 448, 448]) torch.Size([16]) torch.Size([16, 4])


In [8]:
dataset.metadata

Unnamed: 0,split,img_filename,img_path,spatial_reference,epsg,category,visible,img_width,img_height,country_code,cloud_cover,timestamp,lat,lon,region,y,year
0,train,tunnel_opening_370_6_rgb.jpg,train/tunnel_opening/tunnel_opening_370/tunnel...,GCS_WGS_1984,4326,tunnel_opening,True,1359,1222,CHN,0,2017-04-12T04:14:15Z,26.604456,101.733373,0,57,15.0
1,train,tunnel_opening_370_1_rgb.jpg,train/tunnel_opening/tunnel_opening_370/tunnel...,GCS_WGS_1984,4326,tunnel_opening,True,900,809,CHN,0,2017-01-30T04:18:04Z,26.604456,101.733373,0,57,15.0
2,train,tunnel_opening_370_0_rgb.jpg,train/tunnel_opening/tunnel_opening_370/tunnel...,GCS_WGS_1984,4326,tunnel_opening,True,1058,951,CHN,1,2015-02-20T04:11:50Z,26.604456,101.733373,0,57,13.0
3,train,tunnel_opening_370_3_rgb.jpg,train/tunnel_opening/tunnel_opening_370/tunnel...,GCS_WGS_1984,4326,tunnel_opening,True,1568,1409,CHN,11,2015-11-23T04:10:38Z,26.604456,101.733373,0,57,13.0
4,train,tunnel_opening_370_4_rgb.jpg,train/tunnel_opening/tunnel_opening_370/tunnel...,GCS_WGS_1984,4326,tunnel_opening,True,1624,1460,CHN,0,2016-04-04T04:12:15Z,26.604456,101.733373,0,57,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
523841,seq,burial_site_82_1_rgb.jpg,seq/0010209/0010209_1_rgb.jpg,GCS_WGS_1984,4326,burial_site,True,832,507,NLD,1,2013-05-28T11:22:02Z,52.646049,5.035879,1,8,11.0
523842,seq,burial_site_150_0_rgb.jpg,seq/0004147/0004147_0_rgb.jpg,GCS_WGS_1984,4326,burial_site,True,875,716,JPN,0,2015-03-31T01:47:42Z,35.423879,139.378550,0,8,13.0
523843,seq,burial_site_150_2_rgb.jpg,seq/0004147/0004147_2_rgb.jpg,GCS_WGS_1984,4326,burial_site,True,973,797,JPN,4,2016-01-13T01:28:04Z,35.423879,139.378550,0,8,14.0
523844,seq,burial_site_127_6_rgb.jpg,seq/0007058/0007058_6_rgb.jpg,GCS_WGS_1984,4326,burial_site,True,678,443,FRA,20,2009-10-27T10:47:30Z,49.347681,6.135579,1,8,7.0
