## Given a 365-way scene classifier, obtain logits for a 16-way scene-category classifier


In [34]:
import os, sys
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm

sys.path.insert(0, 'src')
from utils.places365_pred_utils import get_class_category_dict
from utils.utils import ensure_dir

### Split the Places365 validation set into train and val subsets

In [33]:
# --- Configuration ---
# Split settings: fraction of rows sampled into the train subset, and the
# random seed that makes the sampling reproducible.
seed = 0
train_split = 0.6

# Input: Places365 directory and the validation index file inside it.
places365_path_dirpath = os.path.join('data', 'Places365')
index_path = os.path.join(places365_path_dirpath, 'places365_val.txt')

# Output: directory and file the assembled label data is written to.
data_save_dir = os.path.join('data', 'places365_categories')
data_save_path = os.path.join(data_save_dir, 'places365_imagelabels.pth')

In [31]:
# Lookup indexed by scene label that yields the scene category label
class_category_dict = get_class_category_dict()

# Read the space-delimited, header-less index file and name its two columns
df = pd.read_csv(index_path, delimiter=' ', header=None).rename(
    columns={0: 'filename', 1: 'label'}
)
image_paths = df['filename'].tolist()
scene_labels = df['label'].tolist()
# Translate every scene label into its scene category label
scene_category_labels = [class_category_dict[label] for label in scene_labels]

# Per-image lookups keyed by image path: scene label and scene category label
scene_labels_dict = dict(zip(image_paths, scene_labels))
scene_category_labels_dict = dict(zip(image_paths, scene_category_labels))

# Container that is eventually written to disk
save_data = {
    'scene_labels': scene_labels_dict,
    'scene_category_labels': scene_category_labels_dict,
}


# Sample a reproducible `train_split` fraction of rows for val_train;
# every row that was not sampled becomes val_val.
train_df = df.sample(frac=train_split, random_state=seed)
val_df = df.drop(train_df.index)
print(len(train_df), len(val_df))
print(df.head(5))

print(train_df.columns, val_df.columns)

# The two subsets must not share any (filename, label) row
assert pd.merge(train_df, val_df, how='inner', on=['filename', 'label']).empty

# Record which image paths belong to which subset
save_data['val_train'] = train_df['filename'].tolist()
save_data['val_val'] = val_df['filename'].tolist()

# Sanity check: every image in either subset has both kinds of label stored
for image_path in save_data['val_train'] + save_data['val_val']:
    assert image_path in save_data['scene_labels']
    assert image_path in save_data['scene_category_labels']


21900 14600
                     filename  label
0  Places365_val_00000001.jpg    165
1  Places365_val_00000002.jpg    358
2  Places365_val_00000003.jpg     93
3  Places365_val_00000004.jpg    164
4  Places365_val_00000005.jpg    289
Index(['filename', 'label'], dtype='object') Index(['filename', 'label'], dtype='object')


In [35]:
# Prepare the output directory (ensure_dir presumably creates it when
# missing -- confirm in utils.utils), then write the data only if the
# target file does not already exist, so an earlier save is never overwritten.
ensure_dir(data_save_dir)
if not os.path.exists(data_save_path):
    torch.save(save_data, data_save_path)
    print("Saved data to {}".format(data_save_path))
else:
    print("Path {} exists. Aborting".format(data_save_path))

Saved data to data/places365_categories/places365_imagelabels.pth
