In [1]:
import numpy as np
import cv2
import random
import glob
import PIL
import pandas as pd
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
from collections import defaultdict

In [2]:
root_path = "/mnt/pentagon/xul076"
sketchy_path = os.path.join(root_path, "sketchy/rendered_256x256/256x256")
sketchy_info_path = os.path.join(root_path, "sketchy/info-06-04/info/")

photo_path = "photo/tx_000100000000"
sketch_path = "sketch/tx_000100000000"
full_photo_path = os.path.join(sketchy_path, photo_path)
full_sketch_path = os.path.join(sketchy_path, sketch_path)

photo_files = sorted(glob.glob(os.path.join(full_photo_path, "*/*")))
sketch_files = sorted(glob.glob(os.path.join(full_sketch_path, "*/*")))

In [3]:
invalid_error = set() 
with open(os.path.join(sketchy_info_path, "invalid-error.txt")) as f:
    for line in f:
        invalid_error.add(line.strip())
invalid_pose = set()
with open(os.path.join(sketchy_info_path, "invalid-pose.txt")) as f:
    for line in f:
        invalid_pose.add(line.strip())
invalid_context = set()
with open(os.path.join(sketchy_info_path, "invalid-context.txt")) as f:
    for line in f:
        invalid_context.add(line.strip())
invalid_ambiguous = set()
with open(os.path.join(sketchy_info_path, "invalid-ambiguous.txt")) as f:
    for line in f:
        invalid_ambiguous.add(line.strip())

In [4]:
with open(os.path.join(sketchy_info_path, "testset.txt")) as f:
    test_set = f.readlines()
test_set = [x.strip() for x in test_set]

In [5]:
# for pose in poses:
#     fig, axes = plt.subplots(1, 6)
#     for p in photos:
#         if pose[:-2] in p:
#             axes[0].imshow(PIL.Image.open(p))
#     i = 1
#     for s in sketches:
#         if pose[:-2] in s:
#             axes[i].imshow(PIL.Image.open(s))
#             i += 1
#             if i == 6:
#                 break
#     plt.show()

In [6]:
sketchy_meta = []
for sketch_file in tqdm(sketch_files):
    sketch_file_split = sketch_file.split("/")
    sketch_name = sketch_file_split[-1][:-4]
    sketch_file = "/".join(sketch_file_split[4:])
    category = sketch_file_split[9]
    
    photo_name = sketch_name.split("-")[0]
    for photo_file in photo_files:
        if photo_name in photo_file:
            photo_file = "/".join(photo_file.split("/")[4:])
            break
    
    split = "train"
    for test in test_set:
        if test in photo_file:
            split = "test"
            break
    
    if sketch_name in invalid_error:
        invalid = "error"
    elif sketch_name in invalid_pose:
        invalid = "pose"
    elif sketch_name in invalid_context:
        invalid = "context"
    elif sketch_name in invalid_ambiguous:
        invalid = "ambiguous"
    else:
        invalid = "valid"
    sketchy_meta.append([category, photo_name, sketch_name, photo_file, sketch_file, invalid, split])

100%|████████████████████████| 75481/75481 [00:40<00:00, 1854.67it/s]


In [7]:
pd.options.display.max_colwidth = 1000
sketchy_meta = pd.DataFrame(sketchy_meta, columns=["class", "photo", "sketch", "photo_path", "sketch_path", "validity", "split"])
sketchy_meta

Unnamed: 0,class,photo,sketch,photo_path,sketch_path,validity,split
0,airplane,n02691156_10151,n02691156_10151-1,sketchy/rendered_256x256/256x256/photo/tx_000100000000/airplane/n02691156_10151.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/airplane/n02691156_10151-1.png,pose,test
1,airplane,n02691156_10151,n02691156_10151-2,sketchy/rendered_256x256/256x256/photo/tx_000100000000/airplane/n02691156_10151.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/airplane/n02691156_10151-2.png,ambiguous,test
2,airplane,n02691156_10151,n02691156_10151-3,sketchy/rendered_256x256/256x256/photo/tx_000100000000/airplane/n02691156_10151.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/airplane/n02691156_10151-3.png,valid,test
3,airplane,n02691156_10151,n02691156_10151-4,sketchy/rendered_256x256/256x256/photo/tx_000100000000/airplane/n02691156_10151.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/airplane/n02691156_10151-4.png,ambiguous,test
4,airplane,n02691156_10151,n02691156_10151-5,sketchy/rendered_256x256/256x256/photo/tx_000100000000/airplane/n02691156_10151.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/airplane/n02691156_10151-5.png,valid,test
...,...,...,...,...,...,...,...
75476,zebra,n02391049_9960,n02391049_9960-1,sketchy/rendered_256x256/256x256/photo/tx_000100000000/zebra/n02391049_9960.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/zebra/n02391049_9960-1.png,valid,test
75477,zebra,n02391049_9960,n02391049_9960-2,sketchy/rendered_256x256/256x256/photo/tx_000100000000/zebra/n02391049_9960.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/zebra/n02391049_9960-2.png,valid,test
75478,zebra,n02391049_9960,n02391049_9960-3,sketchy/rendered_256x256/256x256/photo/tx_000100000000/zebra/n02391049_9960.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/zebra/n02391049_9960-3.png,valid,test
75479,zebra,n02391049_9960,n02391049_9960-4,sketchy/rendered_256x256/256x256/photo/tx_000100000000/zebra/n02391049_9960.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/zebra/n02391049_9960-4.png,valid,test


In [8]:
for sketch_path in sketchy_meta["sketch_path"].unique():
    if not os.path.exists(os.path.join(root_path, sketch_path)):
        print(sketch_path, "not exists")
        break
for photo_path in sketchy_meta["photo_path"].unique():
    if not os.path.exists(os.path.join(root_path, photo_path)):
        print(photo_path, "not exists")
        break

In [9]:
sketchy_meta = sketchy_meta[(sketchy_meta["validity"] != "error") & (sketchy_meta["validity"] != "pose")]

In [10]:
sketchy_train_meta = sketchy_meta[sketchy_meta["split"] == "train"]
sketchy_test_meta = sketchy_meta[sketchy_meta["split"] == "test"]
# sketchy_train_meta = sketchy_train_meta[sketchy_train_meta["validity"] != "pose"]

In [11]:
sketchy_train_meta["sketch_idx"] = sketchy_train_meta.groupby("photo").cumcount()
temp = sketchy_train_meta[["class", "photo"]].drop_duplicates()
temp["photo_idx"] = temp.groupby(["class"]).cumcount()
sketchy_train_meta = sketchy_train_meta.merge(temp, "left")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sketchy_train_meta["sketch_idx"] = sketchy_train_meta.groupby("photo").cumcount()


In [12]:
sketchy_test_meta["sketch_idx"] = sketchy_test_meta.groupby("photo").cumcount()
sketchy_test_meta = sketchy_test_meta[sketchy_test_meta["sketch_idx"] <= 4]
print(sketchy_test_meta["sketch_idx"].value_counts())
temp = sketchy_test_meta[["class", "photo"]].drop_duplicates()
temp["photo_idx"] = temp.groupby(["class"]).cumcount()
sketchy_test_meta = sketchy_test_meta.merge(temp, "left")

0    1250
1    1250
2    1250
3    1250
4    1250
Name: sketch_idx, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sketchy_test_meta["sketch_idx"] = sketchy_test_meta.groupby("photo").cumcount()


In [13]:
temp = sketchy_test_meta[["class", "photo"]].drop_duplicates()
temp["photo_idx"] = temp.groupby(["class"]).cumcount()
sketchy_test_meta = sketchy_test_meta.merge(temp, "left")

In [14]:
sketchy_train_meta

Unnamed: 0,class,photo,sketch,photo_path,sketch_path,validity,split,sketch_idx,photo_idx
0,airplane,n02691156_10153,n02691156_10153-1,sketchy/rendered_256x256/256x256/photo/tx_000100000000/airplane/n02691156_10153.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/airplane/n02691156_10153-1.png,valid,train,0,0
1,airplane,n02691156_10153,n02691156_10153-2,sketchy/rendered_256x256/256x256/photo/tx_000100000000/airplane/n02691156_10153.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/airplane/n02691156_10153-2.png,valid,train,1,0
2,airplane,n02691156_10153,n02691156_10153-3,sketchy/rendered_256x256/256x256/photo/tx_000100000000/airplane/n02691156_10153.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/airplane/n02691156_10153-3.png,valid,train,2,0
3,airplane,n02691156_10153,n02691156_10153-4,sketchy/rendered_256x256/256x256/photo/tx_000100000000/airplane/n02691156_10153.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/airplane/n02691156_10153-4.png,ambiguous,train,3,0
4,airplane,n02691156_10153,n02691156_10153-5,sketchy/rendered_256x256/256x256/photo/tx_000100000000/airplane/n02691156_10153.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/airplane/n02691156_10153-5.png,ambiguous,train,4,0
...,...,...,...,...,...,...,...,...,...
65059,zebra,n02391049_9918,n02391049_9918-3,sketchy/rendered_256x256/256x256/photo/tx_000100000000/zebra/n02391049_9918.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/zebra/n02391049_9918-3.png,ambiguous,train,2,89
65060,zebra,n02391049_9918,n02391049_9918-4,sketchy/rendered_256x256/256x256/photo/tx_000100000000/zebra/n02391049_9918.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/zebra/n02391049_9918-4.png,ambiguous,train,3,89
65061,zebra,n02391049_9918,n02391049_9918-5,sketchy/rendered_256x256/256x256/photo/tx_000100000000/zebra/n02391049_9918.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/zebra/n02391049_9918-5.png,valid,train,4,89
65062,zebra,n02391049_9918,n02391049_9918-6,sketchy/rendered_256x256/256x256/photo/tx_000100000000/zebra/n02391049_9918.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/zebra/n02391049_9918-6.png,valid,train,5,89


In [15]:
sketchy_test_meta

Unnamed: 0,class,photo,sketch,photo_path,sketch_path,validity,split,sketch_idx,photo_idx
0,airplane,n02691156_10151,n02691156_10151-2,sketchy/rendered_256x256/256x256/photo/tx_000100000000/airplane/n02691156_10151.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/airplane/n02691156_10151-2.png,ambiguous,test,0,0
1,airplane,n02691156_10151,n02691156_10151-3,sketchy/rendered_256x256/256x256/photo/tx_000100000000/airplane/n02691156_10151.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/airplane/n02691156_10151-3.png,valid,test,1,0
2,airplane,n02691156_10151,n02691156_10151-4,sketchy/rendered_256x256/256x256/photo/tx_000100000000/airplane/n02691156_10151.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/airplane/n02691156_10151-4.png,ambiguous,test,2,0
3,airplane,n02691156_10151,n02691156_10151-5,sketchy/rendered_256x256/256x256/photo/tx_000100000000/airplane/n02691156_10151.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/airplane/n02691156_10151-5.png,valid,test,3,0
4,airplane,n02691156_10151,n02691156_10151-6,sketchy/rendered_256x256/256x256/photo/tx_000100000000/airplane/n02691156_10151.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/airplane/n02691156_10151-6.png,valid,test,4,0
...,...,...,...,...,...,...,...,...,...
6245,zebra,n02391049_9960,n02391049_9960-1,sketchy/rendered_256x256/256x256/photo/tx_000100000000/zebra/n02391049_9960.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/zebra/n02391049_9960-1.png,valid,test,0,9
6246,zebra,n02391049_9960,n02391049_9960-2,sketchy/rendered_256x256/256x256/photo/tx_000100000000/zebra/n02391049_9960.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/zebra/n02391049_9960-2.png,valid,test,1,9
6247,zebra,n02391049_9960,n02391049_9960-3,sketchy/rendered_256x256/256x256/photo/tx_000100000000/zebra/n02391049_9960.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/zebra/n02391049_9960-3.png,valid,test,2,9
6248,zebra,n02391049_9960,n02391049_9960-4,sketchy/rendered_256x256/256x256/photo/tx_000100000000/zebra/n02391049_9960.jpg,sketchy/rendered_256x256/256x256/sketch/tx_000100000000/zebra/n02391049_9960-4.png,valid,test,3,9


In [16]:
sketchy_train_meta.to_csv(os.path.join(root_path, "sketchy", "sketchy_train_meta.csv"))
sketchy_train_meta.to_csv(os.path.join(".", "sketchy_train_meta.csv"))

In [17]:
sketchy_test_meta.to_csv(os.path.join(root_path, "sketchy", "sketchy_test_meta.csv"))
sketchy_test_meta.to_csv(os.path.join(".", "sketchy_test_meta.csv"))

In [18]:
sketchy_fam_meta = [
    ["pantheon", "pantheon_photo.jpg", "pantheon_sketch.jpg", "fam"],
    ["moon", "moon_photo.jpg", "moon_sketch.jpg", "fam"],
    ["bridge", "bridge_photo.jpg", "bridge_sketch.jpg", "fam"],
]
sketchy_fam_meta = pd.DataFrame(sketchy_fam_meta, columns=["class", "photo_path", "sketch_path", "split"])

In [19]:
sketchy_fam_meta

Unnamed: 0,class,photo_path,sketch_path,split
0,pantheon,pantheon_photo.jpg,pantheon_sketch.jpg,fam
1,moon,moon_photo.jpg,moon_sketch.jpg,fam
2,bridge,bridge_photo.jpg,bridge_sketch.jpg,fam


In [20]:
sketchy_fam_meta.to_csv(os.path.join(root_path, "sketchy", "sketchy_fam_meta.csv"))
sketchy_fam_meta.to_csv(os.path.join(".", "sketchy_fam_meta.csv"))

In [21]:
sketchy_catch_meta = [
    ["leaf", "leaf_photo.jpg", "leaf_sketch.jpg", "catch"],
    ["sun", "sun_photo.jpg", "sun_sketch.jpg", "catch"],
    ["opera", "opera_photo.jpg", "opera_sketch.jpg", "catch"],
    ["eiffel", "eiffel_photo.jpg", "eiffel_sketch.jpg", "catch"],
    ["sunflower", "sunflower_photo.jpg", "sunflower_sketch.jpg", "catch"],
]
sketchy_catch_meta = pd.DataFrame(sketchy_catch_meta, columns=["class", "photo_path", "sketch_path", "split"])
sketchy_catch_meta

Unnamed: 0,class,photo_path,sketch_path,split
0,leaf,leaf_photo.jpg,leaf_sketch.jpg,catch
1,sun,sun_photo.jpg,sun_sketch.jpg,catch
2,opera,opera_photo.jpg,opera_sketch.jpg,catch
3,eiffel,eiffel_photo.jpg,eiffel_sketch.jpg,catch
4,sunflower,sunflower_photo.jpg,sunflower_sketch.jpg,catch


In [22]:
sketchy_catch_meta.to_csv(os.path.join(root_path, "sketchy", "sketchy_catch_meta.csv"))
sketchy_catch_meta.to_csv(os.path.join(".", "sketchy_catch_meta.csv"))