In [1]:
import functools as ft
import io
from argparse import ArgumentParser
from pathlib import Path

import pandas as pd
import numpy as np 

from oml.const import (
    IS_GALLERY_COLUMN,
    IS_QUERY_COLUMN,
    LABELS_COLUMN,
    PATHS_COLUMN,
    SPLIT_COLUMN,
    X1_COLUMN,
    X2_COLUMN,
    Y1_COLUMN,
    Y2_COLUMN,
)
from oml.utils.dataframe_format import check_retrieval_dataframe_format


In [2]:
def get_argparser() -> ArgumentParser:
    parser = ArgumentParser()
    parser.add_argument("--dataset_root", type=Path)
    parser.add_argument("--no_bboxes", action="store_true")
    return parser

In [3]:
def build_cub_df(dataset_root: Path, no_bboxes: bool) -> pd.DataFrame:
    dataset_root = Path(dataset_root)

    images_txt = dataset_root / "images.txt"
    train_test_split = dataset_root / "train_test_split.txt"
    bounding_boxes = dataset_root / "bounding_boxes.txt"
    image_class_labels = dataset_root / "image_class_labels.txt"

    for file in [images_txt, train_test_split, bounding_boxes, image_class_labels]:
        assert file.is_file(), f"File {file} does not exist."

    with open(images_txt, "r") as f:
        images = f.read()
        images = pd.read_csv(io.StringIO(images), delim_whitespace=True, header=None, names=["image_id", "image_name"])

    with open(train_test_split, "r") as f:
        split = f.read()
        split = pd.read_csv(
            io.StringIO(split), delim_whitespace=True, header=None, names=["image_id", "is_training_image"]
        )

    with open(bounding_boxes, "r") as f:
        bbs = f.read()
        bbs = pd.read_csv(
            io.StringIO(bbs), delim_whitespace=True, header=None, names=["image_id", "x", "y", "width", "height"]
        )

    with open(image_class_labels, "r") as f:
        class_labels = f.read()
        class_labels = pd.read_csv(
            io.StringIO(class_labels), delim_whitespace=True, header=None, names=["image_id", "class_id"]
        )

    df = ft.reduce(lambda left, right: pd.merge(left, right, on="image_id"), [images, bbs, class_labels, split])

    #print(df.head())

    df["x_1"] = df["x"].apply(int)  # left
    df["x_2"] = (df["x"] + df["width"]).apply(int)  # right
    df["y_2"] = (df["y"] + df["height"]).apply(int)  # bot
    df["y_1"] = df["y"].apply(int)  # top
    df["path"] = df["image_name"].apply(lambda x: dataset_root / "images" / x)
    

    df["split"] = "train"
    #-------------------print(df["split"].head())
    df["split"][df["is_training_image"] == 0] = "validation" # -----problem here-------------

    df["is_query"] = None
    df["is_gallery"] = None
    df["is_query"][df["split"] == "validation"] = True
    df["is_gallery"][df["split"] == "validation"] = True #------propblem 

    df = df.rename(columns={"class_id": "label"})

    cols_to_pick = ["label", "path", "split", "is_query", "is_gallery"]
    if not no_bboxes:
        cols_to_pick.extend(["x_1", "x_2", "y_1", "y_2"])
    df = df[cols_to_pick]


    df = df.rename(
        columns={
            "label": LABELS_COLUMN,
            "path": PATHS_COLUMN,
            "split": SPLIT_COLUMN,
            "is_query": IS_QUERY_COLUMN,
            "is_gallery": IS_GALLERY_COLUMN,
            "x_1": X1_COLUMN,
            "x_2": X2_COLUMN,
            "y_1": Y1_COLUMN,
            "y_2": Y2_COLUMN,
        }
    )

    if df[LABELS_COLUMN].dtype == np.int64:
        print("its working")
    else:
        print("its not working")

    t = 100
    #print(t.dtype)
    print()
    
    print(df[LABELS_COLUMN].dtype)


    check_retrieval_dataframe_format(df, dataset_root=dataset_root)
    return df


In [12]:
df = build_cub_df("CUB_200_2011", False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["split"][df["is_training_image"] == 0] = "validation" # -----problem here-------------
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["is_query"][df["split"] == "validation"] = True
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["is_gallery"][df["split"] == "validation"] = True #------propblem


its working

int64


AssertionError: 

In [10]:
def main() -> None:

    input = "python convert_cub.py --dataset_root=CUB_200_2011"
    
    print("CUB200 2011 dataset preparation started...")
    #args = get_argparser().parse_args()
    args = "CUB_200_2011"

    print("------------- args=== ", args, "-------------")

    print("------- this is args.dataset_root == ", args.dataset_root, "-----------")

    df = build_cub_df(args.dataset_root, args.no_bboxes)


    print("-------done-------")


    fname = "df_no_bboxes" if args.no_bboxes else "df"
    df.to_csv(args.dataset_root / f"{fname}.csv", index=None)
    print("CUB200 2011 dataset preparation completed.")
    print(f"DataFrame saved in {args.dataset_root}\n")



In [11]:

main()

CUB200 2011 dataset preparation started...
------------- args===  CUB_200_2011 -------------


AttributeError: 'str' object has no attribute 'dataset_root'

CUB200 2011 dataset preparation started...
------------- args===  Namespace(dataset_root=WindowsPath('CUB_200_2011'), no_bboxes=False) -------------
------- this is args.dataset_root ==  CUB_200_2011 -----------
its working
int64


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["split"][df["is_training_image"] == 0] = "validation" # -----problem here-------------
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["is_query"][df["split"] == "validation"] = True
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["is_gallery"][df["split"] == "validation"] = True #------propblem
Traceback (most recent call last):
  File "g:\project\github\open-metric-learning\testing_local\cub\convert_cub.py", line 140, in <module>
    main()
  File "g:\proje

In [13]:
dataset_root = Path("CUB_200_2011")
no_bboxes = False

images_txt = dataset_root / "images.txt"
train_test_split = dataset_root / "train_test_split.txt"
bounding_boxes = dataset_root / "bounding_boxes.txt"
image_class_labels = dataset_root / "image_class_labels.txt"

for file in [images_txt, train_test_split, bounding_boxes, image_class_labels]:
    assert file.is_file(), f"File {file} does not exist."

with open(images_txt, "r") as f:
    images = f.read()
    images = pd.read_csv(io.StringIO(images), delim_whitespace=True, header=None, names=["image_id", "image_name"])

with open(train_test_split, "r") as f:
    split = f.read()
    split = pd.read_csv(
        io.StringIO(split), delim_whitespace=True, header=None, names=["image_id", "is_training_image"]
    )

with open(bounding_boxes, "r") as f:
    bbs = f.read()
    bbs = pd.read_csv(
        io.StringIO(bbs), delim_whitespace=True, header=None, names=["image_id", "x", "y", "width", "height"]
    )

with open(image_class_labels, "r") as f:
    class_labels = f.read()
    class_labels = pd.read_csv(
        io.StringIO(class_labels), delim_whitespace=True, header=None, names=["image_id", "class_id"]
    )

df = ft.reduce(lambda left, right: pd.merge(left, right, on="image_id"), [images, bbs, class_labels, split])

#print(df.head())

df["x_1"] = df["x"].apply(int)  # left
df["x_2"] = (df["x"] + df["width"]).apply(int)  # right
df["y_2"] = (df["y"] + df["height"]).apply(int)  # bot
df["y_1"] = df["y"].apply(int)  # top
df["path"] = df["image_name"].apply(lambda x: dataset_root / "images" / x)


df["split"] = "train"
#-------------------print(df["split"].head())
df["split"][df["is_training_image"] == 0] = "validation" # -----problem here-------------

df["is_query"] = None
df["is_gallery"] = None
df["is_query"][df["split"] == "validation"] = True
df["is_gallery"][df["split"] == "validation"] = True #------propblem 

df = df.rename(columns={"class_id": "label"})

cols_to_pick = ["label", "path", "split", "is_query", "is_gallery"]
if not no_bboxes:
    cols_to_pick.extend(["x_1", "x_2", "y_1", "y_2"])
df = df[cols_to_pick]


df = df.rename(
    columns={
        "label": LABELS_COLUMN,
        "path": PATHS_COLUMN,
        "split": SPLIT_COLUMN,
        "is_query": IS_QUERY_COLUMN,
        "is_gallery": IS_GALLERY_COLUMN,
        "x_1": X1_COLUMN,
        "x_2": X2_COLUMN,
        "y_1": Y1_COLUMN,
        "y_2": Y2_COLUMN,
    }
)

if df[LABELS_COLUMN].dtype == np.int64:
    print("its working")
else:
    print("its not working")

t = 100
#print(t.dtype)
print()

print(df[LABELS_COLUMN].dtype)



its working

int64


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["split"][df["is_training_image"] == 0] = "validation" # -----problem here-------------
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["is_query"][df["split"] == "validation"] = True
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["is_gallery"][df["split"] == "validation"] = True #------propblem
