In [95]:
from pathlib import Path

import numpy as np
import pandas as pd

from vgn.io import *
from vgn.perception import *
from vgn.assign_grasp_affordance import affrdnce_label_dict
from vgn.utils.implicit import semantic_label_dict

In [96]:
# Dataset directory, given relative to the notebook's working directory.
root = Path('../data/3DGraspAff/data_affnet_train_random_raw_GPG_60')
# Load the grasp table; later cells read its "label" column and one column per
# affordance key.
# NOTE(review): read_df_with_surface_clouds arrives through a star import
# (presumably vgn.io) -- confirm where it is defined.
df = read_df_with_surface_clouds(root)

In [97]:
# Grasp label balance: split the grasps on the binary "label" column and
# report how many fall on each side.
grasp_label = df["label"]
positives = df.loc[grasp_label == 1]
negatives = df.loc[grasp_label == 0]
num_positive_grasps = len(positives)
num_negative_grasps = len(negatives)
print("Positives: {}, Negatives: {}".format(num_positive_grasps, num_negative_grasps))

Positives: 967777, Negatives: 967650


In [98]:
def check_balance(positives, aff_keys=None):
    """Print the positive/negative balance of each affordance column.

    Only the positive grasps carry an affordance label, so callers are
    expected to pass the positive subset of the grasp table.

    Args:
        positives: DataFrame holding one 0/1 column per affordance key.
        aff_keys: Optional iterable of column names to report on. Defaults
            to the keys of ``affrdnce_label_dict``.

    Prints one line per affordance with the number of rows equal to 1, the
    number equal to 0, and the positive ratio. The ratio is ``nan`` when a
    column has no 0/1 entries (e.g. an empty frame) instead of raising
    ``ZeroDivisionError``.
    """
    if aff_keys is None:
        aff_keys = affrdnce_label_dict.keys()
    for aff_key in aff_keys:
        aff_column = positives[aff_key]
        # Count via boolean masks rather than materialising filtered frames.
        num_pos = int((aff_column == 1).sum())
        num_neg = int((aff_column == 0).sum())
        total = num_pos + num_neg
        pos_ratio = num_pos / total if total else float("nan")
        print("Affordance: {}, Positives: {}, Negatives: {}, Pos_ratio: {}".format(
            aff_key, num_pos, num_neg, pos_ratio))

# Report the affordance balance among the positive grasps.
check_balance(positives)

Affordance: handover, Positives: 525106, Negatives: 442671, Pos_ratio: 0.5425898734935838
Affordance: cut, Positives: 157387, Negatives: 810390, Pos_ratio: 0.16262734080268493
Affordance: stab, Positives: 157387, Negatives: 810390, Pos_ratio: 0.16262734080268493
Affordance: lift, Positives: 50334, Negatives: 917443, Pos_ratio: 0.05200991550739478
Affordance: wrap, Positives: 103662, Negatives: 864115, Pos_ratio: 0.10711351892016446
Affordance: pour, Positives: 261693, Negatives: 706084, Pos_ratio: 0.27040630227831414
Affordance: wear, Positives: 70935, Negatives: 896842, Pos_ratio: 0.07329684421101142


In [99]:
# Number of positive grasps that carry no affordance label at all.
# Rows whose affordance flags sum to zero are counted directly; looking up
# `value_counts()[0]` would raise KeyError when every grasp has a label.
aff_columns = list(affrdnce_label_dict.keys())
num_no_aff = (positives[aff_columns].sum(axis=1) == 0).sum()
num_no_aff

29005

In [100]:
# For positive grasps that are NOT handovers: how many affordance labels does
# each one carry? The result maps "number of labels" -> "number of grasps".
not_handovers = positives.loc[positives['handover'] == 0]
aff_labels_per_grasp = not_handovers[affrdnce_label_dict.keys()].sum(axis=1)
something_else = aff_labels_per_grasp.value_counts()
something_else

1    250791
2    162844
0     29005
3        31
dtype: int64

In [101]:
# BALANCE THE DATASET
# Resample the grasps so that each listed affordance ends up with roughly
# `new_num_positives_per_aff_class` positive examples.
new_dataset_size = 3000000
new_num_positives = new_dataset_size // 2
num_aff_classes = len(affrdnce_label_dict)
new_num_positives_per_aff_class = new_num_positives // num_aff_classes

# Seeded generator so a fresh Restart & Run All reproduces the same dataset.
balance_rng = np.random.RandomState(0)

# Work on a new frame with a fresh unique index: `drop` below removes rows by
# index label, so duplicated labels would silently remove extra rows.
new_df = df.reset_index(drop=True)

# NOTE(review): 'handover' is not in this list, so it is never re-balanced, and
# 'pour' is visited twice -- presumably to correct it again after resampling
# the other classes shifted its count (a grasp can carry several affordances).
# TODO confirm this ordering is intentional. Because of the same overlap the
# final per-class counts are only approximately equal.
for aff_key in ['pour', 'lift', 'wear', 'wrap', 'cut', 'stab', 'pour']:
    affrdnce_positives = new_df[new_df[aff_key] == 1]
    surplus = len(affrdnce_positives) - new_num_positives_per_aff_class
    if surplus > 0:
        # randomly drop the surplus positives
        rows_to_drop = affrdnce_positives.sample(n=surplus, random_state=balance_rng).index
        new_df = new_df.drop(rows_to_drop)
    elif surplus < 0:
        # oversample the positives (with replacement) up to the target;
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
        extra_rows = affrdnce_positives.sample(n=-surplus, replace=True, random_state=balance_rng)
        new_df = pd.concat([new_df, extra_rows], ignore_index=True)

check_balance(positives=new_df[new_df["label"] == 1])


Affordance: handover, Positives: 739590, Negatives: 583132, Pos_ratio: 0.5591424350695007
Affordance: cut, Positives: 214282, Negatives: 1108440, Pos_ratio: 0.16200078323336273
Affordance: stab, Positives: 214282, Negatives: 1108440, Pos_ratio: 0.16200078323336273
Affordance: lift, Positives: 214387, Negatives: 1108335, Pos_ratio: 0.16208016499309758
Affordance: wrap, Positives: 171938, Negatives: 1150784, Pos_ratio: 0.12998800957419623
Affordance: pour, Positives: 214285, Negatives: 1108437, Pos_ratio: 0.16200305128364084
Affordance: wear, Positives: 214305, Negatives: 1108417, Pos_ratio: 0.16201817161882845


In [102]:
# Persist the balanced grasp table as CSV inside the dataset directory
# (the row index is not written).
balanced_csv_path = root / "grasps_aff_balanced.csv"
new_df.to_csv(balanced_csv_path, index=False)