In [1]:
import metadataparity
import numpy as np
import pandas as pd
from fmow_utils import extrinsic_factors_fmow, get_fmow_boxes
from intrinsic_factors import intrinsic_factors_xywh

ModuleNotFoundError: No module named 'pandas'

In [None]:
# Run configuration.
# country_code: country of interest for this demo.
# split_name: split identifier; it is not referenced in the cells visible here.
country_code = "RUS"
split_name = "op"

# Class names kept for the demo (order preserved).
demo_classes = [
    "airport", "border_checkpoint", "dam",
    "factory_or_powerplant", "hospital", "military_facility",
    "nuclear_powerplant", "oil_or_gas_facility", "place_of_worship",
    "port", "prison", "stadium",
    "electric_substation", "road_bridge",
]
# Load the precomputed table of FMOW labels and metadata.
# NOTE(security): unpickling can execute arbitrary code; only load this file
# from a trusted source.
df = pd.read_pickle("../trainval_labels_factors.pkl").reset_index(drop=True)
df["class"] = df["class"].astype("category")
df["split"] = df["split"].astype("category")

# Restrict to the configured country. This reads the `country_code` setting
# defined above instead of repeating the literal, so changing the setting
# actually changes the filter.
df = df[df["country_code"] == country_code]

# Keep only rows whose class is one of the demo classes.
df = df[df["class"].isin(demo_classes)]

# Boxes (xywh, per the original note) and one (width, height) row per record.
boxes = get_fmow_boxes(df)
img_sizes = np.column_stack([df["img_width"].to_numpy(), df["img_height"].to_numpy()])

# Intrinsic factors (dataset agnostic), computed from boxes and image sizes.
int_fmow, int_categorical = intrinsic_factors_xywh(boxes, img_sizes)

# Extrinsic factors (custom to FMOW), computed from the metadata table.
ext_fmow, ext_categorical = extrinsic_factors_fmow(df)

# The class label is treated as one more factor and flagged as categorical.
cls_fmow = {"class": df["class"].to_numpy()}
cls_categorical = {"class": True}

# Merge the three factor dicts: class first, then intrinsic, then extrinsic.
# On a duplicate key the later dict supplies the value.
factors = dict(cls_fmow)
factors.update(int_fmow)
factors.update(ext_fmow)

# Merge the categorical flags the same way, then rebuild the dict so its keys
# follow the insertion order of `factors` (done in MetadataBias class as well).
is_categorical = dict(cls_categorical)
is_categorical.update(int_categorical)
is_categorical.update(ext_categorical)
is_categorical = {name: is_categorical[name] for name in factors}

# Keep a reference to the class-label array as stored in `factors`.
# NOTE(review): the previous comment here mentioned mapping non-numeric
# variables to integers, but no such mapping happens in the visible cells.
orig_class = factors["class"]

In [None]:
# Names of the factors listed as continuous (order preserved).
continuous_factors = [
    # box_* and dist_* names
    "box_width", "box_height", "box_area", "box_aspect_ratio",
    "dist_to_center", "dist_to_edge",
    # remaining names
    "day_of_year", "gsd", "cloud_cover",
    "target_azimuth_dbl", "sun_azimuth_dbl",
    "sun_elevation_dbl", "off_nadir_angle_dbl",
]

# Names of the factors listed as discrete.
# NOTE(review): this list is not referenced in the cells visible here.
discrete_factors = [
    "month",
    "season",
    "utm",
    "country_code",
]

# Create the MetadataParity object and pass it the combined `factors` dict
# together with the `continuous_factors` name list.
# NOTE(review): presumably the second argument marks which factors are
# continuous; `is_categorical` and `discrete_factors` are not passed here.
# Confirm against the metadataparity API that set_factors does not need them.
mdp = metadataparity.MetadataParity()
mdp.set_factors(factors, continuous_factors)

In [None]:
# Run the evaluation; the call returns a pair that is unpacked into
# chi_matrix and p_matrix.
# NOTE(review): presumably chi-square statistics and matching p-values, one
# entry per factor name. Confirm shape and meaning against metadataparity.
chi_matrix, p_matrix = mdp.evaluate()

In [None]:
# Build one summary string per factor name, pairing each name with the entry
# at the same position in chi_matrix and p_matrix, then print the mapping.
results = {
    name: f"chisquare={chi_matrix[idx]}, p={p_matrix[idx]}"
    for idx, name in enumerate(mdp.all_factor_names)
}
print(results)