# Extract red (bad) features

In [9]:
from openpyxl import load_workbook

# Load workbook
wb = load_workbook("../data/data_description_colored.xlsx")
ws = wb.active

# Read the cell with bad feature
cell = ws["A26"]
# Extract fill color
fill = cell.fill

# Color stored here:
# fill.start_color.rgb  → returns ARGB (e.g., FF5EB91E)
# fill.start_color.indexed → for indexed palette colors
# fill.start_color.theme → theme color index

color = fill.start_color

if color.type == "rgb":
    print("RGB/ARGB:", color.rgb)

elif color.type == "indexed":
    print("Indexed color:", color.indexed)

elif color.type == "theme":
    print("Theme color:", color.theme, "with tint:", color.tint)

else:
    print("Unknown color type", color.type)


RGB/ARGB: FFC9211E


In [14]:
TARGET_COLOR = str(color.rgb)  # red color used for bad features

In [16]:
from openpyxl import load_workbook
import pandas as pd
import numpy as np

def load_feature_groups(path):
    wb = load_workbook(path, data_only=True)
    ws = wb.active

    def get_color(cell):
        if cell.fill.patternType is None:
            return None
        return cell.fill.fgColor.rgb

    good = []
    bad = []

    for row in ws.iter_rows(min_row=2, values_only=False):  # skip header
        feature_name = row[1].value      # column B = feature name in Dutch
        color = get_color(row[1])        

        if color == TARGET_COLOR:
            bad.append(feature_name)
        else:
            good.append(feature_name)

    return good, bad

good_features, bad_features = load_feature_groups("../data/data_description_colored.xlsx")
good_features, bad_features


(['adres_aantal_brp_adres',
  'adres_aantal_verschillende_wijken',
  'adres_aantal_verzendadres',
  'adres_aantal_woonadres_handmatig',
  'adres_dagen_op_adres',
  'adres_recentst_onderdeel_rdam',
  'adres_recentste_buurt_groot_ijsselmonde',
  'adres_recentste_buurt_nieuwe_westen',
  'adres_recentste_buurt_other',
  'adres_recentste_buurt_oude_noorden',
  'adres_recentste_buurt_vreewijk',
  'adres_recentste_plaats_other',
  'adres_recentste_plaats_rotterdam',
  'adres_recentste_wijk_charlois',
  'adres_recentste_wijk_delfshaven',
  'adres_recentste_wijk_feijenoord',
  'adres_recentste_wijk_ijsselmonde',
  'adres_recentste_wijk_kralingen_c',
  'adres_recentste_wijk_noord',
  'adres_recentste_wijk_other',
  'adres_recentste_wijk_prins_alexa',
  'adres_recentste_wijk_stadscentru',
  'adres_unieke_wijk_ratio',
  'afspraak_aanmelding_afgesloten',
  'afspraak_afgelopen_jaar_afsprakenplan',
  'afspraak_afgelopen_jaar_monitoring_insp__wet_taaleis_na_12_mnd_n_a_v__taa04_____geen_maatregel',
  '

In [17]:
import json

good_bad_dict = {
    "good_features": good_features,
    "bad_features": bad_features
}

with open("feature_groups.json", "w") as f:
    json.dump(good_bad_dict, f, indent=4)
