# 02_01_build_tag_id_features preview

Quick inspection of the tag lookup table and the train/test wide matrices generated in `data/interim/02_01_build_tag_id_features/`.



In [1]:
from collections import OrderedDict
from pathlib import Path

import pandas as pd
from IPython.display import display

# Notebook-wide pandas display settings: never truncate columns and raise the
# row display limit to 400.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", 400)

# Path of this notebook, expressed relative to the directory used as the
# fallback base below (the current working directory).
NOTEBOOK_RELATIVE_PATH = Path("notebooks/data/interim/02_01_build_tag_id_features/preview.ipynb")
try:
    # `__file__` exists only when the code is executed as a script/module.
    NOTEBOOK_PATH = Path(__file__).resolve()
except NameError:  # running interactively
    # No `__file__` available: derive the path from the current working directory.
    NOTEBOOK_PATH = (Path.cwd() / NOTEBOOK_RELATIVE_PATH).resolve()


def resolve_project_root(notebook_path: Path) -> Path:
    for candidate in notebook_path.parents:
        if (candidate / "data").exists() and (candidate / "src").exists():
            return candidate
    return notebook_path.parents[-1]


# Project root and the directories this notebook reads from.
PROJECT_ROOT = resolve_project_root(NOTEBOOK_PATH)
OUTPUT_DIR = PROJECT_ROOT / "data" / "interim" / "02_01_build_tag_id_features"
RAW_SIGNATE_DIR = PROJECT_ROOT / "data" / "raw" / "signate"

# Feature name (as stored in the lookup's `feature_name` column) -> metadata.
# `column_prefix` is combined with a tag id as "<prefix>_<tag_id>" to form the
# encoded column names looked up in the train/test matrices.
TAG_FEATURES = OrderedDict(
    [
        ("unit_tag_id", {"column_prefix": "unit_tag"}),
        ("building_tag_id", {"column_prefix": "building_tag"}),
        ("statuses", {"column_prefix": "status_tag"}),
    ]
)
# Human-readable headings used when printing the activation leaderboards.
FEATURE_DISPLAY_NAMES = {
    "unit_tag_id": "unit_tag_id（住戸タグ）",
    "building_tag_id": "building_tag_id（建物タグ）",
    "statuses": "statuses（募集ステータス）",
}
# Parquet artifacts inspected by this notebook.
TAG_IDS_PATH = OUTPUT_DIR / "tag_ids.parquet"
TRAIN_MATRIX_PATH = OUTPUT_DIR / "train_tag_ids.parquet"
TEST_MATRIX_PATH = OUTPUT_DIR / "test_tag_ids.parquet"
# Master CSVs used to attach tag names to tag ids.
TAG_MASTER_PATH = RAW_SIGNATE_DIR / "tag_master.csv"
INFRA_TAG_MASTER_PATH = RAW_SIGNATE_DIR / "infra_tags.csv"
# Number of rows shown in each matrix preview.
HEAD_ROWS = 10
# Default number of tags kept in each activation leaderboard.
ACTIVATION_TOP_N = 50
# `bukken_type_label` value -> Japanese section title
# ("戸建て" = detached house, "マンション" = condominium).
TYPE_LABEL_TITLES = {
    "kodate": "戸建て",
    "mansion": "マンション",
}
# Defaults for the column overview: samples drawn per column, overview rows
# displayed per chunk, and the base seed for sampling.
COLUMN_OVERVIEW_SAMPLE_SIZE = 5
COLUMN_OVERVIEW_CHUNK_SIZE = 200
COLUMN_OVERVIEW_RANDOM_SEED = 314159


def read_parquet_or_warn(path: Path) -> pd.DataFrame | None:
    """Read the parquet file at *path*, or warn and return ``None`` if it is absent.

    The warning shows *path* relative to ``PROJECT_ROOT``.
    """
    if path.exists():
        return pd.read_parquet(path)
    print(f"⚠️ Missing file: {path.relative_to(PROJECT_ROOT)}")
    return None



In [2]:
def _column_extreme(series: pd.Series, method: str) -> object:
    """Return ``series.min()`` / ``series.max()``, or ``pd.NA`` if values are not orderable."""
    try:
        return getattr(series, method)(skipna=True)
    except TypeError:
        # Object columns mixing non-comparable types (e.g. str and int) cannot
        # be ordered; report "no value" instead of aborting the whole overview.
        return pd.NA


def build_column_overview(
    df: pd.DataFrame,
    *,
    sample_size: int = COLUMN_OVERVIEW_SAMPLE_SIZE,
    random_state: int | None = COLUMN_OVERVIEW_RANDOM_SEED,
) -> pd.DataFrame:
    """Build a one-row-per-column summary of *df*.

    Args:
        df: Frame to summarize.
        sample_size: Number of ``random_sample_<i>`` columns to emit; each holds
            a randomly drawn non-null value, padded with ``pd.NA`` when the
            column has fewer non-null values than requested.
        random_state: Base seed for sampling. Column ``idx`` is sampled with
            ``random_state + idx``; ``None`` disables seeding.

    Returns:
        A frame with ``column_name``, ``row_count``, ``not_null_count``,
        ``not_null_rate_pct``, ``min``, ``max`` and the sample columns.
        ``min`` / ``max`` are ``pd.NA`` for columns whose values cannot be
        compared with each other.
    """
    row_count = len(df)
    overview_records: list[dict[str, object]] = []
    sample_columns = [f"random_sample_{i + 1}" for i in range(sample_size)]
    # DataFrame.items() yields exactly one Series per column, so duplicated
    # column labels are summarized individually instead of producing a
    # DataFrame from ``df[column]``.
    for idx, (column, series) in enumerate(df.items()):
        not_null_count = int(series.notna().sum())
        not_null_rate = (not_null_count / row_count * 100) if row_count else 0.0
        non_null = series.dropna()
        sample_n = min(sample_size, len(non_null))
        samples_list: list[object] = []
        if sample_n > 0:
            # Offset the seed per column so columns do not all sample the same rows.
            seed = None if random_state is None else random_state + idx
            samples_list = non_null.sample(n=sample_n, random_state=seed, replace=False).tolist()
        record: dict[str, object] = {
            "column_name": column,
            "row_count": row_count,
            "not_null_count": not_null_count,
            "not_null_rate_pct": round(not_null_rate, 2),
            "min": _column_extreme(series, "min"),
            "max": _column_extreme(series, "max"),
        }
        for sample_idx, col_name in enumerate(sample_columns):
            record[col_name] = samples_list[sample_idx] if sample_idx < len(samples_list) else pd.NA
        overview_records.append(record)
    return pd.DataFrame(overview_records)


def display_column_overview(
    df: pd.DataFrame | None,
    label: str,
    *,
    sample_size: int = COLUMN_OVERVIEW_SAMPLE_SIZE,
    chunk_size: int = COLUMN_OVERVIEW_CHUNK_SIZE,
    random_state: int | None = COLUMN_OVERVIEW_RANDOM_SEED,
) -> None:
    """Print a header for *label* and display the column overview of *df* in chunks.

    A notice is printed instead when *df* is ``None`` or has no columns. The
    overview comes from ``build_column_overview`` and is displayed
    ``chunk_size`` overview rows at a time.
    """
    print(f"\n--- {label}: カラムサマリー ---")
    if df is None:
        print("⚠️ DataFrame が None のためサマリーを表示できません。")
        return

    overview = None
    if df.shape[1] != 0:
        overview = build_column_overview(
            df=df,
            sample_size=sample_size,
            random_state=random_state,
        )
    if overview is None or overview.empty:
        print("(列が存在しません)")
        return

    n_columns = len(overview)
    print(f"{n_columns} columns")
    for first in range(0, n_columns, chunk_size):
        last = min(first + chunk_size, n_columns)
        print(f"columns {first + 1}-{last} / {n_columns}")
        display(overview.iloc[first:last])


def summarize_tag_lookup(df: pd.DataFrame) -> None:
    """Print and display a summary of the tag lookup table *df*.

    Shows the lookup path and shape, the number of rows per ``feature_name``
    (labelled ``unique_tag_ids``), the first 20 rows, and the column overview.
    """
    print("=== tag_ids lookup ===")
    print(f"path: {TAG_IDS_PATH.relative_to(PROJECT_ROOT)}")
    n_rows, n_cols = df.shape
    print(f"shape: {n_rows} rows x {n_cols} cols")

    rows_per_feature = df.groupby("feature_name").size().reset_index(name="unique_tag_ids")
    display(rows_per_feature)
    display(df.head(20))
    display_column_overview(df, label="tag_ids lookup")


def summarize_matrix(label: str, path: Path, df: pd.DataFrame, tag_lookup: pd.DataFrame) -> None:
    """Print and display a summary of one encoded tag matrix.

    Args:
        label: Name used in the printed headings (e.g. "train").
        path: Location the matrix was read from; shown relative to ``PROJECT_ROOT``.
        df: The matrix itself.
        tag_lookup: Lookup table with ``feature_name`` and ``tag_id`` columns,
            used to derive the expected column names per feature.

    For every feature in ``TAG_FEATURES`` the expected columns
    ("<column_prefix>_<tag_id>") are checked against *df*; a warning is printed
    for missing ones and per-feature statistics are displayed for the columns
    that are present. A preview of the first rows and the column overview follow.
    """
    print(f"\n=== {label} matrix ===")
    print(f"path: {path.relative_to(PROJECT_ROOT)}")
    print(f"shape: {df.shape[0]} rows x {df.shape[1]} cols")

    block_summaries = []
    for feature_name, meta in TAG_FEATURES.items():
        tag_ids = (
            tag_lookup.loc[tag_lookup["feature_name"] == feature_name, "tag_id"].tolist()
        )
        columns = [f"{meta['column_prefix']}_{tag}" for tag in tag_ids]
        present_cols = [col for col in columns if col in df.columns]
        missing_count = len(columns) - len(present_cols)
        if missing_count:
            print(f"⚠️ {feature_name}: {missing_count} columns missing from matrix (unexpected)")
        if not present_cols:
            continue
        per_row = df[present_cols].sum(axis=1)
        block_summaries.append(
            {
                "feature_name": feature_name,
                "n_columns": len(present_cols),
                "mean_tags_per_row": per_row.mean(),
                "rows_with_any_tag_pct": (per_row > 0).mean() * 100,
                # Reuse the row sums instead of copying the whole block into a
                # dense ndarray; this also skips missing cells consistently
                # with the other statistics.
                "total_tag_hits": int(per_row.sum()),
            }
        )
    if block_summaries:
        summary_df = pd.DataFrame(block_summaries)
        display(summary_df)

    # Preview: data_id first (when the matrix has it), then the first 15 other columns.
    other_cols = [col for col in df.columns if col != "data_id"][:15]
    id_cols = ["data_id"] if "data_id" in df.columns else []
    preview_cols = [*id_cols, *other_cols]
    display(df[preview_cols].head(HEAD_ROWS))
    display_column_overview(df, label=f"{label} matrix")



In [3]:
# Load the tag lookup table and summarize it when the file is available.
tag_lookup = read_parquet_or_warn(TAG_IDS_PATH)
if tag_lookup is None:
    print("⚠️ Skipping matrix inspection because tag lookup is missing.")
else:
    summarize_tag_lookup(tag_lookup)



=== tag_ids lookup ===
path: data/interim/02_01_build_tag_id_features/tag_ids.parquet
shape: 359 rows x 2 cols


Unnamed: 0,feature_name,unique_tag_ids
0,building_tag_id,90
1,statuses,152
2,unit_tag_id,117


Unnamed: 0,feature_name,tag_id
0,unit_tag_id,110201
1,unit_tag_id,110202
2,unit_tag_id,110301
3,unit_tag_id,110302
4,unit_tag_id,110503
5,unit_tag_id,110601
6,unit_tag_id,110603
7,unit_tag_id,110901
8,unit_tag_id,110902
9,unit_tag_id,110903



--- tag_ids lookup: カラムサマリー ---
2 columns
columns 1-2 / 2


Unnamed: 0,column_name,row_count,not_null_count,not_null_rate_pct,min,max,random_sample_1,random_sample_2,random_sample_3,random_sample_4,random_sample_5
0,feature_name,359,359,100.0,building_tag_id,unit_tag_id,statuses,unit_tag_id,unit_tag_id,building_tag_id,statuses
1,tag_id,359,359,100.0,110101,714303,433301,260503,260501,335001,210302


In [4]:
# Summarize the train and test matrices; a matrix whose file is missing is skipped.
if tag_lookup is not None:
    matrices = [("train", TRAIN_MATRIX_PATH), ("test", TEST_MATRIX_PATH)]
    for label, path in matrices:
        matrix_df = read_parquet_or_warn(path)
        if matrix_df is not None:
            summarize_matrix(label, path, matrix_df, tag_lookup)




=== train matrix ===
path: data/interim/02_01_build_tag_id_features/train_tag_ids.parquet
shape: 363924 rows x 362 cols


Unnamed: 0,feature_name,n_columns,mean_tags_per_row,rows_with_any_tag_pct,total_tag_hits
0,unit_tag_id,117,13.077604,82.620547,4759254
1,building_tag_id,90,5.546317,92.266242,2018438
2,statuses,152,13.342126,96.540486,4855520


Unnamed: 0,data_id,bukken_type,bukken_type_label,unit_tag_110201,unit_tag_110202,unit_tag_110301,unit_tag_110302,unit_tag_110503,unit_tag_110601,unit_tag_110603,unit_tag_110901,unit_tag_110902,unit_tag_110903,unit_tag_113401,unit_tag_113402,unit_tag_210101
0,0,1202,kodate,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,1202,kodate,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,1202,kodate,0,0,0,0,0,0,0,0,0,0,0,0,0
3,3,1202,kodate,0,0,0,0,0,0,0,0,0,0,0,0,0
4,4,1302,mansion,0,0,0,0,0,0,0,0,0,0,0,0,0
5,5,1202,kodate,0,0,0,0,0,0,0,0,0,0,0,0,1
6,6,1202,kodate,0,0,0,0,0,0,0,0,0,0,0,0,0
7,7,1202,kodate,0,0,0,0,0,0,0,0,0,0,0,0,0
8,8,1202,kodate,0,0,0,0,0,0,0,0,0,0,0,0,1
9,9,1202,kodate,0,0,0,0,0,0,0,0,0,0,0,0,0



--- train matrix: カラムサマリー ---
362 columns
columns 1-200 / 362


Unnamed: 0,column_name,row_count,not_null_count,not_null_rate_pct,min,max,random_sample_1,random_sample_2,random_sample_3,random_sample_4,random_sample_5
0,data_id,363924,363924,100.0,0,363923,84721,38497,115179,68594,713
1,bukken_type,363924,363924,100.0,1202,1302,1202,1302,1302,1302,1302
2,bukken_type_label,363924,363924,100.0,kodate,mansion,mansion,kodate,mansion,kodate,mansion
3,unit_tag_110201,363924,363924,100.0,0,1,0,0,0,0,0
4,unit_tag_110202,363924,363924,100.0,0,1,0,0,0,0,0
5,unit_tag_110301,363924,363924,100.0,0,1,0,0,0,0,0
6,unit_tag_110302,363924,363924,100.0,0,1,0,0,0,0,0
7,unit_tag_110503,363924,363924,100.0,0,1,0,0,0,0,0
8,unit_tag_110601,363924,363924,100.0,0,1,0,0,0,0,0
9,unit_tag_110603,363924,363924,100.0,0,1,0,0,0,0,0


columns 201-362 / 362


Unnamed: 0,column_name,row_count,not_null_count,not_null_rate_pct,min,max,random_sample_1,random_sample_2,random_sample_3,random_sample_4,random_sample_5
200,building_tag_714003,363924,363924,100.0,0,1,0,0,0,0,0
201,building_tag_714101,363924,363924,100.0,0,1,0,0,0,0,0
202,building_tag_714102,363924,363924,100.0,0,1,0,0,0,0,0
203,building_tag_714103,363924,363924,100.0,0,1,0,0,0,0,0
204,building_tag_714201,363924,363924,100.0,0,1,0,0,0,0,0
205,building_tag_714202,363924,363924,100.0,0,1,0,0,0,0,0
206,building_tag_714203,363924,363924,100.0,0,1,0,0,0,0,0
207,building_tag_714301,363924,363924,100.0,0,1,0,0,0,0,0
208,building_tag_714302,363924,363924,100.0,0,1,0,0,0,0,0
209,building_tag_714303,363924,363924,100.0,0,1,0,0,0,0,0



=== test matrix ===
path: data/interim/02_01_build_tag_id_features/test_tag_ids.parquet
shape: 112437 rows x 362 cols


Unnamed: 0,feature_name,n_columns,mean_tags_per_row,rows_with_any_tag_pct,total_tag_hits
0,unit_tag_id,117,14.750233,86.330123,1658472
1,building_tag_id,90,5.573557,92.932931,626674
2,statuses,152,14.900575,97.815666,1675376


Unnamed: 0,data_id,bukken_type,bukken_type_label,unit_tag_110201,unit_tag_110202,unit_tag_110301,unit_tag_110302,unit_tag_110503,unit_tag_110601,unit_tag_110603,unit_tag_110901,unit_tag_110902,unit_tag_110903,unit_tag_113401,unit_tag_113402,unit_tag_210101
0,0,1302,mansion,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,1202,kodate,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,1202,kodate,0,0,0,0,0,0,0,0,0,0,0,0,0
3,3,1202,kodate,0,0,0,0,0,0,0,0,0,0,0,0,1
4,4,1202,kodate,0,0,0,0,0,0,0,0,0,0,0,0,0
5,5,1202,kodate,0,0,0,0,0,0,0,0,0,0,0,0,0
6,6,1202,kodate,0,0,0,0,0,0,0,0,0,0,0,0,0
7,7,1302,mansion,0,0,0,0,0,0,0,0,0,0,0,0,0
8,8,1202,kodate,0,0,0,0,0,0,0,0,0,0,0,0,0
9,9,1202,kodate,0,0,0,0,0,0,0,0,0,0,0,0,0



--- test matrix: カラムサマリー ---
362 columns
columns 1-200 / 362


Unnamed: 0,column_name,row_count,not_null_count,not_null_rate_pct,min,max,random_sample_1,random_sample_2,random_sample_3,random_sample_4,random_sample_5
0,data_id,112437,112437,100.0,0,99999,80075,86027,5668,111827,9382
1,bukken_type,112437,112437,100.0,1202,1302,1302,1202,1302,1202,1302
2,bukken_type_label,112437,112437,100.0,kodate,mansion,kodate,mansion,mansion,kodate,mansion
3,unit_tag_110201,112437,112437,100.0,0,1,0,0,0,0,0
4,unit_tag_110202,112437,112437,100.0,0,1,0,0,0,0,0
5,unit_tag_110301,112437,112437,100.0,0,1,0,0,0,0,0
6,unit_tag_110302,112437,112437,100.0,0,1,0,0,0,0,0
7,unit_tag_110503,112437,112437,100.0,0,1,0,0,0,0,0
8,unit_tag_110601,112437,112437,100.0,0,1,0,0,0,0,0
9,unit_tag_110603,112437,112437,100.0,0,1,0,0,0,0,0


columns 201-362 / 362


Unnamed: 0,column_name,row_count,not_null_count,not_null_rate_pct,min,max,random_sample_1,random_sample_2,random_sample_3,random_sample_4,random_sample_5
200,building_tag_714003,112437,112437,100.0,0,1,0,0,0,0,0
201,building_tag_714101,112437,112437,100.0,0,1,0,0,0,0,0
202,building_tag_714102,112437,112437,100.0,0,1,0,0,0,0,0
203,building_tag_714103,112437,112437,100.0,0,1,0,0,0,0,0
204,building_tag_714201,112437,112437,100.0,0,1,0,0,0,0,0
205,building_tag_714202,112437,112437,100.0,0,1,0,0,0,0,0
206,building_tag_714203,112437,112437,100.0,0,1,0,0,0,0,0
207,building_tag_714301,112437,112437,100.0,0,1,0,0,0,0,0
208,building_tag_714302,112437,112437,100.0,0,1,0,0,0,0,0
209,building_tag_714303,112437,112437,100.0,0,1,0,0,0,0,0


In [5]:
def load_tag_master_table(path: Path) -> pd.DataFrame | None:
    if not path.exists():
        print(f"⚠️ Missing master file: {path.relative_to(PROJECT_ROOT)}")
        return None
    df = pd.read_csv(path, dtype={"TAG_ID": "string"})
    rename_map = {"TAG_ID": "tag_id", "VALUE": "tag_value"}
    df = df.rename(columns=rename_map)
    optional_cols: list[str] = []
    if "MEMO" in df.columns:
        df = df.rename(columns={"MEMO": "tag_note"})
        optional_cols.append("tag_note")
    df = df.drop(columns=["ID"], errors="ignore")
    keep_cols = ["tag_id", "tag_value", *optional_cols]
    df = df[keep_cols].drop_duplicates(subset="tag_id", keep="first")
    df["tag_id"] = df["tag_id"].astype("string[python]")
    df["tag_value"] = df["tag_value"].astype("string[python]")
    if "tag_note" in df.columns:
        df["tag_note"] = df["tag_note"].astype("string[python]")
    return df


def compute_activation_leaderboards(
    matrix_df: pd.DataFrame,
    feature_name: str,
    master_df: pd.DataFrame,
    *,
    top_n: int = ACTIVATION_TOP_N,
) -> dict[str, pd.DataFrame]:
    """Rank the tags of one feature by activation rate, per property type.

    Args:
        matrix_df: Encoded matrix containing ``bukken_type_label`` and the
            feature's "<column_prefix>_<tag_id>" columns.
        feature_name: Key of ``TAG_FEATURES`` selecting the column prefix.
        master_df: Tag master with ``tag_id`` / ``tag_value`` (and optionally
            ``tag_note``), left-joined onto the statistics by ``tag_id``.
        top_n: Maximum number of tags kept per leaderboard.

    Returns:
        Mapping from each ``TYPE_LABEL_TITLES`` key that has rows in
        *matrix_df* to its leaderboard. Empty when *feature_name* is unknown
        or the matrix has no column with the feature's prefix.
    """
    meta = TAG_FEATURES.get(feature_name)
    if meta is None:
        return {}
    prefix = f"{meta['column_prefix']}_"
    feature_cols = [col for col in matrix_df.columns if col.startswith(prefix)]
    if not feature_cols:
        print(f"⚠️ {feature_name}: encoded columns not found in matrix")
        return {}

    leaderboards: dict[str, pd.DataFrame] = {}
    for type_label in TYPE_LABEL_TITLES:
        subset = matrix_df.loc[matrix_df["bukken_type_label"] == type_label, feature_cols]
        if subset.empty:
            continue
        sample_size = subset.shape[0]
        # rename_axis fixes the name of the column produced by reset_index, so
        # the result does not depend on whether the columns Index has a name.
        stats = (
            subset.sum()
            .astype("int64")
            .rename("active_count")
            .rename_axis("column_name")
            .reset_index()
        )
        stats["sample_size"] = sample_size
        stats["activation_rate"] = stats["active_count"] / stats["sample_size"]
        stats["tag_id"] = stats["column_name"].str[len(prefix) :]
        enriched = stats.merge(master_df, on="tag_id", how="left")
        enriched["tag_value"] = enriched["tag_value"].fillna("(master未登録)")
        if "tag_note" in enriched.columns:
            enriched["tag_note"] = enriched["tag_note"].fillna("")
        enriched["activation_rate_pct"] = (enriched["activation_rate"] * 100).round(2)
        # Rank on the exact count rather than the rounded percentage: within
        # one subset sample_size is constant, so the count orders tags exactly
        # like the unrounded rate, and tag_id breaks exact ties deterministically.
        sorted_df = (
            enriched.sort_values(["active_count", "tag_id"], ascending=[False, True])
            .head(top_n)
            .reset_index(drop=True)
        )
        sorted_df.insert(0, "rank", sorted_df.index + 1)
        columns = [
            "rank",
            "tag_id",
            "tag_value",
            "activation_rate_pct",
            "active_count",
            "sample_size",
        ]
        if "tag_note" in sorted_df.columns:
            columns.append("tag_note")
        leaderboards[type_label] = sorted_df[columns]
    return leaderboards


def display_activation_leaderboards(
    matrix_df: pd.DataFrame,
    feature_name: str,
    master_df: pd.DataFrame,
) -> None:
    if matrix_df is None or master_df is None:
        print(f"⚠️ {feature_name}: source matrix or master is missing")
        return
    display_name = FEATURE_DISPLAY_NAMES.get(feature_name, feature_name)
    leaderboards = compute_activation_leaderboards(
        matrix_df=matrix_df,
        feature_name=feature_name,
        master_df=master_df,
        top_n=ACTIVATION_TOP_N,
    )
    if not leaderboards:
        print(f"⚠️ {feature_name}: ランキングを作成できませんでした")
        return
    print(f"\n=== {display_name}: activation_rate_pct 上位 {ACTIVATION_TOP_N} ===")
    for type_label, type_title in TYPE_LABEL_TITLES.items():
        table = leaderboards.get(type_label)
        if table is None:
            print(f"({type_title}) データがありません")
            continue
        print(f"\n-- {type_title} --")
        display(table)



In [6]:
# Inputs for the activation rankings: the train matrix plus both tag masters.
train_matrix_df = read_parquet_or_warn(TRAIN_MATRIX_PATH)
unit_building_master = load_tag_master_table(TAG_MASTER_PATH)
status_master = load_tag_master_table(INFRA_TAG_MASTER_PATH)

if train_matrix_df is not None:
    # Unit and building tags share one master; statuses use the infra tag master.
    feature_master_map = {
        "unit_tag_id": unit_building_master,
        "building_tag_id": unit_building_master,
        "statuses": status_master,
    }
    for feature_name, master_df in feature_master_map.items():
        display_activation_leaderboards(train_matrix_df, feature_name, master_df)
else:
    print("⚠️ train_tag_ids.parquet が見つからないため、activation率のランキングをスキップします。")




=== unit_tag_id（住戸タグ）: activation_rate_pct 上位 50 ===

-- 戸建て --


Unnamed: 0,rank,tag_id,tag_value,activation_rate_pct,active_count,sample_size,tag_note
0,1,230401,システムキッチン,58.41,96552,165310,戸に紐づくもの
1,2,290101,フローリング,49.2,81336,165310,戸に紐づくもの
2,3,220701,温水洗浄便座,47.54,78593,165310,戸に紐づくもの
3,4,290401,バルコニー,46.52,76897,165310,戸に紐づくもの
4,5,220401,追い焚き,42.08,69568,165310,戸に紐づくもの
5,6,223101,独立洗面台,41.19,68083,165310,戸に紐づくもの
6,7,220601,洗髪洗面化粧台,40.69,67259,165310,戸に紐づくもの
7,8,220101,専用バス,40.61,67133,165310,戸に紐づくもの
8,9,220201,専用トイレ,39.81,65812,165310,戸に紐づくもの
9,10,290901,室内洗濯機置場,39.73,65673,165310,戸に紐づくもの



-- マンション --


Unnamed: 0,rank,tag_id,tag_value,activation_rate_pct,active_count,sample_size,tag_note
0,1,290401,バルコニー,73.93,146839,198614,戸に紐づくもの
1,2,340102,2階以上,70.83,140686,198614,戸に紐づくもの
2,3,230401,システムキッチン,68.35,135752,198614,戸に紐づくもの
3,4,290101,フローリング,59.76,118697,198614,戸に紐づくもの
4,5,220301,バス・トイレ別,58.68,116541,198614,戸に紐づくもの
5,6,220701,温水洗浄便座,58.47,116124,198614,戸に紐づくもの
6,7,290901,室内洗濯機置場,58.2,115600,198614,戸に紐づくもの
7,8,230801,給湯,53.13,105518,198614,戸に紐づくもの
8,9,310501,TVモニタ付インターホン,52.72,104701,198614,戸に紐づくもの
9,10,223101,独立洗面台,51.58,102436,198614,戸に紐づくもの



=== building_tag_id（建物タグ）: activation_rate_pct 上位 50 ===

-- 戸建て --


Unnamed: 0,rank,tag_id,tag_value,activation_rate_pct,active_count,sample_size,tag_note
0,1,210101,公営水道,79.2,130930,165310,棟に紐づくもの
1,2,210301,下水,66.68,110222,165310,棟に紐づくもの
2,3,210201,都市ガス,43.49,71887,165310,棟に紐づくもの
3,4,210202,プロパンガス,27.0,44632,165310,棟に紐づくもの
4,5,340301,角地,13.4,22146,165310,棟に紐づくもの
5,6,210302,浄化槽,12.74,21057,165310,棟に紐づくもの
6,7,210401,オール電化,11.08,18315,165310,棟に紐づくもの
7,8,320901,バイク置き場あり,6.12,10113,165310,棟に紐づくもの
8,9,321001,駐輪場あり,4.53,7494,165310,棟に紐づくもの
9,10,294201,太陽光発電システム,3.88,6421,165310,棟に紐づくもの



-- マンション --


Unnamed: 0,rank,tag_id,tag_value,activation_rate_pct,active_count,sample_size,tag_note
0,1,210101,公営水道,88.18,175132,198614,棟に紐づくもの
1,2,210301,下水,87.33,173446,198614,棟に紐づくもの
2,3,320101,エレベーター,86.59,171973,198614,棟に紐づくもの
3,4,210201,都市ガス,84.55,167934,198614,棟に紐づくもの
4,5,321001,駐輪場あり,82.39,163630,198614,棟に紐づくもの
5,6,310101,オートロック,62.04,123230,198614,棟に紐づくもの
6,7,320901,バイク置き場あり,59.26,117689,198614,棟に紐づくもの
7,8,310201,防犯カメラ,50.6,100498,198614,棟に紐づくもの
8,9,321101,宅配ボックス,46.54,92442,198614,棟に紐づくもの
9,10,330501,タイル貼り,36.62,72725,198614,棟に紐づくもの



=== statuses（募集ステータス）: activation_rate_pct 上位 50 ===

-- 戸建て --


Unnamed: 0,rank,tag_id,tag_value,activation_rate_pct,active_count,sample_size
0,1,210101,公営水道,78.97,130542,165310
1,2,210301,下水,65.99,109080,165310
2,3,230401,システムキッチン,49.03,81047,165310
3,4,210201,都市ガス,42.32,69952,165310
4,5,220701,温水洗浄便座,35.96,59446,165310
5,6,290101,フローリング,33.42,55252,165310
6,7,290401,バルコニー,32.72,54097,165310
7,8,220401,追焚機能,30.27,50043,165310
8,9,220601,シャワー付洗面化粧台,28.5,47118,165310
9,10,310501,TVモニタ付インターホン,27.13,44855,165310



-- マンション --


Unnamed: 0,rank,tag_id,tag_value,activation_rate_pct,active_count,sample_size
0,1,340102,2階以上,89.52,177801,198614
1,2,290401,バルコニー,72.76,144513,198614
2,3,320101,エレベーター,69.32,137670,198614
3,4,230401,システムキッチン,67.01,133082,198614
4,5,210101,公営水道,58.88,116934,198614
5,6,210301,下水,57.07,113341,198614
6,7,210201,都市ガス,55.64,110518,198614
7,8,220701,温水洗浄便座,50.92,101137,198614
8,9,290101,フローリング,45.56,90498,198614
9,10,310101,オートロック,45.0,89382,198614
