In [1]:
# STEP A: Build 23→24 change profile for one fire (Lake)
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import box
import rasterio
from rasterio import windows
from rasterio.features import rasterize
from rasterio.warp import reproject, Resampling
from dbfread import DBF

# ---------- Config ----------
RING_BUFFER_M = 1000  # 1 km ring
TRACK_DOM_THRESHOLD = 0.55  # used later in step B, but we compute shares here

# ---------- Helpers ----------

def load_lut_from_dbf(dbf_path, value_col='VALUE', model_col='FBFM40'):
    """Load the raster-code -> fuel-model lookup from a .dbf attribute table.

    Parameters
    ----------
    dbf_path : path of the .dbf file to read.
    value_col : name of the field holding the numeric raster code.
    model_col : name of the field holding the fuel-model label.

    Returns
    -------
    pandas.DataFrame with exactly two columns: ``VALUE`` (cast to int) and
    ``MODEL`` (cast to str and upper-cased).
    """
    records = DBF(dbf_path, load=True, char_decode_errors='ignore')
    picked = pd.DataFrame(iter(records))[[value_col, model_col]].copy()
    picked.columns = ['VALUE', 'MODEL']
    # Upper-case the labels so later joins on MODEL are case-insensitive.
    return picked.assign(
        MODEL=picked['MODEL'].astype(str).str.upper(),
        VALUE=picked['VALUE'].astype(int),
    )

def read_base_and_crop(tif_path, geom_union, buffer_m=0):
    """Read band 1 of ``tif_path`` cropped to the buffered bounds of a geometry.

    Parameters
    ----------
    tif_path : raster file opened with ``rasterio.open``.
    geom_union : geometry whose ``buffer(buffer_m).bounds`` define the crop;
        its coordinates are used directly against the raster transform, so it
        is presumably expected to be in the raster's CRS already.
    buffer_m : buffer distance passed to ``geom_union.buffer`` (in the units
        of the geometry's coordinates).

    Returns
    -------
    (arr, profile, nodata) : the cropped array, a copy of the raster profile
    with ``height``/``width``/``transform`` replaced by those of the crop
    window, and the raster's nodata value.
    """
    with rasterio.open(tif_path) as src:
        nodata = src.nodata
        bounds = geom_union.buffer(buffer_m).bounds  # (minx, miny, maxx, maxy)
        # Snap the window to whole pixels so the crop stays on the source grid.
        win = windows.from_bounds(*bounds, transform=src.transform)
        win = win.round_offsets().round_lengths()
        # boundless=True pads with nodata where the window leaves the raster.
        arr = src.read(1, window=win, boundless=True, fill_value=nodata)
        profile = src.profile.copy()
        profile.update(
            height=win.height,
            width=win.width,
            transform=windows.transform(win, src.transform),
        )
    return arr, profile, nodata

def reproject_to_match(src_tif, dst_profile):
    """Warp band 1 of ``src_tif`` onto the grid described by ``dst_profile``.

    Nearest-neighbour resampling is used. The output array has the shape
    ``(dst_profile['height'], dst_profile['width'])``, the dtype of the
    source band, and is pre-filled with the source nodata value (or 0 when
    the source declares none).

    Returns
    -------
    (dst, nodata) : the warped array and the source raster's nodata value.
    """
    with rasterio.open(src_tif) as src:
        src_nodata = src.nodata
        fill = 0 if src_nodata is None else src_nodata
        out_shape = (dst_profile['height'], dst_profile['width'])
        dst = np.full(out_shape, fill, dtype=src.dtypes[0])
        reproject(
            source=rasterio.band(src, 1),
            destination=dst,
            src_transform=src.transform,
            src_crs=src.crs,
            dst_transform=dst_profile['transform'],
            dst_crs=dst_profile['crs'],
            resampling=Resampling.nearest,
            dst_nodata=src_nodata,
        )
        return dst, src_nodata

def rasterize_mask(geoms, profile, burn_value=1):
    """Burn geometries into a uint8 mask on the grid described by ``profile``.

    Parameters
    ----------
    geoms : iterable of geometries, or None. ``None`` entries and geometries
        whose ``is_empty`` attribute is true are skipped, so a column with
        missing/empty geometries does not reach ``rasterize``.
    profile : mapping providing ``height``, ``width`` and ``transform``.
    burn_value : value written where a geometry covers a pixel.

    Returns
    -------
    numpy.ndarray of dtype uint8 and shape ``(height, width)``; 0 everywhere
    that no geometry was burned. When there is nothing to burn an all-zero
    mask is returned without calling ``rasterize``.
    """
    out_shape = (profile['height'], profile['width'])
    shapes = [] if geoms is None else [
        (g, burn_value)
        for g in geoms
        # getattr keeps non-shapely inputs (e.g. GeoJSON-like dicts) untouched.
        if g is not None and not getattr(g, 'is_empty', False)
    ]
    if not shapes:
        return np.zeros(out_shape, dtype=np.uint8)
    return rasterize(
        shapes,
        out_shape=out_shape,
        transform=profile['transform'],
        fill=0,
        all_touched=False,
        dtype=np.uint8
    )

def df_transitions(src_codes, dst_codes, src_name, dst_name):
    """Tabulate src→dst transitions with counts and row-normalized shares.

    Parameters
    ----------
    src_codes, dst_codes : equal-length sequences paired element by element.
    src_name, dst_name : column names to give the two code columns.

    Returns
    -------
    pandas.DataFrame with columns ``src_name``, ``dst_name``, ``count``
    (pairs observed), ``row_total`` (all pairs sharing that source code) and
    ``row_share`` (``count / row_total``), ordered by source code and then by
    descending count. Pairs with a missing value are not counted
    (``value_counts`` drops them by default).
    """
    pairs = pd.DataFrame({src_name: src_codes, dst_name: dst_codes})
    table = pairs.value_counts().rename('count').reset_index()
    # Total per source code, broadcast back onto every row of that source.
    table['row_total'] = table.groupby(src_name)['count'].transform('sum')
    table['row_share'] = table['count'] / table['row_total'].replace(0, np.nan)
    table = table.sort_values(by=[src_name, 'count'], ascending=[True, False])
    return table.reset_index(drop=True)

def add_model_meta(series_value, lut_df, fbfm40_meta):
    """Map numeric VALUE→MODEL and attach the fuel-model metadata columns.

    Parameters
    ----------
    series_value : sequence/Series of raster codes, one per pixel.
    lut_df : DataFrame with columns ``VALUE`` and ``MODEL``; ``VALUE`` must
        be unique.
    fbfm40_meta : DataFrame keyed by ``MODEL`` (expects columns MODEL,
        general_type, climate); ``MODEL`` must be unique.

    Returns
    -------
    pandas.DataFrame with one row per input code, in input order, holding
    ``VALUE``, ``MODEL`` and every other column of the two lookups. Codes or
    models missing from a lookup yield NaN in the joined columns.

    Raises
    ------
    pandas.errors.MergeError
        If either lookup has duplicate keys. A duplicate would silently
        multiply rows and break callers that pair the result positionally
        with another per-pixel table.
    """
    codes = pd.DataFrame({'VALUE': series_value})
    with_model = codes.merge(lut_df, on='VALUE', how='left', validate='many_to_one')
    return with_model.merge(fbfm40_meta, on='MODEL', how='left', validate='many_to_one')

def ring_context_tables(model24_ring, fam24_ring, track24_ring, fbfm40_meta, lut_df):
    """Placeholder for the ring (2024) context summaries; currently a no-op.

    The intended output is the modal model per (family, climate) and the
    track shares per family, but those tables are built directly inside
    ``build_change_profile_stepA`` instead. This function is kept only so the
    name stays available: it ignores all of its arguments and returns None.
    """
    # Intentionally unimplemented; see the docstring.
    return None

# ---------- Main step A function ----------

def _union_all(geo):
    """Dissolve every geometry of a GeoSeries/GeoDataFrame into one geometry."""
    # GeoPandas 1.0 deprecated ``unary_union`` in favour of ``union_all()``;
    # fall back to the attribute on older releases that lack the method.
    if hasattr(geo, 'union_all'):
        return geo.union_all()
    return geo.unary_union


def _lookup_year(values, year, lut_df, meta):
    """Map one year's raster codes to VALUE/MODEL/FAM/CLIM columns suffixed with ``year``."""
    tbl = add_model_meta(pd.Series(values, name=f'VALUE{year}'), lut_df, meta).rename(columns={
        'VALUE': f'VALUE{year}', 'MODEL': f'MODEL{year}',
        'general_type': f'FAM{year}', 'climate': f'CLIM{year}',
    }).reset_index(drop=True)
    # The two years are paired row-by-row later, so the lookup must stay 1:1.
    if len(tbl) != len(values):
        raise ValueError(
            f"Lookup for {year} returned {len(tbl)} rows for {len(values)} pixels; "
            "check the LUT / fbfm40_meta for duplicate keys."
        )
    return tbl[[f'VALUE{year}', f'MODEL{year}', f'FAM{year}', f'CLIM{year}']]


def build_change_profile_stepA(
    lf24_tif, lf23_tif,
    dbf_path,
    fbfm40_meta,          # DataFrame: MODEL | general_type | climate
    scar_gdf,             # GeoDataFrame of fire scar polygons (reprojected to the LF24 CRS if it differs)
    recent_burns_20yr_gdf=None,  # optional GeoDataFrame of recent burns to exclude from ring
    ring_buffer_m=RING_BUFFER_M
):
    """
    Build the 2023→2024 fuel-model change profile inside a fire scar and in a
    ring around it.

    Parameters
    ----------
    lf24_tif, lf23_tif : paths of the 2024 and 2023 fuel-model rasters. The
        2024 raster defines the working grid; 2023 is warped onto it.
    dbf_path : .dbf attribute table mapping raster VALUE to model name.
    fbfm40_meta : DataFrame with columns MODEL, general_type, climate.
    scar_gdf : fire-scar polygons.
    recent_burns_20yr_gdf : optional polygons removed from the ring.
    ring_buffer_m : ring width, in the units of the raster CRS (assumed to be
        metres -- confirm the rasters use a projected CRS).

    Returns a dict:
      - model2model_inside, model2model_ring
      - family2family_inside, family2family_ring
      - track2track_inside, track2track_ring  (track = "family | climate")
        each with columns <src>, <dst>, count, row_total, row_share
      - ring_modal_by_family_climate: FAM24, CLIM24, MODEL_modal_fc,
        modal_count, fc_total, modal_share_in_fc
      - ring_track_shares_within_family: FAM24, CLIM24, cnt, fam_total,
        track_share_in_fam
      - pixels_inside, pixels_ring: number of valid pixels in each mask

    Raises
    ------
    ValueError
        If the VALUE→MODEL or MODEL→metadata lookup is not one-to-one per
        pixel (duplicate keys in a lookup table).
    """
    # 1) Put the vector inputs in the LF24 CRS *before* deriving anything from
    #    them, so the crop window and the ring use raster coordinates.
    with rasterio.open(lf24_tif) as src:
        base_crs = src.crs
    if scar_gdf.crs != base_crs:
        scar_gdf = scar_gdf.to_crs(base_crs)
    if recent_burns_20yr_gdf is not None and recent_burns_20yr_gdf.crs != base_crs:
        recent_burns_20yr_gdf = recent_burns_20yr_gdf.to_crs(base_crs)

    # 2) Read LF24 as base window around the scar; reproject LF23 to match.
    #    The window reaches 1.5x the ring width so the whole ring lies inside it.
    scar_union = _union_all(scar_gdf)
    lf24_arr, base_prof, nodata24 = read_base_and_crop(lf24_tif, scar_union, buffer_m=ring_buffer_m*1.5)
    lf23_arr, nodata23 = reproject_to_match(lf23_tif, base_prof)

    # 3) Build masks: inside & ring (buffer outside, minus inside, minus recent burns)
    scar_mask = rasterize_mask(list(scar_gdf.geometry), base_prof, burn_value=1)

    ring_geom = _union_all(scar_gdf.buffer(ring_buffer_m)).difference(scar_union)
    if recent_burns_20yr_gdf is not None and len(recent_burns_20yr_gdf) > 0:
        ring_geom = ring_geom.difference(_union_all(recent_burns_20yr_gdf))

    ring_mask = rasterize_mask([ring_geom] if not ring_geom.is_empty else [], base_prof, burn_value=1)

    # 4) Build valid-data mask (both years not nodata)
    valid = np.ones(lf24_arr.shape, dtype=bool)
    if nodata24 is not None:
        valid &= (lf24_arr != nodata24)
    if nodata23 is not None:
        valid &= (lf23_arr != nodata23)

    # 5) Flatten arrays by mask
    inside_mask = (scar_mask == 1) & valid
    ring_mask   = (ring_mask == 1) & valid

    v24_inside = lf24_arr[inside_mask].astype(int, copy=False)
    v23_inside = lf23_arr[inside_mask].astype(int, copy=False)
    v24_ring   = lf24_arr[ring_mask].astype(int, copy=False)
    v23_ring   = lf23_arr[ring_mask].astype(int, copy=False)

    # 6) VALUE→MODEL; MODEL→family/climate
    lut_df = load_lut_from_dbf(dbf_path)  # VALUE | MODEL
    meta = fbfm40_meta.copy()
    meta['MODEL'] = meta['MODEL'].astype(str).str.upper()

    # Same-position rows of the 2023 and 2024 tables describe the same pixel.
    inside_df = pd.concat(
        [_lookup_year(v23_inside, '23', lut_df, meta),
         _lookup_year(v24_inside, '24', lut_df, meta)],
        axis=1
    )
    ring_df = pd.concat(
        [_lookup_year(v23_ring, '23', lut_df, meta),
         _lookup_year(v24_ring, '24', lut_df, meta)],
        axis=1
    )

    # 7) Transition tables (counts + row_share)
    # Model→Model
    model2model_inside = df_transitions(inside_df['MODEL23'], inside_df['MODEL24'], 'MODEL23', 'MODEL24')
    model2model_ring   = df_transitions(ring_df['MODEL23'],   ring_df['MODEL24'],   'MODEL23', 'MODEL24')

    # Family→Family
    family2family_inside = df_transitions(inside_df['FAM23'], inside_df['FAM24'], 'FAM23', 'FAM24')
    family2family_ring   = df_transitions(ring_df['FAM23'],   ring_df['FAM24'],   'FAM23', 'FAM24')

    # (Family,Climate) → (Family,Climate), encoded as "family | climate" strings
    for df in (inside_df, ring_df):
        for yr in ('23', '24'):
            df[f'TRACK{yr}'] = df[f'FAM{yr}'].astype(str) + ' | ' + df[f'CLIM{yr}'].astype(str)

    track2track_inside = df_transitions(inside_df['TRACK23'], inside_df['TRACK24'], 'TRACK23', 'TRACK24')
    track2track_ring   = df_transitions(ring_df['TRACK23'],   ring_df['TRACK24'],   'TRACK23', 'TRACK24')

    # 8) Ring (2024) context:
    # 8a) Modal MODEL per (FAM, CLIM) + share within that (FAM, CLIM)
    rg24 = ring_df[['MODEL24', 'FAM24', 'CLIM24']]
    grp = rg24.groupby(['FAM24', 'CLIM24', 'MODEL24']).size().reset_index(name='cnt')
    # pick modal by (FAM,CLIM)
    idx = grp.groupby(['FAM24', 'CLIM24'])['cnt'].idxmax()
    modal_by_fc = grp.loc[idx, ['FAM24', 'CLIM24', 'MODEL24', 'cnt']].rename(
        columns={'MODEL24': 'MODEL_modal_fc', 'cnt': 'modal_count'}
    )
    # compute share within (FAM,CLIM)
    totals_fc = grp.groupby(['FAM24', 'CLIM24'])['cnt'].sum().rename('fc_total').reset_index()
    ring_modal_by_family_climate = modal_by_fc.merge(totals_fc, on=['FAM24', 'CLIM24'])
    ring_modal_by_family_climate['modal_share_in_fc'] = (
        ring_modal_by_family_climate['modal_count'] / ring_modal_by_family_climate['fc_total']
    )

    # 8b) Track shares within each Family (CLIM share within FAM)
    fc_counts = rg24.groupby(['FAM24', 'CLIM24']).size().reset_index(name='cnt')
    fam_totals = fc_counts.groupby('FAM24')['cnt'].sum().rename('fam_total').reset_index()
    ring_track_shares_within_family = fc_counts.merge(fam_totals, on='FAM24')
    ring_track_shares_within_family['track_share_in_fam'] = (
        ring_track_shares_within_family['cnt'] / ring_track_shares_within_family['fam_total']
    )

    # Pack results
    out = {
        'model2model_inside': model2model_inside,
        'model2model_ring':   model2model_ring,
        'family2family_inside': family2family_inside,
        'family2family_ring':   family2family_ring,
        'track2track_inside': track2track_inside,
        'track2track_ring':   track2track_ring,
        'ring_modal_by_family_climate': ring_modal_by_family_climate.sort_values(['FAM24','CLIM24']).reset_index(drop=True),
        'ring_track_shares_within_family': ring_track_shares_within_family.sort_values(['FAM24','track_share_in_fam'], ascending=[True, False]).reset_index(drop=True),
        'pixels_inside': int(inside_mask.sum()),
        'pixels_ring':   int(ring_mask.sum()),
    }
    return out


In [None]:
# Inputs: absolute paths on the author's machine -- edit before running elsewhere.
# lf24_tif / lf23_tif are the 2024 and 2023 fuel-model rasters passed to Step A,
# dbf_path is the attribute table read for the VALUE -> model lookup, and
# project_gpkg is the GeoPackage the fire-scar layer is read from.
lf24_tif = r"C:\Users\bsf31\Documents\data\NL060\landfire_meszxc7dfpgmqh\LF2024_FBFM40_250_CONUS\LC24_F40_250.tif"
lf23_tif = r"C:\Users\bsf31\Documents\data\NL060\LFV2023\LF2023_FBFM40_240_CONUS\LC23_F40_240.tif"
dbf_path = r"C:\Users\bsf31\Documents\data\NL060\landfire_meszxc7dfpgmqh\LF2024_FBFM40_250_CONUS\LF24_F40_250.dbf"
project_gpkg   = r"C:\Users\bsf31\Documents\data\NL060\fire_scar_training_regions.gpkg"


# Fuel-model metadata keyed by model code. Each value is a 3-tuple that becomes
# the columns (general_type, climate, summary) of `fbfm40_meta`:
#   general_type -> the "family" used in the family→family tables
#   climate      -> combined with the family to form a "track"
#   summary      -> free-text description (not used by Step A)
meta = {
    # --- GRASS ---
    "GR1": ("Grass", "Arid–semiarid (EMC 15%)", "Short, patchy, possibly grazed; spread moderate; flame low."),
    "GR2": ("Grass", "Arid–semiarid (EMC 15%)", "Moderately coarse, ~1 ft; spread high; flame moderate."),
    "GR4": ("Grass", "Arid–semiarid (EMC 15%)", "Moderately coarse, ~2 ft; spread very high; flame high."),
    "GR7": ("Grass", "Arid–semiarid (EMC 15%)", "Moderately coarse, ~3 ft; spread very high; flame very high."),
    "GR3": ("Grass", "Subhumid–humid (EMC 30–40%)", "Very coarse, ~2 ft; spread high; flame moderate."),
    "GR5": ("Grass", "Subhumid–humid (EMC 30–40%)", "Dense, coarse, 1–2 ft; spread very high; flame high."),
    "GR6": ("Grass", "Subhumid–humid (EMC 30–40%)", "Dryland grass 1–2 ft; spread very high; flame very high."),
    "GR8": ("Grass", "Subhumid–humid (EMC 30–40%)", "Heavy, coarse, 3–5 ft; spread very high; flame very high."),
    "GR9": ("Grass", "Subhumid–humid (EMC 30–40%)", "Very heavy, coarse, 5–8 ft; spread extreme; flame extreme."),
    # --- GRASS-SHRUB ---
    "GS1": ("Grass-Shrub", "Arid–semiarid (EMC 15%)", "Shrubs ~1 ft, low grass; spread moderate; flame low."),
    "GS2": ("Grass-Shrub", "Arid–semiarid (EMC 15%)", "Shrubs 1–3 ft, moderate grass; spread high; flame moderate."),
    "GS3": ("Grass-Shrub", "Subhumid–humid (EMC 30–40%)", "Moderate grass/shrub <2 ft; spread high; flame moderate."),
    "GS4": ("Grass-Shrub", "Subhumid–humid (EMC 30–40%)", "Heavy grass/shrub >2 ft; spread high; flame very high."),
    # --- SHRUB ---
    "SH1": ("Shrub", "Arid–semiarid (EMC 15%)", "Low shrub load ~1 ft; spread very low; flame very low."),
    "SH2": ("Shrub", "Arid–semiarid (EMC 15%)", "Moderate load ~1 ft; no grass; spread low; flame low."),
    "SH5": ("Shrub", "Arid–semiarid (EMC 15%)", "Heavy shrubs 4–6 ft; spread very high; flame very high."),
    "SH7": ("Shrub", "Arid–semiarid (EMC 15%)", "Very heavy shrubs 4–6 ft; spread high; flame very high."),
    "SH3": ("Shrub", "Subhumid–humid (EMC 30–40%)", "Moderate shrubs (maybe pine/herb); 2–3 ft; spread low; flame low."),
    "SH4": ("Shrub", "Subhumid–humid (EMC 30–40%)", "Low–moderate shrubs/litter (~3 ft); spread high; flame moderate."),
    "SH6": ("Shrub", "Subhumid–humid (EMC 30–40%)", "Dense shrubs, little/no herb; ~2 ft; spread high; flame high."),
    "SH8": ("Shrub", "Subhumid–humid (EMC 30–40%)", "Dense shrubs, ~3 ft; spread high; flame high."),
    "SH9": ("Shrub", "Subhumid–humid (EMC 30–40%)", "Dense, fine-branched, 4–6 ft; spread high; flame very high."),
    # --- TIMBER-UNDERSTORY ---
    "TU1": ("Timber-Understory", "Semiarid–subhumid (EMC 20%)", "Low grass/shrub + litter; spread low; flame low."),
    "TU2": ("Timber-Understory", "Humid (EMC 30%)", "Moderate litter with shrubs; spread moderate; flame low."),
    "TU3": ("Timber-Understory", "Humid (EMC 30%)", "Moderate litter + grass/shrubs; spread high; flame moderate."),
    "TU4": ("Timber-Understory", "Semiarid–subhumid (EMC 20%)", "Short conifers w/ grass/moss; spread moderate; flame moderate."),
    "TU5": ("Timber-Understory", "Semiarid–subhumid (EMC 20%)", "High conifer litter + shrubs; spread moderate; flame moderate."),
    # --- TIMBER LITTER ---
    "TL1": ("Timber Litter", "Recently burned", "Light–moderate load, 1–2 in deep; spread very low; flame very low."),
    "TL2": ("Timber Litter", "Broadleaf litter", "Low load, compact; spread very low; flame very low."),
    "TL3": ("Timber Litter", "Other conifer litter", "Moderate conifer litter; spread very low; flame low."),
    "TL4": ("Timber Litter", "Mixed fine & coarse", "Moderate load incl. small logs; spread low; flame low."),
    "TL5": ("Timber Litter", "Conifer litter", "High load; light slash/mortality; spread low; flame low."),
    "TL6": ("Timber Litter", "Broadleaf litter", "Moderate load, less compact; spread moderate; flame low."),
    "TL7": ("Timber Litter", "Mixed fine & coarse", "Heavy load incl. larger logs; spread low; flame low."),
    "TL8": ("Timber Litter", "Long-needle pine", "Moderate load/compact; some herb; spread moderate; flame low."),
    "TL9": ("Timber Litter", "Broadleaf / needle drape", "Very high load; spread moderate; flame moderate."),
    # --- SLASH/BLOWDOWN ---
    "SB1": ("Slash-Blowdown", "Activity fuel", "10–20 t/ac; fuels 1–3 in; <1 ft depth; spread moderate; flame low."),
    "SB2": ("Slash-Blowdown", "Activity fuel", "7–12 t/ac; even 0–3 in; ~1 ft; spread moderate; flame moderate."),
    "SB3": ("Slash-Blowdown", "Activity fuel", "7–12 t/ac; weighted to <0.25 in; >1 ft; spread high; flame high."),
    # blowdown variants share codes with SB2/SB3/SB4 as behavior descriptors
    "SB4": ("Slash-Blowdown", "Blowdown (total)", "Total blowdown; not compacted; foliage attached; spread very high; flame very high."),
    # --- NONBURNABLE ---
    # The "—" climate placeholder means all of these share one "Nonburnable | —" track.
    "NB1": ("Nonburnable", "—", "Urban/suburban; insufficient wildland fuel."),
    "NB2": ("Nonburnable", "—", "Snow/ice."),
    "NB3": ("Nonburnable", "—", "Agricultural field maintained nonburnable."),
    "NB8": ("Nonburnable", "—", "Open water."),
    "NB9": ("Nonburnable", "—", "Bare ground."),
}
# Turn the dict into a flat table: MODEL | general_type | climate | summary.
fbfm40_meta = pd.DataFrame.from_dict(
    meta, orient="index", columns=["general_type", "climate", "summary"]
)
# Name the index first so that reset_index() emits it as the MODEL column.
fbfm40_meta = fbfm40_meta.rename_axis("MODEL").reset_index()

# Fire scar polygons: the "lake2024_5070" layer of the project GeoPackage.
scar_gdf = gpd.read_file(project_gpkg, layer="lake2024_5070")


In [3]:
# Optional: FRAP (≤20yr) polygons clipped to AOI
#recent_burns_20yr_gdf = gpd.read_file(".../recent_burns_20yr.gpkg", layer="recent_burns")

In [4]:
# Run Step A for the scar loaded above. The recent-burn exclusion is left out
# because `recent_burns_20yr_gdf` is not loaded in this notebook.
profile = build_change_profile_stepA(
    scar_gdf=scar_gdf,
    # recent_burns_20yr_gdf=recent_burns_20yr_gdf,
    fbfm40_meta=fbfm40_meta,
    dbf_path=dbf_path,
    lf23_tif=lf23_tif,
    lf24_tif=lf24_tif,
    ring_buffer_m=1000,
)



  scar_union = scar_gdf.unary_union
  ring_geom = scar_gdf.buffer(ring_buffer_m).unary_union.difference(scar_union)


In [5]:
# Print the first 20 rows of each family/track transition table, then of the
# two ring-context tables (which get a human-readable heading instead of the key).
for k in ['family2family_inside','family2family_ring','track2track_inside','track2track_ring']:
    print(f"\n== {k} ==")
    print(profile[k].head(20))

for heading, table_key in (
    ("\n== ring modal by (family, climate) ==", 'ring_modal_by_family_climate'),
    ("\n== ring track shares within family ==", 'ring_track_shares_within_family'),
):
    print(heading)
    print(profile[table_key].head(20))


== family2family_inside ==
          FAM23              FAM24  count  row_total  row_share
0         Grass              Grass  14255      15648   0.910979
1         Grass        Grass-Shrub   1377      15648   0.087998
2         Grass        Nonburnable      8      15648   0.000511
3         Grass              Shrub      7      15648   0.000447
4         Grass      Timber Litter      1      15648   0.000064
5   Grass-Shrub        Grass-Shrub  14500      41857   0.346418
6   Grass-Shrub              Shrub  12451      41857   0.297465
7   Grass-Shrub      Timber Litter   7844      41857   0.187400
8   Grass-Shrub              Grass   6973      41857   0.166591
9   Grass-Shrub        Nonburnable     86      41857   0.002055
10  Grass-Shrub  Timber-Understory      3      41857   0.000072
11  Nonburnable        Nonburnable   5090       6320   0.805380
12  Nonburnable              Shrub    707       6320   0.111867
13  Nonburnable        Grass-Shrub    263       6320   0.041614
14  Nonburna

In [6]:
# Model → model transition tables (actual fuel-model codes), first 30 rows each.
# Only the second heading is preceded by a blank line.
for lead, table_key in (("", 'model2model_inside'), ("\n", 'model2model_ring')):
    print(f"{lead}== {table_key} ==")
    print(profile[table_key].head(30))  # or .to_string()


== model2model_inside ==
   MODEL23 MODEL24  count  row_total  row_share
0      GR1     GR1     22         39   0.564103
1      GR1     NB9      8         39   0.205128
2      GR1     SH2      5         39   0.128205
3      GR1     GS1      2         39   0.051282
4      GR1     GS2      2         39   0.051282
5      GR2     GR1  10885      10920   0.996795
6      GR2     GR2     24      10920   0.002198
7      GR2     GS2      5      10920   0.000458
8      GR2     GS1      3      10920   0.000275
9      GR2     SH2      2      10920   0.000183
10     GR2     TL1      1      10920   0.000092
11     GR3     GR2   3292       4689   0.702069
12     GR3     GS1   1356       4689   0.289187
13     GR3     GR1     32       4689   0.006824
14     GR3     GS2      9       4689   0.001919
15     GS1     GR1   1225       2200   0.556818
16     GS1     SH2    824       2200   0.374545
17     GS1     NB9     86       2200   0.039091
18     GS1     GS1     36       2200   0.016364
19     GS1     

In [7]:
# Changed-family view of the Step A model→model tables.
# Incoming columns: MODEL23, MODEL24, count, row_total, row_share.
mm_in  = profile['model2model_inside'].copy()
mm_ring = profile['model2model_ring'].copy()

# One MODEL -> family lookup per year, named so each merges on its own key.
fam_map23 = fbfm40_meta[['MODEL','general_type']].rename(columns={'MODEL':'MODEL23','general_type':'FAM23'})
fam_map24 = fbfm40_meta[['MODEL','general_type']].rename(columns={'MODEL':'MODEL24','general_type':'FAM24'})

# Attach the 2023 family, then the 2024 family, to every transition row.
mm_in = mm_in.merge(fam_map23, on='MODEL23', how='left')
mm_in = mm_in.merge(fam_map24, on='MODEL24', how='left')
mm_ring = mm_ring.merge(fam_map23, on='MODEL23', how='left')
mm_ring = mm_ring.merge(fam_map24, on='MODEL24', how='left')

# Keep only rows whose family differs between years. Note that NaN != NaN, so
# rows with an unmapped family on both sides are kept as well.
mm_in_cf  = mm_in[mm_in['FAM23'].ne(mm_in['FAM24'])].copy()
mm_ring_cf = mm_ring[mm_ring['FAM23'].ne(mm_ring['FAM24'])].copy()

# Re-normalize per MODEL23 within the changed-family subset only.
for cf in (mm_in_cf, mm_ring_cf):
    cf['row_total_cf'] = cf.groupby('MODEL23')['count'].transform('sum')
    cf['row_share_cf'] = cf['count'] / cf['row_total_cf'].replace(0, np.nan)

# Tidy view
print("== model→model (inside), changed-family only ==")
print(mm_in_cf.sort_values(['MODEL23','count'], ascending=[True, False]).head(40))

print("\n== model→model (ring), changed-family only ==")
print(mm_ring_cf.sort_values(['MODEL23','count'], ascending=[True, False]).head(40))

# Matrix view (inside, changed-family only): rows = MODEL23, columns = MODEL24
# in sorted order, cells = row_share_cf (0 where the pair never occurs).
mm_in_mat = mm_in_cf.pivot_table(index='MODEL23', columns='MODEL24', values='row_share_cf', fill_value=0)
mm_in_mat = mm_in_mat.sort_index().reindex(columns=sorted(mm_in_cf['MODEL24'].unique()))
print("\n== model→model (inside) matrix, changed-family row-normalized ==")
print(mm_in_mat.round(3))


== model→model (inside), changed-family only ==
   MODEL23 MODEL24  count  row_total  row_share        FAM23  \
1      GR1     NB9      8         39   0.205128        Grass   
2      GR1     SH2      5         39   0.128205        Grass   
3      GR1     GS1      2         39   0.051282        Grass   
4      GR1     GS2      2         39   0.051282        Grass   
7      GR2     GS2      5      10920   0.000458        Grass   
8      GR2     GS1      3      10920   0.000275        Grass   
9      GR2     SH2      2      10920   0.000183        Grass   
10     GR2     TL1      1      10920   0.000092        Grass   
12     GR3     GS1   1356       4689   0.289187        Grass   
14     GR3     GS2      9       4689   0.001919        Grass   
15     GS1     GR1   1225       2200   0.556818  Grass-Shrub   
16     GS1     SH2    824       2200   0.374545  Grass-Shrub   
17     GS1     NB9     86       2200   0.039091  Grass-Shrub   
20     GS1     TL1      8       2200   0.003636  Grass-S

In [8]:
# Inside-scar model→model matrix: rows = MODEL23, columns = MODEL24,
# cells = row_share (0 where the pair never occurs), both axes sorted.
mm_inside = profile['model2model_inside']
mm_mat = (
    mm_inside
    .pivot_table(index='MODEL23', columns='MODEL24', values='row_share', fill_value=0)
    .sort_index(axis=0)
    .sort_index(axis=1)
)
print("\n== model→model row-normalized (inside) ==")
print(mm_mat.round(3))



== model→model row-normalized (inside) ==
MODEL24    GR1    GR2    GR3    GS1    GS2    GS3    NB1  NB8    NB9    SH1  \
MODEL23                                                                       
GR1      0.564  0.000  0.000  0.051  0.051  0.000  0.000  0.0  0.205  0.000   
GR2      0.997  0.002  0.000  0.000  0.000  0.000  0.000  0.0  0.000  0.000   
GR3      0.007  0.702  0.000  0.289  0.002  0.000  0.000  0.0  0.000  0.000   
GS1      0.557  0.000  0.000  0.016  0.007  0.000  0.000  0.0  0.039  0.002   
GS2      0.191  0.000  0.000  0.189  0.000  0.000  0.000  0.0  0.000  0.000   
GS3      0.000  0.000  0.000  0.232  0.687  0.000  0.000  0.0  0.000  0.081   
NB1      0.000  0.032  0.007  0.000  0.035  0.012  0.778  0.0  0.000  0.000   
NB8      0.000  0.000  0.000  0.000  0.000  0.000  0.000  1.0  0.000  0.000   
NB9      0.000  0.000  0.000  0.000  0.000  0.000  0.000  0.0  1.000  0.000   
SH2      0.020  0.000  0.000  0.063  0.018  0.000  0.000  0.0  0.012  0.051   
SH4      

In [9]:
# Alias (not a copy) of the inside-scar model→model transition table.
transition_tbl = profile['model2model_inside']

In [10]:
# Display the full inside-scar model→model table.
transition_tbl

Unnamed: 0,MODEL23,MODEL24,count,row_total,row_share
0,GR1,GR1,22,39,0.564103
1,GR1,NB9,8,39,0.205128
2,GR1,SH2,5,39,0.128205
3,GR1,GS1,2,39,0.051282
4,GR1,GS2,2,39,0.051282
...,...,...,...,...,...
122,TU5,TL6,3986,13508,0.295084
123,TU5,TL4,25,13508,0.001851
124,TU5,SH2,22,13508,0.001629
125,TU5,GS2,1,13508,0.000074


In [11]:
# Keep only inside-scar transitions observed on more than 100 pixels.
transition_tbl = profile['model2model_inside'].loc[lambda d: d['count'] > 100]

In [12]:
# Display the filtered (count > 100) table; the original row labels are kept.
transition_tbl

Unnamed: 0,MODEL23,MODEL24,count,row_total,row_share
5,GR2,GR1,10885,10920,0.996795
11,GR3,GR2,3292,4689,0.702069
12,GR3,GS1,1356,4689,0.289187
15,GS1,GR1,1225,2200,0.556818
16,GS1,SH2,824,2200,0.374545
23,GS2,SH2,10840,30154,0.359488
24,GS2,TL1,7810,30154,0.259004
25,GS2,GR1,5748,30154,0.190621
26,GS2,GS1,5704,30154,0.189162
33,GS3,GS2,6527,9503,0.686836


In [13]:
# Cross-family transitions inside the scar, largest first.
# Start from the full table: MODEL23, MODEL24, count, row_total, row_share.
mm = profile['model2model_inside'].copy()

# MODEL -> family lookup as a plain dict
fam_map = dict(zip(fbfm40_meta['MODEL'], fbfm40_meta['general_type']))

# Tag both years with their family
mm = mm.assign(FAM23=mm['MODEL23'].map(fam_map), FAM24=mm['MODEL24'].map(fam_map))

# A row qualifies when the family changed AND it covers more than 100 pixels.
cross_fam = mm[(mm['FAM23'] != mm['FAM24']) & (mm['count'] > 100)].sort_values('count', ascending=False)

cross_fam.head(20)


Unnamed: 0,MODEL23,MODEL24,count,row_total,row_share,FAM23,FAM24
67,SH5,GS1,30852,62653,0.492427,Shrub,Grass-Shrub
68,SH5,GS2,19154,62653,0.305716,Shrub,Grass-Shrub
58,SH4,GS2,16633,30579,0.543935,Shrub,Grass-Shrub
59,SH4,GS1,12662,30579,0.414075,Shrub,Grass-Shrub
23,GS2,SH2,10840,30154,0.359488,Grass-Shrub,Shrub
24,GS2,TL1,7810,30154,0.259004,Grass-Shrub,Timber Litter
25,GS2,GR1,5748,30154,0.190621,Grass-Shrub,Grass
120,TU5,TL1,5376,13508,0.397986,Timber-Understory,Timber Litter
122,TU5,TL6,3986,13508,0.295084,Timber-Understory,Timber Litter
12,GR3,GS1,1356,4689,0.289187,Grass,Grass-Shrub


In [14]:
# Pixel totals per (2023 family, 2024 family) pair, largest first.
# `cross_fam` was already restricted to transitions with count > 100, so these
# totals omit the small transitions and can be lower than the corresponding
# family2family_inside counts.
cross_fam_summary = cross_fam.groupby(['FAM23', 'FAM24'])['count'].sum()
cross_fam_summary = cross_fam_summary.sort_values(ascending=False)
cross_fam_summary.head(20)


FAM23              FAM24        
Shrub              Grass-Shrub      79541
Grass-Shrub        Shrub            12433
Timber-Understory  Timber Litter    10048
Grass-Shrub        Timber Litter     7810
                   Grass             6973
Grass              Grass-Shrub       1356
Nonburnable        Shrub              611
Shrub              Timber Litter      387
Nonburnable        Grass-Shrub        194
                   Grass              178
Name: count, dtype: int64