# In [155]:
def _yr_cols_2017_2022(df):
    yrs = []
    for c in df.columns:
        m = re.search(r'(19|20)\d{2}', str(c))
        if m:
            y = int(m.group(0))
            if 2017 <= y <= 2022:
                yrs.append((c, y))
    # sort chronologically by the year found in the name
    yrs.sort(key=lambda t: t[1])
    return [c for c, _ in yrs]

def clean_interp_2017_2022(df: pd.DataFrame):
    """
    Clean the 2017..2022 year columns of ``df`` and fill gaps along each row.

    - Selects the year columns via ``_yr_cols_2017_2022``
    - In those columns, turns '..' placeholders (optionally surrounded by
      whitespace) into NaN, then coerces the values to numeric
    - Interpolates each row across the year columns with
      ``limit_direction="both"``
    - Prints a before/after missing-value summary and shows the report
    - Returns (clean_df, report_df); ``clean_df`` is a modified copy, the
      input frame is not changed. ``report_df`` has one row per year column
      with ``missing_before``, ``missing_after`` and ``reduced``.

    Raises ValueError when no 2017..2022 column is found.
    """
    cols = _yr_cols_2017_2022(df)
    if not cols:
        raise ValueError("No 2017–2022 year-like columns found.")

    def _missing_stats(frame):
        # (NaN count per column, total NaN count, rows that are NaN in every column)
        na_mask = frame.isna()
        per_col = na_mask.sum()
        return per_col, int(per_col.sum()), int(na_mask.all(axis=1).sum())

    cleaned = df.copy()

    # Step 1: drop the '..' placeholders, then force the columns to numeric.
    placeholder_free = cleaned[cols].replace(r'^\s*\.\.\s*$', np.nan, regex=True)
    cleaned[cols] = placeholder_free
    cleaned[cols] = cleaned[cols].apply(pd.to_numeric, errors="coerce")

    # Step 2: snapshot missingness. This is taken after coercion, so the
    # "before" figures include the cells that were just turned into NaN.
    na_before, n_before, empty_rows_before = _missing_stats(cleaned[cols])

    # Step 3: fill along each row (axis=1), in both directions.
    cleaned[cols] = cleaned[cols].interpolate(axis=1, limit_direction="both")

    # Step 4: snapshot missingness again and build the per-column report.
    na_after, n_after, empty_rows_after = _missing_stats(cleaned[cols])

    report = pd.DataFrame({"missing_before": na_before, "missing_after": na_after})
    report["reduced"] = report["missing_before"] - report["missing_after"]

    # Step 5: print the summary.
    print("=== Year columns (2017–2022) ===")
    print(cols)
    print("\n=== Missing by column (before → after, reduced) ===")
    # NOTE(review): `display` is not defined in this block; presumably the
    # IPython/Jupyter built-in. Confirm before running outside a notebook.
    display(report)
    print("\n=== Overall ===")
    print(f"Total NaNs BEFORE: {n_before:,}")
    print(f"Total NaNs AFTER : {n_after:,}")
    print(f"Total Reduction  : {n_before - n_after:,}")
    print(f"Rows all-NaN across year cols BEFORE: {empty_rows_before:,}")
    print(f"Rows all-NaN across year cols AFTER : {empty_rows_after:,}")

    return cleaned, report

# --- Example usage on any dataset shaped like World Bank data ---
# df_gdp_clean, gdp_report = clean_interp_2017_2022(df_gdp)
# df_edu_clean, edu_report = clean_interp_2017_2022(df_edu)
# df_rnd_clean, rnd_report = clean_interp_2017_2022(df_rnd)