In [1]:
import pandas as pd

# --- 0. read master ---
df = pd.read_csv("country_data.csv", dtype="string").fillna("")

# --- 1. keep only EU+EEA countries ---
eu_countries = df[df["Continent"] == "Europe"]

# --- 2. numeric conversion ---
num_cols = ["Minimum Sentence [POSSESSION]", "Maximum Sentence [POSSESSION]"]
eu_countries[num_cols] = eu_countries[num_cols].apply(pd.to_numeric, errors="coerce")

# --- 3. reshape: ONLY the minimum sentences, for all offence types ---
metrics = [c for c in df.columns if c.startswith("Minimum Sentence")]
tidy = eu_countries.melt(
    id_vars=["Country"],
    value_vars=metrics,
    var_name="Metric",
    value_name="Years"
)

# --- 4. drop rows where Years is NaN ---
tidy = tidy.dropna(subset=["Years"])

# --- 5. write file ---
tidy.to_csv("tidy_penalties_EU.csv", index=False)
print("✔ Wrote", len(tidy), "rows to tidy_penalties_EU.csv")


✔ Wrote 17 rows to tidy_penalties_EU.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eu_countries[num_cols] = eu_countries[num_cols].apply(pd.to_numeric, errors="coerce")


In [10]:
# TIDY-CSV EXPORT PANEL  (copy this single cell into a notebook)
# -------------------------------------------------------------
from pathlib import Path
import pandas as pd, ipywidgets as w
from IPython.display import display

# ── 0. read the wide master once ─────────────────────────────
df = pd.read_csv("country_data.csv", dtype="string").fillna("")

# columns that should stay wide (identifiers)
id_cols = ["Country", "Continent"]

# ── 1. widget: multi-select list of possible MEASURE columns ─
measure_options = [c for c in df.columns if c not in id_cols]

measure_select = w.SelectMultiple(
    options=measure_options,
    description="Metrics (⌘/Ctrl-click to select many):",
    rows=12,
    style={"description_width": "initial"},
    layout=w.Layout(width="700px", height="280px")
)

# ── 2. widget: output filename textbox ───────────────────────
file_box = w.Text(
    value="tidy_output.csv",
    description="Output file:",
    layout=w.Layout(width="300px")
)

# ── 3. Generate button + info area ───────────────────────────
run_btn  = w.Button(description="Generate tidy CSV", button_style="success")
info_out = w.Output()

# ── 4. callback that builds & writes the tidy file ───────────
def make_tidy(_):
    chosen = list(measure_select.value)
    if not chosen:
        with info_out:
            info_out.clear_output()
            print("⚠ Select at least one metric column.")
        return

    # 4a. reshape from wide to long
    tidy = df.melt(
        id_vars=id_cols,
        value_vars=chosen,
        var_name="Metric",
        value_name="Value"
    )

    # 4b. numeric conversion
    mask = tidy["Metric"].str.contains("Sentence", na=False)
    num  = pd.to_numeric(tidy.loc[mask, "Value"], errors="coerce")

    # ensure the column can hold mixed types (strings + floats)
    tidy["Value"] = tidy["Value"].astype("object")
    tidy.loc[mask, "Value"] = num

    # 4c. build tidy/filename.csv and ensure folder exists
    out_path = Path("tidy") / Path(file_box.value).with_suffix(".csv")
    out_path.parent.mkdir(exist_ok=True)   # creates tidy/ if absent

    tidy.to_csv(out_path, index=False, encoding="utf-8")

    # 4d. feedback
    with info_out:
        info_out.clear_output()
        print(f"✅ Wrote {len(tidy)} rows → {out_path}")
        display(tidy.head())

run_btn.on_click(make_tidy)

# ── 5. assemble the panel ───────────────────────────────────
panel = w.VBox([measure_select, file_box, run_btn, info_out])
display(panel)


VBox(children=(SelectMultiple(description='Metrics (⌘/Ctrl-click to select many):', layout=Layout(height='280p…