In [1]:
from pathlib import Path
import pandas as pd

DATA = Path("country_data.csv")

def load():
    return pd.read_csv(DATA, dtype="string").fillna("")

def save(df):
    # simple overwrite + backup
    DATA.replace(DATA.with_suffix(".bak"), missing_ok=True)
    df.to_csv(DATA, index=False, encoding="utf-8")
    print("✔ saved", DATA.name)

def show_country(df, name):
    row = df[df["Country"].str.casefold() == name.casefold()]
    return row.T  # transpose for tall view

def edit_country(df, name, **fields):
    mask = df["Country"].str.casefold() == name.casefold()
    if not mask.any():
        df.loc[len(df), "Country"] = name
        mask = df["Country"] == name
    for col, val in fields.items():
        df.loc[mask, col] = val
    print("✅ updated", name)


In [2]:
df = load()
core = ["Is possession a criminal offence? ", "Punishment [POSSESSION]"]
df[df[core].eq("").any(axis=1)][["Country"]]


Unnamed: 0,Country
15,United Kingdom
16,Russian Federation
17,New Zealand
18,Australia
19,Nigeria
21,Colombia
22,Ecuador
23,Bolivia
24,Chile
25,Brazil


In [22]:
show_country(df, "Australia")

Unnamed: 0,18
Continent,Oceania
Country,Australia
Is use a criminal offence? [USE],
Is use an administrative offence? [USE],
Punishment [USE],
What are the alternatives to punishment for the offence? [USE],
Penalty varies by drug? [USE],
Penalty varies by quantity? [USE],
Penalty (response) varies for addiction? [USE],
Penalty varies for recidivism? [USE],


In [27]:
import pandas as pd
pd.set_option('display.max_colwidth', None)      # unlimited chars
pd.set_option('display.colheader_justify', 'right')

def show_country(name, path="country_data.csv"):
    df = pd.read_csv(path, dtype="string").fillna("")
    row = df[df["Country"] == name]
    return row.T      # transpose for tall view

import ipywidgets as w
w.interact(
    show_country,
    name=sorted(pd.read_csv("country_data.csv", dtype="string")["Country"])
)



interactive(children=(Dropdown(description='name', options=('Australia', 'Austria', 'Belgium', 'Bolivia', 'Bra…

<function __main__.show_country(name, path='country_data.csv')>

In [10]:
from pathlib import Path
import pandas as pd
import ipywidgets as w

DATA = Path("country_data.csv")

def load():
    return pd.read_csv(DATA, dtype="string").fillna("")

def save(df):
    DATA.replace(DATA.with_suffix(".bak"), missing_ok=True)
    df.to_csv(DATA, index=False, encoding="utf-8")
    print("✔ saved", DATA.name)

df = load()

In [18]:
import pandas as pd
import pycountry_convert as pc   # pip install pycountry_convert

df = pd.read_csv("country_data.csv", dtype="string").fillna("")

def to_continent(country_name):
    try:
        code2   = pc.country_name_to_country_alpha2(country_name, cn_name_format="default")
        cont_cd = pc.country_alpha2_to_continent_code(code2)      # "EU", "NA", "SA", etc.
        return {
            "AF": "Africa",
            "AS": "Asia",
            "EU": "Europe",
            "NA": "North America",
            "SA": "South America",
            "OC": "Oceania",
        }[cont_cd]
    except KeyError:
        return ""   # leaves cell blank if name unrecognised

df["Continent"] = df["Country"].apply(to_continent)
df.to_csv("country_data.csv", index=False, encoding="utf-8")
print("✔ Continent column filled for", df["Continent"].ne("").sum(), "countries")


✔ Continent column filled for 29 countries


In [19]:
import pandas as pd, pathlib

df = pd.read_csv("country_data.csv")
meta = pd.DataFrame({
    "variable": df.columns,
    "type":     df.dtypes.astype(str),
    "description": "",          # fill manually
    "coding / units": "",
    "source":  "",
    "transform": ""
})
meta.to_csv("codebook/codebook_variables.csv", index=False)


In [1]:
def calculate_cpei_scores(df):
    """Calculate CPEI using your current data structure"""
    
    # Legal Framework Score (your existing data)
    legal_use_cols = [col for col in df.columns if '[USE]' in col]
    legal_poss_cols = [col for col in df.columns if '[POSSESSION]' in col]
    legal_supply_cols = [col for col in df.columns if '[SUPPLY]' in col]
    
    # Calculate dimension scores
    df['Legal_Framework'] = df[legal_use_cols + legal_poss_cols + legal_supply_cols].mean(axis=1)
    
    # Placeholder for new dimensions (add as you collect data)
    df['Enforcement'] = 0.5  # Update when you add enforcement data
    df['International'] = 0.5  # Update when you add international data
    df['Supply_Chain'] = 0.5  # Update when you add supply chain data
    df['Harm_Reduction'] = 0.5  # Update when you add harm reduction data
    
    # Calculate CPEI
    df['CPEI_Score'] = (
        df['Legal_Framework'] * 0.25 +
        df['Enforcement'] * 0.30 +
        df['International'] * 0.20 +
        df['Supply_Chain'] * 0.20 +
        df['Harm_Reduction'] * 0.05
    )
    
    df['Rank'] = df['CPEI_Score'].rank(method='min', ascending=False)
    
    return df

In [2]:
# Phase 1: Current data only
df_phase1 = calculate_cpei_scores(df)  # Uses your existing legal data
df_phase1.to_csv('results/cpei_phase1_legal_only.csv')


NameError: name 'df' is not defined