In [1]:
import pandas as pd
from pathlib import Path

from prisons_analysis_utils import load_and_clean_prison_data, compute_occupancy_rates, over_100_occupancy, top_worst_countries, compare_regions, trend_nations

In [2]:
# Base directory configuration.
# Candidate project locations, one per machine this notebook is run on.
BASE_PC_DIR = Path(r'C:\Users\Work\Documents\GitHub\UNI\FDS02Q001\data_viz')
BASE_NB_DIR = Path(r'C:\Users\g.evola\repo\UNI\FDS02Q001\data_viz')

# Pick the first candidate that exists on the current machine, so this cell
# does not have to be edited when switching computers. If neither exists,
# fall back to BASE_PC_DIR (the previously hard-coded choice).
BASE_DIR = next(
    (candidate for candidate in (BASE_PC_DIR, BASE_NB_DIR) if candidate.is_dir()),
    BASE_PC_DIR,
)
DATA_DIR = BASE_DIR / 'datasets' / 'prison_statistics'

# Reference year passed to the analysis helpers and embedded in output file names.
year = 2022

## 1 - Load and clean data 

In [3]:
# Read the raw CSV export through the project helper, then derive the
# occupancy table from it.
df_clean = load_and_clean_prison_data(DATA_DIR / 'crim_pris_cap$defaultview_linear.csv')

df = compute_occupancy_rates(df_clean)

# Save for analysis. The path is built once from `year`, so the file written
# here and the file read back below cannot diverge when `year` is changed.
occupancy_csv = DATA_DIR / f"prison_occupancy_dual_2014_{year}.csv"
df.to_csv(occupancy_csv, index=False)
print(f"Saved '{occupancy_csv.name}'")

# Reload from disk so the following cells work on exactly what was exported.
occ = pd.read_csv(occupancy_csv)

Loaded 1,312 observations for 41 countries (YEAR <= 2022)
Columns: 12 total | Sample: ['DATAFLOW', 'LAST UPDATE', 'freq', 'indic_cr', 'unit', 'geo']...
Occupancy dataset: 308 observations, 39 countries
Years: 2014 → 2022
Columns: ['geo', 'YEAR', 'PRISONERS_NUM', 'CAPACITY_NUM', 'OCC_ABS', 'PRISONERS_100K', 'CAPACITY_100K', 'OCC_PER_100K']
Saved 'prison_occupancy_dual_2014_2022.csv'


## 2 - How many EU/European countries exceed a 100% prison occupancy rate in 2022?

In [4]:
# Select the countries above 100% occupancy for the reference year
# (the helper prints the summary count itself).
over100 = over_100_occupancy(occ, year)

# Save for Plotly/Datawrapper line chart
over100.to_csv(DATA_DIR / f"over100_{year}.csv", index=False)
# The closing quote around the file name was missing in the original message.
print(f"Saved 'over100_{year}.csv'")

Number of countries with occupancy above 100% in 2022: 13 out of 35
Saved 'over100_2022.csv


## 3 - Which countries have been most successful in maintaining occupancy rates below capacity in 2022?

In [5]:
# Destination of the export for the reference year.
top10_csv = DATA_DIR / f"top10_{year}.csv"

# Ask the helper for 10 countries in "top" mode for the reference year.
top_10 = top_worst_countries(occ, year, 10, "top")

# Save for Plotly/Datawrapper line chart
top_10.to_csv(top10_csv, index=False)
print(f"Saved top10_{year}.csv")

Number of top 10 occupancy countries in 2022: 10 out of 35
Saved top10_2022.csv


## 4 - What are the regional differences (North vs South Europe) in prison overcrowding?

In [6]:
# Country labels assigned to the "North" group.
# NOTE(review): presumably these must match the dataset's `geo` labels exactly
# - confirm against compare_regions.
north = [
    "Norway",
    "Sweden",
    "Finland",
    "Denmark",
    "Ireland",
    "United Kingdom",
    "Northern Ireland (UK) (NUTS 2021)",
    "Netherlands",
    "Estonia",
    "Latvia",
    "Lithuania",
    "Germany",
    "Austria",
    "France",
    "Belgium",
    "Luxembourg",
    "Switzerland",
    "Poland",
    "Czechia",
    "Slovakia",
]

# Country labels assigned to the "South" group.
south = [
    "Italy",
    "Spain",
    "Portugal",
    "Greece",
    "Croatia",
    "Slovenia",
    "Romania",
    "Bulgaria",
    "Hungary",
    "Serbia",
    "Bosnia and Herzegovina",
    "Montenegro",
    "North Macedonia",
    "Albania",
    "Kosovo*",
    "Türkiye",
]

# Destination of the regional comparison export.
regions_csv = DATA_DIR / f"compare_regions_{year}.csv"

# Per-region statistics for the reference year; the helper prints the table.
df_regions = compare_regions(occ, north, south, year)

# Save for Plotly/Datawrapper line chart
df_regions.to_csv(regions_csv, index=False)
print(f"Saved compare_regions_{year}.csv")

Region stats for 2022:
  REGION    mean    min     max  count
0  North   90.39  62.18  118.96     18
1  Other  156.14  58.72  263.64      4
2  South   96.35  68.34  117.70     13
Saved compare_regions_2022.csv


## 5 - Prison Occupancy Trends 2014-2022: European countries with prison occupancy < 1000

In [7]:
# Countries whose trend is extracted from the occupancy table.
countries_focus = [
    "Iceland",
    "Malta",
    "Liechtenstein",
    "Cyprus",
]

# Destination of the trend export.
mini_trend_csv = DATA_DIR / f"mini_countries_trend_{year}.csv"

df_mini_trend = trend_nations(occ, countries_focus)

# Save for Plotly/Datawrapper line chart
df_mini_trend.to_csv(mini_trend_csv, index=False)
print(f"Saved mini_countries_trend_{year}.csv")

Trends extracted for 4 focus countries: ['Iceland', 'Malta', 'Liechtenstein', 'Cyprus']
Saved mini_countries_trend_2022.csv
