In [67]:
import sys
from pathlib import Path

# Locate the repository root: starting at the current working directory, walk
# upwards and stop at the first folder that contains a ".git" entry.
p = Path.cwd().resolve()
repo_root = None
for candidate in (p, *p.parents):
    if (candidate / ".git").exists():
        repo_root = candidate
        break

if repo_root is None:
    raise RuntimeError("Repo root not found. Open the repo folder in VS Code.")

# Put the repo root at the front of the import path and report which one was found.
sys.path.insert(0, str(repo_root))
print("Repo root:", repo_root)

import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt

Repo root: C:\Users\harri\OneDrive - Imperial College London\Year 3 Group Project\Group_Project_Y3


In [68]:
# Load the county code lookup table stored under the repo's cleaned data folder
# and display it.
county_codes_csv = repo_root / "cleaned/00_codes/county_codes.csv"
county_codes = pd.read_csv(county_codes_csv)
county_codes

Unnamed: 0,county_code,county_kts,county_name
0,201,10030210101000,Powiat bolesławiecki
1,202,10030210302000,Powiat dzierżoniowski
2,203,10030210203000,Powiat głogowski
3,204,10030210204000,Powiat górowski
4,205,10030210105000,Powiat jaworski
...,...,...,...
375,3217,10023216417000,Powiat wałecki
376,3218,10023216418000,Powiat łobeski
377,3261,10023216361000,Powiat m. Koszalin
378,3262,10023216562000,Powiat m. Szczecin


**SECTION 1 - UNEMPLOYMENT BY POWIAT**

We will take both `ru` measures and sum them by powiat, to check that the two sums are consistent. Then, taking one of them, we will merge it with the powiat-level measures of the economically active population, so that the unemployment rate by powiat can be computed.

In [69]:
# Load the two "ru" tables (one broken down by sex and age, one by sex and
# education); the first CSV column is the saved index.
p1946 = pd.read_csv(repo_root / "cleaned/03_01_outcome_data/ru_sex_age_p1946.csv", index_col=0)
p1947 = pd.read_csv(repo_root / "cleaned/03_01_outcome_data/ru_sex_ed_p1947.csv", index_col=0)

# Create county_code column for each: the "code" value with its last three
# characters dropped. Vectorised string ops replace the per-row lambda, and the
# dtype is spelled out as int64 so it does not depend on the platform default.
for df in [p1946, p1947]:
    df["county_code"] = df["code"].astype(str).str[:-3].astype("int64")

# Now take only sex total and age/education total
p1946 = p1946[(p1946["age"] == "total") & (p1946["sex"] == "total")]
p1947 = p1947[(p1947["education"] == "total") & (p1947["sex"] == "total")]

# Sum the counts per (county_code, year) for each table.
ru_series_p1946 = p1946.groupby(["county_code", "year"])["count"].sum()
ru_series_p1947 = p1947.groupby(["county_code", "year"])["count"].sum()

# Consistency check. A bare count of matching rows cannot be interpreted without
# the total, so fail loudly unless both series cover the same keys and every
# value matches.
assert ru_series_p1946.index.equals(ru_series_p1947.index), (
    "p1946 and p1947 do not cover the same (county_code, year) keys"
)
n_matching = (ru_series_p1946 == ru_series_p1947).sum()
assert n_matching == len(ru_series_p1946), (
    f"only {n_matching} of {len(ru_series_p1946)} (county_code, year) sums match"
)

# Number of matching rows (equal to the total number of rows when the check passes).
n_matching

np.int64(9880)

Given that the two series are the same, we will use the first one (`ru_series_p1946`).

In [70]:
# Move the (county_code, year) index back into ordinary columns and rename the
# summed "count" column to "ru".
ru_series_p1946 = ru_series_p1946.reset_index().rename(columns={"count": "ru"})
ru_series_p1946

Unnamed: 0,county_code,year,ru
0,201,2000,8886.0
1,201,2001,9218.0
2,201,2002,8574.0
3,201,2003,8803.0
4,201,2004,7966.0
...,...,...,...
9875,3263,2021,559.0
9876,3263,2022,521.0
9877,3263,2023,545.0
9878,3263,2024,585.0


Gather the required columns from the powiat population table.

In [71]:
# Columns to keep from the powiat population table: the (county_code, year)
# keys, the county identifiers, and the tp* population / active measures.
ptot_cols = [
    "county_code",
    "year",
    "county_kts",
    "county_name",
    "tp1_nc_pop",
    "tp2_yr_pop",
    "tp3b_nc_active",
    "tp4b_active",
    "tp5b_active",
    "tp6b_active",
]

# Read the table (the first CSV column is the saved index) and keep only those columns.
ptot = pd.read_csv(
    repo_root / "cleaned/03_01_outcome_tables/population_powiat.csv",
    index_col=0,
)[ptot_cols]

Now merge the tables:

In [72]:
# Index both tables by (county_code, year) so they can be joined on those keys.
# The guards keep the cell re-runnable: an unconditional in-place set_index
# raises KeyError on a second run, because the key columns have already been
# moved into the index.
join_keys = ["county_code", "year"]
if list(ru_series_p1946.index.names) != join_keys:
    ru_series_p1946 = ru_series_p1946.set_index(join_keys)
if list(ptot.index.names) != join_keys:
    ptot = ptot.set_index(join_keys)

# Left join: every (county_code, year) row of the ru table is kept, and the
# population columns are attached where a matching key exists in ptot.
ru_powiat = ru_series_p1946.join(ptot, how="left").reset_index()

# A left join can only gain rows if ptot has duplicate keys; stop before saving
# rather than write a table with duplicated ru rows.
assert len(ru_powiat) == len(ru_series_p1946), (
    "join changed the row count - check ptot for duplicate (county_code, year) keys"
)

# Save
ru_powiat.to_csv(repo_root / "cleaned/03_01_outcome_tables/unemploy_table.csv")

ru_powiat

Unnamed: 0,county_code,year,ru,county_kts,county_name,tp1_nc_pop,tp2_yr_pop,tp3b_nc_active,tp4b_active,tp5b_active,tp6b_active
0,201,2000,8886.0,1.003021e+13,Powiat bolesławiecki,76739.0,64323.0,40280.0,37565.0,,
1,201,2001,9218.0,1.003021e+13,Powiat bolesławiecki,76739.0,64858.0,40280.0,37877.0,,
2,201,2002,8574.0,1.003021e+13,Powiat bolesławiecki,76739.0,71744.0,40280.0,41898.0,,
3,201,2003,8803.0,1.003021e+13,Powiat bolesławiecki,76739.0,72499.0,40280.0,42339.0,,
4,201,2004,7966.0,1.003021e+13,Powiat bolesławiecki,76739.0,73078.0,40280.0,42678.0,,
...,...,...,...,...,...,...,...,...,...,...,...
9875,3263,2021,559.0,1.002322e+13,Powiat m. Świnoujście,35988.0,34960.0,17737.0,19752.0,19578.0,19752.0
9876,3263,2022,521.0,1.002322e+13,Powiat m. Świnoujście,35988.0,34619.0,17737.0,19560.0,19664.0,19839.0
9877,3263,2023,545.0,1.002322e+13,Powiat m. Świnoujście,35988.0,34317.0,17737.0,19389.0,19664.0,19839.0
9878,3263,2024,585.0,1.002322e+13,Powiat m. Świnoujście,35988.0,34148.0,17737.0,19294.0,19499.0,19673.0


In [73]:
# Display ru_series_p1946 as it stands after the merge cell; the output below
# shows county_code and year as index levels with "ru" as the only column.
ru_series_p1946

Unnamed: 0_level_0,Unnamed: 1_level_0,ru
county_code,year,Unnamed: 2_level_1
201,2000,8886.0
201,2001,9218.0
201,2002,8574.0
201,2003,8803.0
201,2004,7966.0
...,...,...
3263,2021,559.0
3263,2022,521.0
3263,2023,545.0
3263,2024,585.0
