In [33]:
import sys
from pathlib import Path

# Locate the repository root: the first directory, starting at the current
# working directory and moving upward, that contains a ".git" entry.
p = Path.cwd().resolve()
repo_root = next(
    filter(lambda folder: (folder / ".git").exists(), [p, *p.parents]),
    None,
)

# Fail loudly when no ancestor directory contains ".git".
if repo_root is None:
    raise RuntimeError("Repo root not found. Open the repo folder in VS Code.")

# Prepend the repo root to the module search path and report it.
sys.path.insert(0, str(repo_root))
print("Repo root:", repo_root)

Repo root: /Users/samiullah/Group_Project_Y3


In [34]:
import pandas as pd
import numpy as np
# Read the cleaned municipality code table from the repo.
muni_codes = pd.read_csv(repo_root / "cleaned/00_codes/muni_codes.csv")

In [35]:
# Read the "DATA" sheet of the raw economic-activity workbook.
econ_activity = pd.read_excel(
    repo_root / "raw/00_econ_activity_data/NARO_4305_XREL_20260203000828.xlsx",
    sheet_name="DATA",
)

# Taking County-Level Figures

In [36]:
# Distinct names of the rows whose Name contains "Powiat".
econ_activity.loc[econ_activity['Name'].str.contains('Powiat'), 'Name'].unique()

<StringArray>
[ 'Powiat bolesławiecki', 'Powiat dzierżoniowski',      'Powiat głogowski',
       'Powiat górowski',       'Powiat jaworski',     'Powiat karkonoski',
 'Powiat kamiennogórski',        'Powiat kłodzki',       'Powiat legnicki',
       'Powiat lubański',
 ...
    'Powiat koszaliński',    'Powiat myśliborski',        'Powiat policki',
       'Powiat pyrzycki',     'Powiat sławieński',    'Powiat stargardzki',
   'Powiat szczecinecki',     'Powiat świdwiński',        'Powiat wałecki',
        'Powiat łobeski']
Length: 304, dtype: str

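Only 304 of the 380 counties (powiats) appear as county-level rows in the data, so the county-level figures are incomplete.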

# Aggregating Muni to County

Municipality codes are the only codes that do not end in 0, so we keep the rows whose code ends in a non-zero digit.

In [37]:
# Build the municipality-level table from the raw frame in one non-mutating chain.
# The original renamed columns with inplace=True on a boolean-filtered selection of
# `econ_activity`, which triggers SettingWithCopyWarning on pandas without
# Copy-on-Write; chaining returns a fresh frame and leaves `econ_activity` untouched.
muni_econ_activity = (
    econ_activity
    # Keep only rows whose code does not end in "0" (per the note above,
    # these are the municipality-level rows).
    .loc[lambda frame: frame["Code"].astype(str).str[-1] != "0"]
    .rename(columns={
        'Code': 'muni_code',
        'Name': 'muni_name',
        'Economic activity of the population': 'econ_activity_group',
        'Value': 'value',
    })
    # Keep only the four renamed columns, in this order.
    [['muni_code', 'muni_name', 'econ_activity_group', 'value']]
)

In [38]:
# Display the municipality-level table (columns: muni_code, muni_name,
# econ_activity_group, value) to check the filter and rename.
muni_econ_activity

Unnamed: 0,muni_code,muni_name,econ_activity_group,value
27,201011,Bolesławiec (1),total,31371.0
28,201011,Bolesławiec (1),economically active population,15865.0
29,201011,Bolesławiec (1),employed,15287.0
30,201011,Bolesławiec (1),unemployed,578.0
31,201011,Bolesławiec (1),economically inactive persons,12966.0
...,...,...,...,...
37759,3263011,Świnoujście (1),economically inactive persons,13537.0
37760,3263011,Świnoujście (1),unidentified status on the labour market,3530.0
37761,3263011,Świnoujście (1),activity rate,56.9
37762,3263011,Świnoujście (1),employment rate,54.3


In [39]:
# Attach the code table to the activity data on muni_code.
# muni_name is dropped from the code table first so the merged frame carries a
# single muni_name column (the one from the activity data). The outer join keeps
# rows that appear on only one side.
muni_econ_activity_merged = pd.merge(
    muni_codes.drop(columns=['muni_name']),
    muni_econ_activity,
    on='muni_code',
    how='outer',
)
muni_econ_activity_merged

Unnamed: 0,muni_code,muni_kts,county_kts,county_code,county_name,muni_name,econ_activity_group,value
0,201011,10030210101011,10030210101000,201,Powiat bolesławiecki,Bolesławiec (1),total,31371.0
1,201011,10030210101011,10030210101000,201,Powiat bolesławiecki,Bolesławiec (1),economically active population,15865.0
2,201011,10030210101011,10030210101000,201,Powiat bolesławiecki,Bolesławiec (1),employed,15287.0
3,201011,10030210101011,10030210101000,201,Powiat bolesławiecki,Bolesławiec (1),unemployed,578.0
4,201011,10030210101011,10030210101000,201,Powiat bolesławiecki,Bolesławiec (1),economically inactive persons,12966.0
...,...,...,...,...,...,...,...,...
34205,3263011,10023216663011,10023216663000,3263,Powiat m. Świnoujście,Świnoujście (1),economically inactive persons,13537.0
34206,3263011,10023216663011,10023216663000,3263,Powiat m. Świnoujście,Świnoujście (1),unidentified status on the labour market,3530.0
34207,3263011,10023216663011,10023216663000,3263,Powiat m. Świnoujście,Świnoujście (1),activity rate,56.9
34208,3263011,10023216663011,10023216663000,3263,Powiat m. Świnoujście,Świnoujście (1),employment rate,54.3


In [40]:
# List the distinct activity groups present after the merge.
muni_econ_activity_merged.loc[:, 'econ_activity_group'].unique()

<StringArray>
[                                   'total',
           'economically active population',
                                 'employed',
                               'unemployed',
            'economically inactive persons',
 'unidentified status on the labour market',
                            'activity rate',
                          'employment rate',
                        'unemployment rate',
                                        nan]
Length: 10, dtype: str

If we really need the other groups, we can aggregate the counts and the rates separately (rates cannot simply be summed across municipalities). We probably do not need them.

In [46]:
# County totals of the economically active population:
# keep only that activity group, sum the municipality values per county_code,
# and give the summed column a descriptive name.
county_econ_activity = (
    muni_econ_activity_merged
    .loc[lambda frame: frame['econ_activity_group'] == 'economically active population']
    .groupby('county_code')['value']
    .sum()
    .reset_index()
    .rename(columns={'value': 'econ_active_population'})
)

In [47]:
# Display the county-level totals (columns: county_code, econ_active_population).
county_econ_activity

Unnamed: 0,county_code,econ_active_population
0,201,45666.0
1,202,45049.0
2,203,36610.0
3,204,25438.0
4,205,24914.0
...,...,...
375,3217,27376.0
376,3218,26202.0
377,3261,46947.0
378,3262,187291.0


We now have full representation of all 380 counties.

NEXT:
- Compute representation by powiat: join back to the municipality codes and check what percentage of each powiat's municipalities are included in the aggregate
- Do clustering