In [32]:
# Exploring the different API requests
from urllib3 import request
import os
import json
import pandas as pd
import io
from dotenv import load_dotenv
import json
from pathlib import Path
import numpy as np
_ = load_dotenv()

In [2]:
# Stats NZ API subscription key, taken from the process environment
# (load_dotenv() is called in the imports cell). Resolves to None when the
# variable is not set.
API_KEY = os.environ.get("STATS_NZ_API_KEY_PRIMARY")


In [None]:


# Load the regions document; later cells iterate over its "features" list and
# add data to each feature's "properties".
link = "data/regions.json"

with open(link, encoding="utf-8") as regions_file:
    regions_json = json.load(regions_file)

In [176]:

# Fetch ethnicity counts (dataflow CEN23_ECI_019) from the Stats NZ API as a
# labelled CSV and parse them into `df`.
url = "https://api.data.stats.govt.nz/rest/data/STATSNZ,CEN23_ECI_019,1.0/2023.99+15+14+13+12+18+17+16+09+08+07+06+05+04+03+02+01..77.99.99?dimensionAtObservation=AllDimensions&format=csvfilewithlabels"

response = request(
    method="GET",
    url=url,
    headers={"Ocp-Apim-Subscription-Key": API_KEY},
)

# Fail fast on an error response. Without this check the error body (e.g. for a
# missing or invalid subscription key) would be handed to read_csv and only
# surface later as a confusing missing-column error.
if response.status != 200:
    error_excerpt = response.data[:200].decode("utf-8", errors="replace")
    raise RuntimeError(
        f"Stats NZ request failed with HTTP {response.status}: {error_excerpt}"
    )

ethnicity_data = response.data.decode("utf-8")

df = pd.read_csv(io.StringIO(ethnicity_data))

In [183]:
# Reshape the long ethnicity CSV (one row per area/ethnicity pair) into one row
# per region with one count column per ethnicity group.

# Raw CSV column -> working column name; only these columns are kept.
COLS = {
    "Ethnicity": "ethnicity",
    "Area": "region",
    "OBS_VALUE": "count",
}

# Ethnicity labels dropped before pivoting.
EXCLUDE = ["Not elsewhere included", "Total stated - ethnicity"]

# Pivoted ethnicity label -> output column name. "_other" is a temporary name:
# that column is folded into "other" and then dropped.
RENAME = {
    "Middle Eastern/Latin American/African": "mena",
    "Other ethnicity nec": "_other",
    "Total - ethnicity": "total",
    "Pacific Peoples": "pasifika",
    "Asian": "asian",
    "European": "nz_european",
    "Māori": "māori",
}

transformed_df = (
    df[list(COLS)]
    .rename(columns=COLS)
    .loc[lambda frame: ~frame["ethnicity"].isin(EXCLUDE)]
    .pivot(index="region", columns="ethnicity", values="count")
    .reset_index()
    # "other" combines the two residual groups before they are removed.
    .assign(other=lambda frame: frame["Other ethnicity nec"] + frame["New Zealander"])
    .rename(columns=RENAME)
    .drop(columns=["_other", "New Zealander"])
)

transformed_df

ethnicity,region,asian,nz_european,mena,māori,pasifika,total,other
0,Area Outside Region,18,459,6,420,24,633,9
1,Auckland Region,518178,825144,44718,203544,275079,1656486,15174
2,Bay of Plenty Region,29262,240087,3954,102387,14202,334140,3558
3,Canterbury Region,86430,522498,10299,69060,23868,651027,8220
4,Gisborne Region,1968,28905,381,28029,2841,51135,465
5,Hawke's Bay Region,11361,128415,1398,50076,10902,175074,2118
6,Manawatū-Whanganui Region,19329,196431,2394,63177,12522,251412,3339
7,Marlborough Region,2667,42450,738,7344,1821,49431,744
8,Nelson Region,4533,44544,714,6252,1485,52584,705
9,Northland Region,9345,141594,1263,72612,9486,194007,2211


In [249]:
# Attach the matching ethnicity record to every region feature, then write the
# enriched document to disk.
for region in regions_json["features"]:
    properties = region["properties"]
    region_name = properties["name"]
    print(region_name)

    region_ethnicity_data = transformed_df[transformed_df["region"] == region_name]

    # Only an unambiguous match (exactly one row) is attached; any other
    # feature is left without an "ethnicity" property.
    if len(region_ethnicity_data) != 1:
        continue

    properties["ethnicity"] = region_ethnicity_data.to_dict(orient="records")[0]

with open("data/regions_with_ethnicity.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(regions_json))

Northland Region
Auckland Region
Waikato Region
Bay of Plenty Region
Gisborne Region
Hawke's Bay Region
Taranaki Region
Manawatū-Whanganui Region
Wellington Region
West Coast Region
Canterbury Region
Otago Region
Southland Region
Tasman Region
Nelson Region
Marlborough Region
Area Outside Region


In [148]:


def _require_ok(response):
    """Raise RuntimeError unless `response` has HTTP status 200.

    Without this check an error body (bad key, throttling, ...) would be parsed
    as CSV and only fail later with a misleading missing-column error.
    """
    if response.status != 200:
        error_excerpt = response.data[:200].decode("utf-8", errors="replace")
        raise RuntimeError(
            f"Stats NZ request failed with HTTP {response.status}: {error_excerpt}"
        )


# SA3 boundary document; later cells read each feature's
# properties["SA32025_V1_00"] code and properties["name"].
sa3_json = json.loads(Path("data/sa3.json").read_text("utf-8"))

# --- Ethnicity counts (dataflow CEN23_POP_003) -------------------------------
url = "https://api.data.stats.govt.nz/rest/data/STATSNZ,CEN23_POP_003,1.0/2023..777+69+61+5+4+3+2+1.99.99?dimensionAtObservation=AllDimensions&format=csvfilewithlabels"

response = request(
    method="GET",
    url=url,
    headers={"Ocp-Apim-Subscription-Key": API_KEY},
)
_require_ok(response)

ethnicity_data = response.data.decode("utf-8")

df = pd.read_csv(io.StringIO(ethnicity_data))

# --- Rainbow/LGBTIQ+ indicator counts (dataflow CEN23_TBT_008) ---------------
# The area dimension is left empty, so rows for every area in the dataflow are
# returned; they are narrowed to SA3 codes in a later cell.
lgbt = "https://api.data.stats.govt.nz/rest/data/STATSNZ,CEN23_TBT_008,1.0/rb3+rbTS+rb2+rb1..2023?dimensionAtObservation=AllDimensions&format=csvfilewithlabels"

response = request(
    method="GET",
    url=lgbt,
    headers={"Ocp-Apim-Subscription-Key": API_KEY},
)
_require_ok(response)

lgbt_data = response.data.decode("utf-8")

# Read the area code as text. Later cells compare it against the string codes
# from sa3_json, and some codes carry leading zeros (e.g. "030") that integer
# inference would strip.
lgbt_df = pd.read_csv(io.StringIO(lgbt_data), dtype={"CEN23_TBT_GEO_006": str})

In [149]:
# Reshape the long SA3 ethnicity CSV into one row per (area name, area code)
# with one count column per ethnicity group.

# SA3 codes from the boundary file, as stored there (strings). A later cell
# uses them to filter the LGBT rows.
all_codes = [feature["properties"]["SA32025_V1_00"] for feature in sa3_json["features"]]

# Raw CSV column -> working column name; only these columns are kept.
COLS = {
    "Ethnicity": "ethnicity",
    "Area": "region",
    "CEN23_GEO_004": "region_code",
    "OBS_VALUE": "count",
}

# Ethnicity labels dropped before pivoting.
EXCLUDE = ["Not elsewhere included"]

# Pivoted ethnicity label -> output column name. "_other" is a temporary name:
# that column is folded into "other" and then dropped.
RENAME = {
    "Middle Eastern/Latin American/African": "mena",
    "Other ethnicity nec": "_other",
    "Total stated - ethnicity": "total",
    "Pacific Peoples": "pasifika",
    "Asian": "asian",
    "European": "nz_european",
    "Māori": "māori",
}

transformed_df = (
    df[list(COLS)]
    .rename(columns=COLS)
    .loc[lambda frame: ~frame["ethnicity"].isin(EXCLUDE)]
    .pivot(index=["region", "region_code"], columns="ethnicity", values="count")
    .reset_index()
    # "other" combines the two residual groups before they are removed.
    .assign(other=lambda frame: frame["Other ethnicity nec"] + frame["New Zealander"])
    .rename(columns=RENAME)
    # Keep only areas that have an "asian" count.
    .loc[lambda frame: frame["asian"].notna()]
    .drop(columns=["_other", "New Zealander"])
)

transformed_df


ethnicity,region,region_code,asian,nz_european,mena,māori,pasifika,total,other
0,Abbotsford,62140,144.0,2682.0,9.0,285.0,87.0,2916.0,27.0
1,Acacia Bay,53200,75.0,1629.0,9.0,216.0,18.0,1770.0,30.0
2,Addington-Middleton,61110,1734.0,3612.0,183.0,690.0,276.0,5898.0,63.0
3,Ahuriri,54250,54.0,987.0,12.0,180.0,30.0,1155.0,27.0
4,Aidanfield,61180,1425.0,2367.0,39.0,177.0,75.0,3870.0,42.0
...,...,...,...,...,...,...,...,...,...
993,Ōtorohanga District East,53060,396.0,5934.0,51.0,2508.0,243.0,7827.0,105.0
994,Ōtorohanga District West,53040,57.0,2013.0,6.0,876.0,87.0,2586.0,42.0
995,Ōtāhuhu,51830,4077.0,2916.0,243.0,2472.0,7335.0,14778.0,54.0
996,Ōwhata,53660,603.0,3915.0,54.0,3102.0,381.0,6468.0,72.0


In [159]:
# Display the raw rows parsed from the rainbow/LGBTIQ+ indicator CSV response.
lgbt_df

Unnamed: 0,STRUCTURE,STRUCTURE_ID,STRUCTURE_NAME,ACTION,CEN23_TBT_IND_003,Variable codes,CEN23_TBT_GEO_006,Area,CEN23_YEAR_001,Census year,OBS_VALUE,Observation Value,OBS_STATUS,Observation Status
0,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rb1,LGBTIQ+,182300,Fencourt,2023,2023,9.0,,,
1,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rbTS,Total stated - rainbow/LGBTIQ+ indicator,2029,Inlets Marlborough District,2023,2023,21.0,,,
2,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rb2,Not LGBTIQ+,2032,Ngākuta Bay,2023,2023,54.0,,,
3,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rb3,LGBTIQ+ status unidentifiable,246700,Eastern Bays,2023,2023,102.0,,,
4,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rb1,LGBTIQ+,323500,Middleton,2023,2023,21.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17687,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rb3,LGBTIQ+ status unidentifiable,151800,Massey Road West,2023,2023,627.0,,,
17688,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rbTS,Total stated - rainbow/LGBTIQ+ indicator,61260,Opawa,2023,2023,1086.0,,,
17689,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rbTS,Total stated - rainbow/LGBTIQ+ indicator,030,Hastings District,2023,2023,52680.0,,,
17690,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rb2,Not LGBTIQ+,1082,Snells Beach,2023,2023,2562.0,,,


In [183]:
# LGBT
#
# Reshape the long rainbow/LGBTIQ+ indicator CSV into one row per SA3 area with
# `lgbt`, `non_lgbt` and `total` (= lgbt + non_lgbt) columns.

# Raw CSV column -> working column name; only these columns are kept.
LGBT_COLS = {
    "Variable codes": "preference",
    "Area": "region",
    "CEN23_TBT_GEO_006": "region_code",
    "OBS_VALUE": "count",
}

# Indicator labels to drop before pivoting (currently none).
LGBT_EXCLUDE = []

# Pivoted indicator label -> output column name. Only labels that occur in this
# dataset are listed; the ethnicity labels previously copied in here could
# never match a column.
LGBT_RENAME = {
    "LGBTIQ+": "lgbt",
    "Not LGBTIQ+": "non_lgbt",
}

lgbt_transformed_df = (
    lgbt_df[list(LGBT_COLS)]
    .rename(columns=LGBT_COLS)
    # Compare codes as text. `all_codes` holds strings, so if read_csv inferred
    # an integer column the filter below would otherwise match nothing.
    .assign(region_code=lambda frame: frame["region_code"].astype(str))
    # isin() avoids interpolating the whole code list into a query string.
    .loc[
        lambda frame: ~frame["preference"].isin(LGBT_EXCLUDE)
        & frame["region_code"].isin(all_codes)
    ]
    .pivot(index=["region", "region_code"], columns="preference", values="count")
    .reset_index()
    .rename(columns=LGBT_RENAME)
    .assign(total=lambda frame: frame["non_lgbt"] + frame["lgbt"])
    # Keep only areas that have an "lgbt" count.
    .loc[lambda frame: frame["lgbt"].notna()]
    .drop(columns=["LGBTIQ+ status unidentifiable", "Total stated - rainbow/LGBTIQ+ indicator"])
)

lgbt_transformed_df

preference,region,region_code,lgbt,non_lgbt,total
0,Abbotsford,62140,78.0,2103.0,2181.0
1,Acacia Bay,53200,33.0,1287.0,1320.0
2,Addington-Middleton,61110,477.0,4116.0,4593.0
3,Ahuriri,54250,24.0,831.0,855.0
4,Aidanfield,61180,105.0,2865.0,2970.0
...,...,...,...,...,...
867,Ōtorohanga District East,53060,156.0,4830.0,4986.0
868,Ōtorohanga District West,53040,51.0,1575.0,1626.0
869,Ōtāhuhu,51830,408.0,8550.0,8958.0
870,Ōwhata,53660,153.0,3762.0,3915.0


In [185]:

def _records_by_area(frame):
    """Index the rows of `frame` by ``(region, region_code)``.

    Only keys that occur exactly once are kept, so a lookup succeeds only for
    an unambiguous match. Missing values (NaN) in a record are replaced by
    None: json.dump would otherwise write the non-standard token ``NaN``,
    which strict JSON parsers reject.

    Returns a dict mapping ``(region, region_code)`` to the row as a plain dict.
    """
    grouped = {}
    for record in frame.to_dict(orient="records"):
        key = (record["region"], record["region_code"])
        cleaned = {
            name: (None if pd.isna(value) else value)
            for name, value in record.items()
        }
        grouped.setdefault(key, []).append(cleaned)
    return {key: rows[0] for key, rows in grouped.items() if len(rows) == 1}


# Build both lookups once instead of running two DataFrame queries per feature.
ethnicity_by_area = _records_by_area(transformed_df)
lgbt_by_area = _records_by_area(lgbt_transformed_df)

for region in sa3_json["features"]:
    properties = region["properties"]
    region_name = properties["name"]
    region_code_text = properties["SA32025_V1_00"]

    # The ethnicity frame is matched on the integer form of the code and the
    # LGBT frame on the raw string form. A feature without exactly one match
    # gets None.
    properties["ethnicity"] = ethnicity_by_area.get((region_name, int(region_code_text)))
    properties["lgbt"] = lgbt_by_area.get((region_name, region_code_text))

with open("data/sa3_with_ethnicity.json", "w", encoding="utf-8") as f:
    json.dump(sa3_json, f)

In [67]:
# Spot-check the ethnicity record attached to the first SA3 feature.
sa3_json["features"][0]["properties"]["ethnicity"]

# Disabled scan for missing values, kept for reference.
# NOTE(review): `== np.nan` is always False because NaN never compares equal to
# anything; a working check would use pd.isna(...) or np.isnan(...). It would
# also fail on features whose "ethnicity" property is None.
#for x in sa3_json["features"]:
    #print(x["properties"]["ethnicity"])
    #if x["properties"]["ethnicity"]["asian"] == np.nan:
    #    print(x["properties"]["ethnicity"])

{'region': 'Onerahi',
 'region_code': 50300,
 'asian': 342.0,
 'nz_european': 4731.0,
 'mena': 54.0,
 'māori': 2157.0,
 'pasifika': 321.0,
 'total': 6411.0,
 'other': 93.0}

In [89]:
# Fetch the rainbow/LGBTIQ+ indicator rows for the first ten SA3 codes only.
all_codes = [feature["properties"]["SA32025_V1_00"] for feature in sa3_json["features"]]

# Join outside the f-string: reusing double quotes inside a double-quoted
# f-string is a SyntaxError on Python versions before 3.12.
sample_codes = "+".join(all_codes[0:10])

lgbt = f"https://api.data.stats.govt.nz/rest/data/STATSNZ,CEN23_TBT_008,1.0/rb3+rbTS+rb2+rb1.{sample_codes}.2023?dimensionAtObservation=AllDimensions&format=csvfilewithlabels"

response = request(
    method="GET",
    url=lgbt,
    headers={"Ocp-Apim-Subscription-Key": API_KEY},
)

# Fail fast on an error response instead of parsing the error body as CSV.
if response.status != 200:
    error_excerpt = response.data[:200].decode("utf-8", errors="replace")
    raise RuntimeError(
        f"Stats NZ request failed with HTTP {response.status}: {error_excerpt}"
    )

lgbt_data = response.data.decode("utf-8")

# Read the area code as text so it stays comparable with the string codes in
# `all_codes`, whichever subset of areas was requested.
lgbt_df = pd.read_csv(io.StringIO(lgbt_data), dtype={"CEN23_TBT_GEO_006": str})

In [90]:
# Display the rows returned for the ten-code sample request.
lgbt_df

Unnamed: 0,STRUCTURE,STRUCTURE_ID,STRUCTURE_NAME,ACTION,CEN23_TBT_IND_003,Variable codes,CEN23_TBT_GEO_006,Area,CEN23_YEAR_001,Census year,OBS_VALUE,Observation Value,OBS_STATUS,Observation Status
0,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rb3,LGBTIQ+ status unidentifiable,50410,Rodney East,2023,2023,1395,,,
1,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rb3,LGBTIQ+ status unidentifiable,50300,Onerahi,2023,2023,891,,,
2,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rb3,LGBTIQ+ status unidentifiable,50330,Kaipara District North,2023,2023,1563,,,
3,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rb3,LGBTIQ+ status unidentifiable,50380,Barrier Islands,2023,2023,204,,,
4,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rb1,LGBTIQ+,50390,Rodney North,2023,2023,213,,,
5,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rb2,Not LGBTIQ+,50410,Rodney East,2023,2023,6105,,,
6,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rb2,Not LGBTIQ+,50380,Barrier Islands,2023,2023,825,,,
7,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rbTS,Total stated - rainbow/LGBTIQ+ indicator,50320,Waipū,2023,2023,2409,,,
8,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rb3,LGBTIQ+ status unidentifiable,50320,Waipū,2023,2023,405,,,
9,DATAFLOW,STATSNZ:CEN23_TBT_008(1.0),"Totals by topic for individuals, (RC, TALB, UR...",I,rb1,LGBTIQ+,50340,Kaipara District South,2023,2023,186,,,
