In [None]:
# Exploring the different API requests
from urllib3 import request
import os
import json
import pandas as pd
import io
from dotenv import load_dotenv
import json
from pathlib import Path
import numpy as np 
# Load variables from a .env file into the process environment. The return
# value is bound to `_` so the cell does not display it.
_ = load_dotenv()

In [None]:
# Subscription key sent in the "Ocp-Apim-Subscription-Key" header of every
# Stats NZ API request in this notebook.
API_KEY = os.getenv("STATS_NZ_API_KEY_PRIMARY")

# os.getenv returns None when the variable is missing; fail here with an
# actionable message rather than sending a None header value later on.
if not API_KEY:
    raise RuntimeError(
        "STATS_NZ_API_KEY_PRIMARY is not set; add it to your .env file or environment."
    )


In [None]:


# Read the regions JSON from disk; later cells iterate over its "features"
# and attach census data to each feature's "properties".
link = "data/regions.json"

with Path(link).open(encoding="utf-8") as regions_file:
    regions_json = json.load(regions_file)

In [None]:

# Request the CEN23_ECI_019 dataflow (2023) from the Stats NZ API as CSV with
# human-readable labels, and load it into a DataFrame.
url = "https://api.data.stats.govt.nz/rest/data/STATSNZ,CEN23_ECI_019,1.0/2023.99+15+14+13+12+18+17+16+09+08+07+06+05+04+03+02+01..77.99.99?dimensionAtObservation=AllDimensions&format=csvfilewithlabels"

response = request(
    method="GET",
    url=url,
    headers={"Ocp-Apim-Subscription-Key": API_KEY},
)

# Without this check an error body (e.g. rejected key or throttling) would be
# decoded and handed to read_csv as if it were data.
if response.status != 200:
    raise RuntimeError(
        f"Stats NZ API request failed with HTTP {response.status}: {url}"
    )

ethnicity_data = response.data.decode("utf-8")

df = pd.read_csv(io.StringIO(ethnicity_data))

In [None]:
# Source column -> working column name. Only these columns are kept.
COLS = {
    "Ethnicity": "ethnicity",
    "Area": "region",
    "OBS_VALUE": "count",
}

# Ethnicity labels dropped before pivoting.
EXCLUDE = [
    "Not elsewhere included",
    "Total stated - ethnicity",
]

# Ethnicity label -> short column name used in the output records.
RENAME = {
    "Middle Eastern/Latin American/African": "mena",
    "Other ethnicity nec": "_other",
    "Total - ethnicity": "total",
    "Pacific Peoples": "pasifika",
    "Asian": "asian",
    "European": "nz_european",
    "Māori": "māori",
}

# Long -> wide: one row per region, one column per ethnicity.
# "Other ethnicity nec" and "New Zealander" are summed into `other`, after
# which the two source columns are dropped.
transformed_df = (
    df[list(COLS)]
    .rename(columns=COLS)
    .query(f"ethnicity not in {EXCLUDE}")
    .pivot(index="region", columns="ethnicity", values="count")
    .reset_index()
    .assign(other=lambda wide: wide["Other ethnicity nec"] + wide["New Zealander"])
    .rename(columns=RENAME)
    .drop(columns=["_other", "New Zealander"])
)

transformed_df

In [None]:
# Attach each region's ethnicity record to its feature, then write the
# enriched JSON to disk.
for region in regions_json["features"]:
    region_name = region["properties"]["name"]
    region_ethnicity_data = transformed_df.query("region == @region_name")

    if len(region_ethnicity_data) == 1:
        record = region_ethnicity_data.to_dict(orient="records")[0]
        # json.dump writes float NaN as the bare token `NaN`, which is not
        # valid JSON; store missing counts as null instead.
        ethnicity = {
            key: (None if pd.isna(value) else value)
            for key, value in record.items()
        }
    else:
        # Zero or several matching rows: store an explicit null so every
        # feature carries the key, as the SA3 cell does.
        ethnicity = None

    region["properties"]["ethnicity"] = ethnicity

with open("data/regions_with_ethnicity.json", "w", encoding="utf-8") as f:
    json.dump(regions_json, f)

In [None]:


# Load the SA3 features, then fetch the two Stats NZ datasets that are later
# merged into them: CEN23_POP_003 (into `df`) and CEN23_TBT_008 (into `lgbt_df`).
sa3_json = json.loads(Path("data/sa3.json").read_text("utf-8"))

url = "https://api.data.stats.govt.nz/rest/data/STATSNZ,CEN23_POP_003,1.0/2023..777+69+61+5+4+3+2+1.99.99?dimensionAtObservation=AllDimensions&format=csvfilewithlabels"

response = request(
    method="GET",
    url=url,
    headers={"Ocp-Apim-Subscription-Key": API_KEY},
)

# Without this check an error body would be parsed as if it were CSV data.
if response.status != 200:
    raise RuntimeError(
        f"Stats NZ API request failed with HTTP {response.status}: {url}"
    )

ethnicity_data = response.data.decode("utf-8")

df = pd.read_csv(io.StringIO(ethnicity_data))

# The key segment between "rb1." and ".2023" is left empty here; rows are
# narrowed to the SA3 codes later, when `lgbt_df` is transformed.
lgbt = "https://api.data.stats.govt.nz/rest/data/STATSNZ,CEN23_TBT_008,1.0/rb3+rbTS+rb2+rb1..2023?dimensionAtObservation=AllDimensions&format=csvfilewithlabels"

response = request(
    method="GET",
    url=lgbt,
    headers={"Ocp-Apim-Subscription-Key": API_KEY},
)

if response.status != 200:
    raise RuntimeError(
        f"Stats NZ API request failed with HTTP {response.status}: {lgbt}"
    )

lgbt_data = response.data.decode("utf-8")

lgbt_df = pd.read_csv(io.StringIO(lgbt_data))

In [None]:
# Codes of every SA3 feature; the LGBT transform in a later cell filters on
# this list as well.
all_codes = [feature["properties"]["SA32025_V1_00"] for feature in sa3_json["features"]]

# Source column -> working column name. Only these columns are kept.
COLS = {
    "Ethnicity": "ethnicity",
    "Area": "region",
    "CEN23_GEO_004": "region_code",
    "OBS_VALUE": "count",
}

# Ethnicity labels dropped before pivoting.
EXCLUDE = [
    "Not elsewhere included",
]

# Ethnicity label -> short column name used in the output records.
RENAME = {
    "Middle Eastern/Latin American/African": "mena",
    "Other ethnicity nec": "_other",
    "Total stated - ethnicity": "total",
    "Pacific Peoples": "pasifika",
    "Asian": "asian",
    "European": "nz_european",
    "Māori": "māori",
}

# Long -> wide: one row per (region, region_code), one column per ethnicity.
# "Other ethnicity nec" and "New Zealander" are summed into `other`; rows
# whose `asian` value is missing are discarded.
transformed_df = (
    df[list(COLS)]
    .rename(columns=COLS)
    .query(f"ethnicity not in {EXCLUDE}")
    .pivot(index=["region", "region_code"], columns="ethnicity", values="count")
    .reset_index()
    .assign(other=lambda wide: wide["Other ethnicity nec"] + wide["New Zealander"])
    .rename(columns=RENAME)
    .query("asian.notna()")
    .drop(columns=["_other", "New Zealander"])
)

transformed_df


In [None]:
# Inspect the raw CEN23_TBT_008 response before it is transformed.
lgbt_df

In [None]:
# LGBT: reshape the raw response into one row per SA3 area.

# Source column -> working column name. Only these columns are kept.
LGBT_COLS = {
    "Variable codes": "preference",
    "Area": "region",
    "CEN23_TBT_GEO_006": "region_code",
    "OBS_VALUE": "count",
}

# Labels to drop before pivoting (currently none).
LGBT_EXCLUDE = []

# Label -> short column name. The ethnicity entries look carried over from
# the ethnicity mapping; rename() only applies them if such columns exist.
LGBT_RENAME = {
    "LGBTIQ+": "lgbt",
    "Not LGBTIQ+": "non_lgbt",
    "Pacific Peoples": "pasifika",
    "Asian": "asian",
    "European": "nz_european",
    "Māori": "māori",
}

# Keep only rows whose region_code is one of the SA3 codes, pivot to one
# column per label, compute `total` as lgbt + non_lgbt, and discard rows
# where `lgbt` is missing.
lgbt_transformed_df = (
    lgbt_df[list(LGBT_COLS)]
    .rename(columns=LGBT_COLS)
    .query(f"preference not in {LGBT_EXCLUDE} and region_code in {all_codes}")
    .pivot(index=["region", "region_code"], columns="preference", values="count")
    .reset_index()
    .rename(columns=LGBT_RENAME)
    .assign(total=lambda wide: wide["non_lgbt"] + wide["lgbt"])
    .query("lgbt.notna()")
    .drop(columns=["LGBTIQ+ status unidentifiable", "Total stated - rainbow/LGBTIQ+ indicator"])
)

lgbt_transformed_df

In [None]:

def _record_or_none(matches):
    """Return the single row of ``matches`` as a JSON-safe dict, else ``None``.

    Args:
        matches: DataFrame holding the rows selected for one SA3 feature.

    Returns:
        The row as a dict with missing values (NaN) replaced by ``None`` when
        exactly one row matched; ``None`` when zero or several rows matched.
    """
    if len(matches) != 1:
        return None
    record = matches.to_dict(orient="records")[0]
    # json.dump writes float NaN as the bare token `NaN`, which is not valid
    # JSON; store missing values as null instead.
    return {key: (None if pd.isna(value) else value) for key, value in record.items()}


# Attach the ethnicity and LGBT records to every SA3 feature, then write the
# enriched JSON to disk.
for region in sa3_json["features"]:
    region_name = region["properties"]["name"]
    # The ethnicity frame is matched on an integer code and the LGBT frame on
    # the original string, presumably because read_csv inferred different
    # dtypes for the two code columns -- TODO confirm.
    region_code = int(region["properties"]["SA32025_V1_00"])
    region_code_2 = region["properties"]["SA32025_V1_00"]

    region_ethnicity_data = transformed_df.query("region == @region_name and region_code == @region_code")
    region_lgbt_data = lgbt_transformed_df.query("region == @region_name and region_code == @region_code_2")

    region["properties"]["ethnicity"] = _record_or_none(region_ethnicity_data)
    region["properties"]["lgbt"] = _record_or_none(region_lgbt_data)

with open("data/sa3_with_ethnicity.json", "w", encoding="utf-8") as f:
    json.dump(sa3_json, f)

In [None]:
# Spot-check the ethnicity record attached to the first SA3 feature.
sa3_json["features"][0]["properties"]["ethnicity"]

# NOTE(review): a scratch loop that used to live here looked for missing
# values with `== np.nan`. That comparison is always False; use
# `pd.isna(...)` or `math.isnan(...)` if the check is revived. The record is
# None for features that did not match exactly one row.

In [None]:
# Fetch CEN23_TBT_008 for only the first ten SA3 codes.
# NOTE: this rebinds `lgbt`, `response`, `lgbt_data` and `lgbt_df`.
all_codes = [feature["properties"]["SA32025_V1_00"] for feature in sa3_json["features"]]

# Build the "+"-joined code list outside the f-string: reusing double quotes
# inside a replacement field is a SyntaxError before Python 3.12.
sample_codes = "+".join(all_codes[0:10])

lgbt = f"https://api.data.stats.govt.nz/rest/data/STATSNZ,CEN23_TBT_008,1.0/rb3+rbTS+rb2+rb1.{sample_codes}.2023?dimensionAtObservation=AllDimensions&format=csvfilewithlabels"

response = request(
    method="GET",
    url=lgbt,
    headers={"Ocp-Apim-Subscription-Key": API_KEY},
)

# Without this check an error body would be parsed as if it were CSV data.
if response.status != 200:
    raise RuntimeError(
        f"Stats NZ API request failed with HTTP {response.status}: {lgbt}"
    )

lgbt_data = response.data.decode("utf-8")

lgbt_df = pd.read_csv(io.StringIO(lgbt_data))

In [None]:
# Inspect the current `lgbt_df` (rebound by the preceding request cell).
lgbt_df

In [None]:
# https://explore.data.stats.govt.nz/vis?pg=0&snb=1&df[ds]=ds-nsiws-disseminate&df[id]=CEN23_TBT_008&df[ag]=STATSNZ&df[vs]=1.0&isAvailabilityDisabled=false&dq=raTotal%2Bra00%2Bra02%2BegTS%2Beg6%2Beg5%2Beg4%2Beg3%2Beg2%2Beg1%2Brb3%2BrbTS%2Brb2%2Brb1.05.2023&to[TIME]=false&hc[Variable%20codes]=&tm=rainbow%20sa3

# Request CEN23_TBT_008 for the ra*, eg* and rb* variable codes listed in
# the URL and load the CSV into `all_df`.
all_stats = "https://api.data.stats.govt.nz/rest/data/STATSNZ,CEN23_TBT_008,1.0/raTotal+ra00+ra02+egTS+eg6+eg5+eg4+eg3+eg2+eg1+rb3+rbTS+rb2+rb1..2023?dimensionAtObservation=AllDimensions&format=csvfilewithlabels"

response = request(
    method="GET",
    url=all_stats,
    headers={"Ocp-Apim-Subscription-Key": API_KEY},
)

# Without this check an error body would be parsed as if it were CSV data.
if response.status != 200:
    raise RuntimeError(
        f"Stats NZ API request failed with HTTP {response.status}: {all_stats}"
    )

all_data = response.data.decode("utf-8")

all_df = pd.read_csv(io.StringIO(all_data))

In [None]:
# Show the first two rows to check the column layout of the response.
all_df.iloc[:2]

In [None]:
# List the distinct values of the CEN23_TBT_GEO_006 column.
all_df["CEN23_TBT_GEO_006"].unique()