# In [1]:
import json
import pandas as pd
import numpy as np

# Load the age-class table and flatten its canton / district / municipality
# hierarchy into one row per municipality, then write the absolute counts.
csv_name_altersklassen = "2019_altersklassen.csv"

df = pd.read_csv(csv_name_altersklassen, encoding='ISO-8859-1')

print(df.columns)

df = df.drop(columns=["Jahr", "Bevölkerungstyp", "Geschlecht", "Zivilstand"])

df = df.rename(columns={'Kanton (-) / Bezirk (>>) / Gemeinde (......)': 'Gemeinde'})

# The level of each row is encoded as a prefix of its label:
# '-' canton, '>>' district, '......' municipality.
# Rows with a missing label are treated as belonging to no level.
labels = df['Gemeinde']
is_kanton = labels.str.startswith('-', na=False)
is_bezirk = labels.str.startswith('>>', na=False)
is_gemeinde = labels.str.startswith('......', na=False)

# Keep the canton/district name only on its own header row, carry it forward
# to the rows below, and finally expose it on municipality rows only.
# Building the columns this way keeps them object-typed (no string writes
# into float NaN columns) and does not fail when a municipality row appears
# before the first canton/district row -- such rows simply stay NaN.
kanton_names = labels.str[1:].str.strip().where(is_kanton)
bezirk_names = labels.str[2:].str.strip().where(is_bezirk)
df['Kanton'] = kanton_names.ffill().where(is_gemeinde)
df['Bezirk'] = bezirk_names.ffill().where(is_gemeinde)

gemeinde_df = df[is_gemeinde].copy()

# Strip the '......' prefix, then split off the leading token (everything up
# to the first space) and keep only the remainder as the municipality name.
name_parts = (
    gemeinde_df['Gemeinde'].str[6:].str.strip().str.split(' ', n=1, expand=True)
)
gemeinde_df['Gemeinde'] = name_parts[1]

gemeinde_df.to_csv("2019_altersklassen_absolute.csv", index=False)

# Convert the absolute counts into per-municipality shares: every age-class
# value is divided by the sum of all age-class values in the same row.
non_numeric_columns = ["Gemeinde", "Kanton", "Bezirk"]

numeric_gemeinde_df = gemeinde_df.drop(columns=non_numeric_columns)

row_totals = numeric_gemeinde_df.sum(axis=1)
gemeinde_df[numeric_gemeinde_df.columns] = numeric_gemeinde_df.div(row_totals, axis=0)




df_municipalities = pd.read_csv("regions_municipalities.csv", sep=";")

# Add a "Sprachregion" column by matching Gemeinde against GEO_NAME and
# taking the DESC_VAL of the first matching row. Municipalities without a
# match get "Unknown" and are printed so they can be checked by hand.
# A single lookup table replaces filtering the whole table once per row.
region_by_name = (
    df_municipalities.dropna(subset=["GEO_NAME"])
    .drop_duplicates(subset="GEO_NAME")  # keep the first match per name
    .set_index("GEO_NAME")["DESC_VAL"]
)
has_region = gemeinde_df["Gemeinde"].isin(region_by_name.index)
gemeinde_df["Sprachregion"] = (
    gemeinde_df["Gemeinde"].map(region_by_name).where(has_region, "Unknown")
)

for unmatched_name in gemeinde_df.loc[~has_region, "Gemeinde"]:
    print(unmatched_name)

# Move the four descriptive columns to the front, in this order; all other
# columns keep their current relative order.
leading_columns = ["Gemeinde", "Sprachregion", "Kanton", "Bezirk"]
remaining_columns = [col for col in gemeinde_df.columns if col not in leading_columns]
gemeinde_df = gemeinde_df[leading_columns + remaining_columns]

# Add one empty, unnamed column right after the Bezirk column
gemeinde_df.insert(len(leading_columns), "", np.nan)


# Prepend three rows above the data (top to bottom):
#   1. "Jung/Alt" label, four empty cells, then one Jung/Alt tag per age column
#   2. "Detaillierte-Altersgruppen" label, four empty cells, then one detailed
#      group tag per age column
#   3. a completely empty row
column_count = len(gemeinde_df.columns)
label_padding = [np.nan] * 4

# NOTE(review): an earlier comment on this row described 8 x 'Jung' and
# 13 x 'Alt', which would line up with the end of the 'JungErwachsen' group
# below; the code has always used 9 / 12. Confirm which split is intended.
jung_alt_row = ["Jung/Alt"] + label_padding + ['Jung'] * 9 + ['Alt'] * 12
detailed_row = (
    ["Detaillierte-Altersgruppen"]
    + label_padding
    + ['Kind'] * 2
    + ['Teenager'] * 3
    + ['JungErwachsen'] * 3
    + ['Erwachsen'] * 6
    + ['Senior'] * 7
)
empty_row = [np.nan] * column_count

header_rows = [jung_alt_row, detailed_row, empty_row]

# The tag lists are written for a fixed number of age columns; fail loudly
# instead of silently mis-aligning tags if the table has a different width.
for header_row in header_rows:
    if len(header_row) != column_count:
        raise ValueError(
            f"Header row has {len(header_row)} cells but the table has "
            f"{column_count} columns"
        )

# Build all header rows at once and concatenate a single time.
header_df = pd.DataFrame(header_rows, columns=gemeinde_df.columns)
gemeinde_df = pd.concat([header_df, gemeinde_df], ignore_index=True)


# Locate the all-empty separator column and split the table around it.
empty_col_index = gemeinde_df.columns[gemeinde_df.isnull().all()].tolist()
if not empty_col_index:
    raise ValueError("No empty column found")
first_empty_col = empty_col_index[0]
split_index = gemeinde_df.columns.get_loc(first_empty_col)

before_first_empty_col = gemeinde_df.iloc[:, :split_index]
after_first_empty_col = gemeinde_df.iloc[:, split_index + 1 :]

# Locate the all-empty separator row. The position (not the index label) is
# used because the slices below are positional, and a missing separator is
# reported instead of silently being treated as row 0.
empty_row_mask = gemeinde_df.isna().all(axis=1)
if not empty_row_mask.any():
    raise ValueError("No empty row found")
first_empty_row_index = int(np.argmax(empty_row_mask.to_numpy()))
before_first_empty_row = gemeinde_df.iloc[:first_empty_row_index]
after__first_empty_row = gemeinde_df.iloc[first_empty_row_index + 1 :]

number_columns_before_first_empty_col = before_first_empty_col.shape[1]
number_rows_before_first_empty_row = before_first_empty_row.shape[0]

# The raw values start one past the separator row and separator column, so
# skip the separators themselves (+ 1 on both axes).
raw_data_df = gemeinde_df.iloc[
    number_rows_before_first_empty_row + 1 :,
    number_columns_before_first_empty_col + 1 :,
]



# Bundle the table and its display metadata and write them to a JSON file.
# NOTE(review): the cellHoverTextSnippet2/3 texts talk about voting
# ("have voted", "'Yes' on"); confirm they are intended for this
# age-group dataset.
dic = {
    "datasetName": "Age-Groups",
    "descriptionText": "This dataset contains age group data",
    "itemNameSingular": "municipality",
    "itemNamePlural": "municipalities",
    "attributeNameSingular": "age group",
    # This key was previously a second "itemNamePlural", which silently
    # overwrote "municipalities" above.
    "attributeNamePlural": "age groups",
    "cellHoverTextSnippet1": "The age group of",
    "cellHoverTextSnippet2": {"single": "have voted", "plural": "have on average voted"},
    "cellHoverTextSnippet3": "'Yes' on",
    # Plain floats so the values serialise regardless of the numpy scalar type.
    "defaultMinValue": float(raw_data_df.min().min()),
    "defaultMaxValue": float(raw_data_df.max().max()),
    "defaultColorBreakpoints": "",
    "csvFile": gemeinde_df.to_csv(index=False),
}

with open("Age-Groups.json", "w", encoding="utf-8") as f:
    json.dump(dic, f, indent=4)


# Output:
# Index(['Jahr', 'Kanton (-) / Bezirk (>>) / Gemeinde (......)',
#        'Bevölkerungstyp', 'Geschlecht', 'Zivilstand', '0-4 Jahre', '5-9 Jahre',
#        '10-14 Jahre', '15-19 Jahre', '20-24 Jahre', '25-29 Jahre',
#        '30-34 Jahre', '35-39 Jahre', '40-44 Jahre', '45-49 Jahre',
#        '50-54 Jahre', '55-59 Jahre', '60-64 Jahre', '65-69 Jahre',
#        '70-74 Jahre', '75-79 Jahre', '80-84 Jahre', '85-89 Jahre',
#        '90-94 Jahre', '95-99 Jahre', '100 Jahre und mehr'],
#       dtype='object')
# Bois-d'Amont
# Obersaxen Mundaun
