In [7]:
import matplotlib.pyplot as plt
import polars as pl
import seaborn as sns
import pathlib
import json


In [8]:
# Both CSV paths are relative, so they resolve against the kernel's working directory.
individual_usage_csv = pathlib.Path("IndividualInternetUsage.csv")
age_group_usage_csv = pathlib.Path("IndividualsInternetUsageByAgeGroupAnnual.csv")

# dataset_1: individual internet usage; dataset_2: usage broken down by age group
# (one row per series, one column per year, as shown in the preview cell).
dataset_1 = pl.read_csv(individual_usage_csv)
dataset_2 = pl.read_csv(age_group_usage_csv)

In [9]:
# Preview the age-group table: a "DataSeries" label column plus one float column per year.
dataset_2

DataSeries,2024,2023,2022,2021,2020,2019,2018,2017
str,f64,f64,f64,f64,f64,f64,f64,f64
"""Overall (Singapore Residents A…",91.06,91.25,92.82,93.89,91.96,88.3,86.47,82.57
"""Singapore Residents Aged 18 - …",97.84,98.46,99.11,99.74,99.57,99.78,99.89,99.39
"""Singapore Residents Aged 40 - …",95.95,95.56,98.33,97.98,97.63,97.08,93.03,90.75
"""Singapore Residents Aged 60 An…",77.28,76.46,77.93,80.94,73.14,58.03,55.5,41.7


In [10]:
# Keep only the all-ages series; the per-age-group rows are not used downstream.
overall_series_label = "Overall (Singapore Residents Aged 18 And Over)"
overall_rows = dataset_2.filter(pl.col("DataSeries") == overall_series_label)

# An exact-string match that finds nothing would otherwise yield an empty frame and
# silently remove every year in this file from the combined dataset, so fail loudly.
if overall_rows.height != 1:
    raise ValueError(
        f"Expected exactly one row labelled {overall_series_label!r} in 'DataSeries', "
        f"found {overall_rows.height}."
    )

# Only overwrite dataset_2 once the match has been validated.
dataset_2 = overall_rows

In [None]:
# Reshape from wide (one column per year) to long (one row per year).
# The former column headers land in "year" (still strings at this point) and the
# cell values in "value"; the constant "DataSeries" label is no longer needed.
dataset_2 = (
    dataset_2
    .unpivot(index=["DataSeries"], variable_name="year", value_name="value")
    .select("year", "value")
)

In [12]:
# "year" came from column headers, so it is text; convert it to a 64-bit integer.
dataset_2 = dataset_2.cast({"year": pl.Int64})

In [15]:
# Give the measurement column the same name as in dataset_2 so the frames can be stacked.
column_renames = {"internet_usage": "value"}
dataset_1 = dataset_1.rename(column_renames)

In [20]:
# Store "value" as Float64, the dtype dataset_2 uses for its values.
dataset_1 = dataset_1.cast({"value": pl.Float64})

In [23]:
# Stack the two sources, keep one row per year, and order chronologically.
# dataset_2 is listed second and keep="last" is used, so for any year present in
# both sources the dataset_2 figure is the one retained.
combined_dataset = (
    pl.concat([dataset_1, dataset_2], how="vertical")
    .unique(subset="year", keep="last")
    .sort(by="year")
)

In [30]:
# One dict per year, with the measurement exposed under the key "percentage".
chart_records = combined_dataset.rename({"value": "percentage"}).to_dicts()

# Payload written to disk: chart metadata, source attributions, and the records.
chart_data = {
    "title": "Internet Penetration in Singapore (2000-2024)",
    "subtitle": "Percentage of population using the internet",
    "description": "Household and individual internet usage rates in Singapore from 2000 to 2024, showing the digital transformation from early adoption to near-universal coverage.",
    "unit": "percentage",
    "sources": [
        {
            "name": "Data.gov.sg - Individuals Internet Usage By Age Group, Annual",
            "url": "https://data.gov.sg/datasets/d_3f4bfee2d42f8fb3bea3218c01aa9902/view",
        },
        {
            "name": "Data.gov.sg - Individual Internet Usage",
            "url": "https://data.gov.sg/datasets/d_fcc02bc884c54a09e8665443bff2f4c2/view",
        },
    ],
    "data": chart_records,
}

# The JSON file is written one directory above the working directory.
output_path = pathlib.Path("../internet_usage_in_sg.json")
with output_path.open("w") as out_file:
    json.dump(chart_data, out_file, indent=4)