In [25]:
import piccard as pc
import piccard2 as pc2

# install other dependencies, pip install first if needed
from tscluster.tsplot import tsplot
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt

import warnings
# Silence every warning for the rest of the kernel session so library chatter
# does not clutter cell output.
# NOTE(review): this also hides deprecation and data-quality warnings from the
# libraries imported above; narrow it to a specific category/module if a real
# problem needs to surface.
warnings.filterwarnings('ignore')

In [26]:
# Load one household GeoJSON extract per census year (2021, 2016, 2011, 2006).
# Each census labels the same three dwelling variables with its own vector ID
# and wording, so every frame is renamed to a shared set of column names:
#   occupied_private_dwellings, single_detached_house, apt_five_or_more

# --- 2021 census ---
households_data_2021 = gpd.read_file(
    "https://raw.githubusercontent.com/ecorbin567/piccard2/refs/heads/main/docs/piccard2_testing_data/households_data_2021.geojson"
).rename(columns={
    'v_CA21_434: Occupied private dwellings by structural type of dwelling data': 'occupied_private_dwellings',
    'v_CA21_435: Single-detached house': 'single_detached_house',
    'v_CA21_440: Apartment in a building that has five or more storeys': 'apt_five_or_more',
})

# --- 2016 census ---
households_data_2016 = gpd.read_file(
    "https://raw.githubusercontent.com/ecorbin567/piccard2/refs/heads/main/docs/piccard2_testing_data/households_data_2016.geojson"
).rename(columns={
    'v_CA16_408: Occupied private dwellings by structural type of dwelling data': 'occupied_private_dwellings',
    'v_CA16_409: Single-detached house': 'single_detached_house',
    'v_CA16_410: Apartment in a building that has five or more storeys': 'apt_five_or_more',
})

# --- 2011 census ---
households_data_2011 = gpd.read_file(
    "https://raw.githubusercontent.com/ecorbin567/piccard2/refs/heads/main/docs/piccard2_testing_data/households_data_2011.geojson"
).rename(columns={
    'v_CA11F_199: Total number of occupied private dwellings by structural type of dwelling': 'occupied_private_dwellings',
    'v_CA11F_200: Single-detached house': 'single_detached_house',
    'v_CA11F_201: Apartment, building that has five or more storeys': 'apt_five_or_more',
})

# --- 2006 census ---
households_data_2006 = gpd.read_file(
    "https://raw.githubusercontent.com/ecorbin567/piccard2/refs/heads/main/docs/piccard2_testing_data/households_data_2006.geojson"
).rename(columns={
    'v_CA06_119: Total number of occupied private dwellings by structural type of dwelling - data': 'occupied_private_dwellings',
    'v_CA06_120: Single-detached house': 'single_detached_house',
    'v_CA06_124: Apartment, building that has five or more storeys': 'apt_five_or_more',
})

In [27]:
# Keep each census year next to its frame so the two lists handed to piccard
# cannot drift out of alignment (dicts preserve insertion order: oldest first).
census_by_year = {
    '2006': households_data_2006,
    '2011': households_data_2011,
    '2016': households_data_2016,
    '2021': households_data_2021,
}
years = list(census_by_year.keys())
census_dfs = list(census_by_year.values())

# Build the tabular and graph views from the same inputs, keyed on 'GeoUID'.
# NOTE(review): 0.05 is create_network's fourth positional argument -- presumably
# a threshold; confirm its meaning against the piccard documentation.
network_table = pc.create_network_table(census_dfs, years, 'GeoUID')
G = pc.create_network(census_dfs, years, 'GeoUID', 0.05)

In [28]:
# Feature columns handed to clustering_prep: the three shared dwelling variables
# suffixed with each census year, ordered year by year (2006 -> 2021) and, within
# a year, in the variable order listed below.
feature_columns = [
    f'{variable}_{year}'
    for year in ('2006', '2011', '2016', '2021')
    for variable in ('occupied_private_dwellings', 'single_detached_house', 'apt_five_or_more')
]

arr, label_dict = pc2.clustering_prep(network_table, 'name', feature_columns)

In [29]:
# Cluster using the prepared array and label mapping from clustering_prep.
# NOTE(review): the bare 4 is presumably the number of clusters -- confirm against
# pc2.cluster. The logged run initialises with kmeans++, so results may differ
# between runs unless a seed can be fixed -- TODO check whether cluster accepts one.
prepared_inputs = {'arr': arr, 'label_dict': label_dict}
tsc = pc2.cluster(
    network_table,
    G,
    'GeoUID',
    4,
    **prepared_inputs,
)

Initialization with kmeans++, Sum of Distance: 2378037516.6667, Max Distance: 6859.2583
Iteration 0, Sum of distance: 1783766278.9400, Max distance: 7594.8886, Number of change: 149
Iteration 5, Sum of distance: 1562560998.9736, Max distance: 7594.8886, Number of change: 46
Iteration 10, Sum of distance: 1553885796.4576, Max distance: 7594.8886, Number of change: 0
Converged at iteration 15, Sum of distance: 1553885796.4576, Max distance: 7594.8886


In [None]:
# If working in a notebook (.ipynb), nbformat is needed: run `%pip install nbformat` first.
#
# TODO (next steps):
#   - change the testing notebook
#   - pass colours in to all plots so they are the same and work with colour-blindness
#   - make the other plots more customizable: titles, x-axis rotation, cluster labels, figsize

# Which clusters to draw and how; kept separate from the data inputs so the
# selection is easy to tweak between runs.
plot_options = {
    'clusters_to_show': [3],
    'clusters_to_exclude': [],
    'hover_labels': True,
}

figs = pc2.plot_clusters(
    tsc=tsc,
    network_table=network_table,
    arr=arr,
    label_dict=label_dict,
    **plot_options,
)

# plot_clusters returns an iterable of figures; display each one in turn.
for fig in figs:
    fig.show()