In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

from sklearn.linear_model import LinearRegression

import warnings
warnings.filterwarnings('ignore')

# load data

In [2]:
df_birth = pd.read_csv('./data/2_BirthsAndFertilityRatesAnnual.csv')
df_childcare = pd.read_csv('./data/3_ListingofCentres.csv')
df_bto = pd.read_csv('./data/4_btomapping.csv')
df_resi = pd.read_excel('./data/1_respopagesex2000to2020e.xlsx', sheet_name=None, header=2)

ls_df = []
for sheet_name, df in df_resi.items():
    print(f"Sheet Name: {sheet_name}")
    ls_df.append(df)

df_resi = ls_df[0]
for df in ls_df[1:]:
    df_resi = df_resi.merge(df, on=['Planning Area', 'Subzone', 'Age', 'Sex'], how='left')

df_resi.to_csv('./data_processed/1_population.csv', index=False)

Sheet Name: 2000
Sheet Name: 2001-2010
Sheet Name: 2011-2019
Sheet Name: 2020


# set up parameters

In [3]:
watching_period = 5 # years
current_year = 2024
start_year = current_year - 6 # 6 years
ratio = 18/12  # 18 mo

# assumption
capacity_per_childcare = 100
bto_ratio = 0.2


# birth rate forecasting

In [4]:
def extract_birth_rates(df_birth):
    target_age_groups = [
        '    15 - 19 Years',
       '    20 - 24 Years', '    25 - 29 Years', '    30 - 34 Years',
       '    35 - 39 Years', '    40 - 44 Years', '    45 - 49 Years',
    ]
    
    age_data = df_birth[df_birth['DataSeries'].isin(target_age_groups)]
    
    year_cols = [col for col in df_birth.columns if str(col).isdigit()]
    year_cols.sort()
    
    result_data = []
    for _, row in age_data.iterrows():
        age_group = row['DataSeries']
        for year_col in year_cols:
            birth_rate = row[year_col]
            # missing values
            if pd.isna(birth_rate) or birth_rate in ['na', 'NaN', '']:
                birth_rate = np.nan
            else:
                birth_rate = float(birth_rate)
                
            result_data.append({
                'age_group': age_group,
                'year': int(year_col),
                'birth_rate': birth_rate / 1000
            })
    
    final_df = pd.DataFrame(result_data) 
    return final_df

df_birth_tmp = extract_birth_rates(df_birth)


In [5]:
df_birth_tmp.head()

Unnamed: 0,age_group,year,birth_rate
0,15 - 19 Years,1960,0.0696
1,15 - 19 Years,1961,0.0634
2,15 - 19 Years,1962,0.052
3,15 - 19 Years,1963,0.0457
4,15 - 19 Years,1964,0.0383


In [6]:
def forecast_birth_rate_by_age_group(df_birth_tmp):
    
    df_birth_tmp = df_birth_tmp.copy()
    df_birth_tmp['year'] = pd.to_numeric(df_birth_tmp['year'], errors='coerce')
    df_birth_tmp['birth_rate'] = pd.to_numeric(df_birth_tmp['birth_rate'], errors='coerce')
    
    df_clean = df_birth_tmp.dropna(subset=['year', 'birth_rate'])
    
    # model
    def predict_birth_rate_trend(years, birth_rates, forecast_years):
        if len(years) < 2:  
            last_rate = birth_rates.iloc[-1] if len(birth_rates) > 0 else 0
            return [last_rate] * len(forecast_years)
        
        try:
            X = np.array(years).reshape(-1, 1)
            y = np.array(birth_rates)
            
            model = LinearRegression()
            model.fit(X, y)
            
            X_forecast = np.array(forecast_years).reshape(-1, 1)
            predictions = model.predict(X_forecast)
            
            predictions = np.maximum(predictions, 0)
            
            return predictions.tolist()
            
        except Exception as e:

            if len(birth_rates) >= 2:
                rate_change = (birth_rates.iloc[-1] - birth_rates.iloc[0]) / len(birth_rates)
                last_rate = birth_rates.iloc[-1]
                predictions = []
                for i, year in enumerate(forecast_years):
                    pred = last_rate + rate_change * (i + 1)
                    predictions.append(max(0, pred))  
                return predictions
            else:
                return [birth_rates.iloc[-1] if len(birth_rates) > 0 else 0] * len(forecast_years)
    
    forecast_years = list(range(current_year+1, current_year+watching_period+1)) # [2025, 2026, 2027, 2028, 2029]
    forecast_results = []
    
    total_age_groups = df_clean['age_group'].nunique()
    processed = 0
    
    for age_group, group_data in df_clean.groupby('age_group'):
        group_data = group_data.sort_values('year')
        
        years = group_data['year'].tolist()
        birth_rates = group_data['birth_rate']
        
        print(f"Age Group: {age_group} (records: {len(years)})")
        
        # forecast
        predictions = predict_birth_rate_trend(years, birth_rates, forecast_years)
        
        # save
        for year, pred_rate in zip(forecast_years, predictions):
            forecast_results.append({
                'age_group': age_group,
                'year': year,
                'predicted_birth_rate': round(pred_rate, 4)
            })
        
        processed += 1
        print(f"Done: {processed}/{total_age_groups}")
    
    forecast_df = pd.DataFrame(forecast_results)
    return forecast_df

df_birth_forecast = forecast_birth_rate_by_age_group(df_birth_tmp)

Age Group:     15 - 19 Years (records: 65)
Done: 1/7
Age Group:     20 - 24 Years (records: 65)
Done: 2/7
Age Group:     25 - 29 Years (records: 65)
Done: 3/7
Age Group:     30 - 34 Years (records: 65)
Done: 4/7
Age Group:     35 - 39 Years (records: 65)
Done: 5/7
Age Group:     40 - 44 Years (records: 65)
Done: 6/7
Age Group:     45 - 49 Years (records: 45)
Done: 7/7


In [7]:
df_birth_forecast

Unnamed: 0,age_group,year,predicted_birth_rate
0,15 - 19 Years,2025,0.0
1,15 - 19 Years,2026,0.0
2,15 - 19 Years,2027,0.0
3,15 - 19 Years,2028,0.0
4,15 - 19 Years,2029,0.0
5,20 - 24 Years,2025,0.0
6,20 - 24 Years,2026,0.0
7,20 - 24 Years,2027,0.0
8,20 - 24 Years,2028,0.0
9,20 - 24 Years,2029,0.0


# Population forcasting for different female age group

In [8]:
# female number Assume female can give birth from 15 till 49
df_resi_tmp = df_resi[(df_resi.Age != 'Total') & (df_resi.Age != '90 & Over')  & (df_resi.Sex == 'Females') & (df_resi.Subzone != 'Total')]
df_resi_tmp['Age'] = df_resi_tmp['Age'].astype(int)
df_resi_tmp =  df_resi_tmp[(df_resi_tmp['Age'] >= 15) & (df_resi_tmp['Age'] <= 49)]

df_resi_tmp = df_resi_tmp[df_resi_tmp.Subzone.notna()]

# replace '-' with rolling average
year_columns = [col for col in df_resi_tmp.columns if str(col).isdigit()]
df_resi_tmp[year_columns] = df_resi_tmp[year_columns].replace('-', np.nan).astype(float)
df_resi_tmp[year_columns] = df_resi_tmp[year_columns].interpolate(method='linear', axis=1, limit_direction='both')

In [9]:
bins = [15, 20, 25, 30, 35, 40, 45, 50]
labels = [ '    15 - 19 Years',
       '    20 - 24 Years', '    25 - 29 Years', '    30 - 34 Years',
       '    35 - 39 Years', '    40 - 44 Years', '    45 - 49 Years']
df_resi_tmp['age_group'] = pd.cut(df_resi_tmp['Age'], bins=bins, labels=labels, include_lowest=True)


df_resi_tmp = df_resi_tmp.groupby(['Subzone', 'age_group'])[year_columns].sum().reset_index().melt(
    id_vars=['Subzone', 'age_group'],
    var_name='year', value_name='pop')

In [10]:
df_resi_tmp.head()

Unnamed: 0,Subzone,age_group,year,pop
0,Admiralty,15 - 19 Years,2000,110.0
1,Admiralty,20 - 24 Years,2000,110.0
2,Admiralty,25 - 29 Years,2000,220.0
3,Admiralty,30 - 34 Years,2000,120.0
4,Admiralty,35 - 39 Years,2000,140.0


In [11]:
def forecast_population_by_subzone_age(df_resi_tmp):
    df_resi_tmp = df_resi_tmp.copy()
    df_resi_tmp['year'] = pd.to_numeric(df_resi_tmp['year'], errors='coerce')
    df_resi_tmp['pop'] = pd.to_numeric(df_resi_tmp['pop'], errors='coerce')
    
    df_clean = df_resi_tmp.dropna(subset=['year', 'pop'])

    def predict_trend(years, populations, forecast_years):
        if len(years) < 2: 
            last_pop = populations.iloc[-1] if len(populations) > 0 else 0
            return [last_pop] * len(forecast_years)
        
        try:
            X = np.array(years).reshape(-1, 1)
            y = np.array(populations)

            model = LinearRegression()
            model.fit(X, y)
            
            X_forecast = np.array(forecast_years).reshape(-1, 1)
            predictions = model.predict(X_forecast)
            
            predictions = np.maximum(predictions, 0)
            
            return predictions.tolist()
            
        except Exception as e:
            if len(populations) >= 2:
                growth_rate = (populations.iloc[-1] - populations.iloc[0]) / len(populations)
                last_pop = populations.iloc[-1]
                predictions = []
                for i, year in enumerate(forecast_years):
                    pred = last_pop + growth_rate * (i + 1)
                    predictions.append(max(0, pred))
                return predictions
            else:
                return [populations.iloc[-1] if len(populations) > 0 else 0] * len(forecast_years)
    
    forecast_years = list(range(2021, current_year+watching_period+1)) # [2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029]  # 2021-2024 data is missing 
    forecast_results = []
    
    total_combinations = len(df_clean[['Subzone', 'age_group']].drop_duplicates())
    processed = 0
    
    for (subzone, age_group), group_data in df_clean.groupby(['Subzone', 'age_group']):
        group_data = group_data.sort_values('year')
        
        years = group_data['year'].tolist()
        populations = group_data['pop']

        predictions = predict_trend(years, populations, forecast_years)
        
        # save
        for year, pred_pop in zip(forecast_years, predictions):
            forecast_results.append({
                'Subzone': subzone,
                'age_group': age_group,
                'year': year,
                'predicted_pop': round(pred_pop)
            })
        
        processed += 1
        if processed % 50 == 0:
            print(f"Done: {processed}/{total_combinations}")
    
    forecast_df = pd.DataFrame(forecast_results)  
    return forecast_df

df_pop_forecast =  forecast_population_by_subzone_age(df_resi_tmp)

Done: 50/2086
Done: 100/2086
Done: 150/2086
Done: 200/2086
Done: 250/2086
Done: 300/2086
Done: 350/2086
Done: 400/2086
Done: 450/2086
Done: 500/2086
Done: 550/2086
Done: 600/2086
Done: 650/2086
Done: 700/2086
Done: 750/2086
Done: 800/2086
Done: 850/2086
Done: 900/2086
Done: 950/2086
Done: 1000/2086
Done: 1050/2086
Done: 1100/2086
Done: 1150/2086
Done: 1200/2086
Done: 1250/2086
Done: 1300/2086
Done: 1350/2086
Done: 1400/2086
Done: 1450/2086
Done: 1500/2086
Done: 1550/2086
Done: 1600/2086
Done: 1650/2086
Done: 1700/2086
Done: 1750/2086
Done: 1800/2086
Done: 1850/2086
Done: 1900/2086
Done: 1950/2086
Done: 2000/2086
Done: 2050/2086


In [12]:
df_pop_forecast

Unnamed: 0,Subzone,age_group,year,predicted_pop
0,Admiralty,15 - 19 Years,2021,546
1,Admiralty,15 - 19 Years,2022,563
2,Admiralty,15 - 19 Years,2023,580
3,Admiralty,15 - 19 Years,2024,597
4,Admiralty,15 - 19 Years,2025,613
...,...,...,...,...
18769,Yunnan,45 - 49 Years,2025,2608
18770,Yunnan,45 - 49 Years,2026,2634
18771,Yunnan,45 - 49 Years,2027,2660
18772,Yunnan,45 - 49 Years,2028,2686


# calculate young child growing up from previous years

In [13]:
df_birth_history = df_birth_tmp[(df_birth_tmp.year >= start_year) & (df_birth_tmp.year <= current_year)]

# add partial forcast
df_pop_forecast_part = df_pop_forecast[(df_pop_forecast.year >= 2021) & (df_pop_forecast.year <= current_year)]
df_resi_history = df_resi_tmp[(df_resi_tmp.year >= start_year) & (df_resi_tmp.year <= 2020)]
df_resi_history = pd.concat([df_pop_forecast_part.rename(columns={'predicted_pop': 'pop', 'predicted_birth_rate': 'birth_rate'}), df_resi_history])

In [14]:
df_pop_birth_history = df_resi_history.merge(df_birth_history, on=['age_group', 'year'], how='left')

In [15]:
df_pop_birth_history.head(20) #.birth_rate.isna().sum()

Unnamed: 0,Subzone,age_group,year,pop,birth_rate
0,Admiralty,15 - 19 Years,2021,546.0,0.0022
1,Admiralty,15 - 19 Years,2022,563.0,0.0021
2,Admiralty,15 - 19 Years,2023,580.0,0.0022
3,Admiralty,15 - 19 Years,2024,597.0,0.0023
4,Admiralty,20 - 24 Years,2021,415.0,0.0117
5,Admiralty,20 - 24 Years,2022,424.0,0.0112
6,Admiralty,20 - 24 Years,2023,433.0,0.0106
7,Admiralty,20 - 24 Years,2024,443.0,0.0098
8,Admiralty,25 - 29 Years,2021,538.0,0.0534
9,Admiralty,25 - 29 Years,2022,540.0,0.0488


In [16]:
df_pop_birth_history['new_born'] = df_pop_birth_history['pop'] * df_pop_birth_history['birth_rate']
df_pop_birth_history['new_born'] = df_pop_birth_history['new_born'].astype(int)

In [17]:
df_pop_forecast

Unnamed: 0,Subzone,age_group,year,predicted_pop
0,Admiralty,15 - 19 Years,2021,546
1,Admiralty,15 - 19 Years,2022,563
2,Admiralty,15 - 19 Years,2023,580
3,Admiralty,15 - 19 Years,2024,597
4,Admiralty,15 - 19 Years,2025,613
...,...,...,...,...
18769,Yunnan,45 - 49 Years,2025,2608
18770,Yunnan,45 - 49 Years,2026,2634
18771,Yunnan,45 - 49 Years,2027,2660
18772,Yunnan,45 - 49 Years,2028,2686


# calculate new borns in next few years

In [18]:
df_pop_birth = df_pop_forecast[df_pop_forecast.year > current_year].merge(df_birth_forecast, on=['age_group', 'year'], how='left')

In [19]:
df_pop_birth['new_born'] = df_pop_birth['predicted_pop'] * df_pop_birth['predicted_birth_rate']
df_pop_birth['new_born'] = df_pop_birth['new_born'].astype(int)

# get final new borns for next 5 years

In [20]:
# add babies from earlier years
df_pop_birth_history['year'] = df_pop_birth_history['year'].astype(int)
df_merge = pd.concat([df_pop_birth_history, df_pop_birth.rename(columns={'predicted_pop': 'pop', 'predicted_birth_rate': 'birth_rate'})])

In [21]:
df_final_pop = pd.DataFrame(columns=['Subzone'])
for year in range(current_year+1, current_year+watching_period+1):
    tmp_start_year = int(year - ratio)

    tmp_pop = df_merge[df_merge.year == tmp_start_year].groupby(['age_group', 'Subzone']).new_born.sum() * (2-ratio) + \
        df_merge[(df_merge.year >= year-6) & (df_merge.year < tmp_start_year)].groupby(['age_group', 'Subzone']).new_born.sum()  # 18m to 6 years
    
    tmp_pop = tmp_pop.reset_index().groupby('Subzone').new_born.sum().reset_index().rename(columns={'new_born': year})
    df_final_pop = df_final_pop.merge(tmp_pop, on='Subzone', how='outer')
    

In [22]:
df_final_pop

Unnamed: 0,Subzone,2025,2026,2027,2028,2029
0,Admiralty,618.0,638.0,638.0,561.0,488.0
1,Airport,0.0,0.0,0.0,0.0,0.0
2,Alexandra Hill,393.0,379.5,354.0,295.5,244.0
3,Alexandra North,94.0,85.5,73.0,64.0,55.0
4,Aljunied,1427.0,1370.5,1259.0,1069.0,898.5
...,...,...,...,...,...,...
293,Yishun East,2755.0,2690.0,2543.5,2260.0,1992.5
294,Yishun South,1495.5,1387.5,1221.5,1031.0,856.0
295,Yishun West,1865.0,1814.0,1695.5,1432.5,1193.0
296,Yuhua,1153.5,1107.5,1021.0,873.0,736.0


# BTO impact

In [23]:
df_final_pop_tmp = df_final_pop.melt(id_vars='Subzone', var_name='year', value_name='child_pop')

In [24]:
df_add_bto = df_final_pop_tmp.merge(df_bto[['Subzone', 'Estimated completion year', 'Total number of units']], 
left_on=['Subzone', 'year'], right_on=['Subzone', 'Estimated completion year'], how='outer')  

df_add_bto

Unnamed: 0,Subzone,year,child_pop,Estimated completion year,Total number of units
0,Admiralty,2025,618.0,,
1,Airport,2025,0.0,,
2,Alexandra Hill,2025,393.0,,
3,Alexandra North,2025,94.0,,
4,Aljunied,2025,1427.0,2025.0,421.0
...,...,...,...,...,...
1647,Balestier,,,2028.0,739.0
1648,Plantation,,,2026.0,1010.0
1649,Jelebu,,,2027.0,334.0
1650,Matilda,,,2027.0,962.0


In [25]:
# add bto
df_add_bto = df_add_bto.fillna(0)
df_add_bto['child_pop'] = df_add_bto['child_pop'] + df_add_bto['Total number of units'] * bto_ratio
df_add_bto['child_pop'] = df_add_bto['child_pop'].astype(int)

In [26]:
ls_idx = df_add_bto[df_add_bto['year'] == 0].index
df_add_bto.loc[ls_idx, 'year'] = df_add_bto.loc[ls_idx]['Estimated completion year'].astype(int)

In [27]:
df_add_bto.groupby(['Subzone'	,'year']).child_pop.sum()

Subzone      year
 Balestier   2027     208
             2028     147
 Plantation  2026     202
Admiralty    2025     618
             2026     638
                     ... 
Yunnan       2025    2311
             2026    2162
             2027    1968
             2028    1637
             2029    1329
Name: child_pop, Length: 1593, dtype: int64

# childcare center capacity

In [28]:
# 2025.1 - 7

ls_col_drop = [
    'infant_vacancy_current_month',
       'infant_vacancy_next_month', 'infant_vacancy_third_month',
       'infant_vacancy_fourth_month', 'infant_vacancy_fifth_month',
       'infant_vacancy_sixth_month', 'infant_vacancy_seventh_month',
       'tp_code', 'food_offered', 'second_languages_offered', 
       'remarks',
       'centre_contact_no',
       'centre_email_address', 
       'centre_website',

]
df_childcare.drop(columns=ls_col_drop, inplace=True)
df_childcare = df_childcare[df_childcare.centre_code != 'na']

In [29]:
df_childcare['postal_code'] = df_childcare['postal_code'].astype(int)
# get subzone mapping
df_po_subzone = pd.read_csv('./data_processed/postcode_ll_subzone.csv')

In [30]:
df_childcare_subzone = df_childcare.merge(df_po_subzone[['postcode', 'Subzone']], left_on='postal_code', right_on='postcode', how='left')
df_childcare_subzone['Subzone'] = df_childcare_subzone.Subzone.str.title()

In [31]:
# assume
df_childcare_capacity = df_childcare_subzone.Subzone.value_counts().reset_index()
df_childcare_capacity.columns = ['Subzone', 'Capacity']
df_childcare_capacity['Capacity'] = df_childcare_capacity['Capacity']  * capacity_per_childcare

In [32]:
df_childcare_capacity

Unnamed: 0,Subzone,Capacity
0,Tampines East,5400
1,Woodlands East,4600
2,Tampines West,3500
3,Bedok North,3300
4,Frankel,2900
...,...,...
233,Singapore Polytechnic,100
234,Cairnhill,100
235,Greenwood Park,100
236,Clifford Pier,100


In [33]:
start_year = current_year + 1
end_year = current_year + watching_period

grouped = df_add_bto.groupby('Subzone')

results = {}
for y in range(start_year, end_year + 1):
    # calculate cumsum 2025, 2025–2026、2025–2027 ... 到 2025–2029
    mask = (df_add_bto['year'] >= start_year) & (df_add_bto['year'] <= y)
    temp = df_add_bto[mask].groupby('Subzone')['child_pop'].sum().rename(f'{start_year}_{y}')
    results[f'{start_year}_{y}'] = temp

out = pd.DataFrame(results).fillna(0).reset_index()
out

Unnamed: 0,Subzone,2025_2025,2025_2026,2025_2027,2025_2028,2025_2029
0,Balestier,0.0,0.0,208.0,355,355
1,Plantation,0.0,202.0,202.0,202,202
2,Admiralty,618.0,1256.0,1894.0,2455,2943
3,Airport,0.0,0.0,0.0,0,0
4,Alexandra Hill,393.0,772.0,1126.0,1421,1665
...,...,...,...,...,...,...
305,Yishun South,1495.0,2882.0,4103.0,5134,5990
306,Yishun West,1865.0,3679.0,5374.0,6806,7999
307,Yuhua,1153.0,2260.0,3281.0,4154,4890
308,Yuhua East,0.0,0.0,0.0,112,112


# Save output

In [34]:
df_final = out.merge(df_childcare_capacity, on='Subzone', how='left').fillna(0)

for col in ['2025_2025', '2025_2026', '2025_2027', '2025_2028', '2025_2029']:
    df_final[col+'_diff'] = df_final['Capacity'] - df_final[col] 
    
df_final.to_csv('./data_processed/final_output.csv', index=False)

# Summary

In [35]:
df_final_tmp = df_final.copy()
df_final_tmp.set_index('Subzone', inplace=True)

In [36]:
# % fulfilment among subzones
(df_final_tmp > 0).sum()/df_final_tmp.shape[0]

2025_2025         0.719355
2025_2026         0.729032
2025_2027         0.738710
2025_2028         0.745161
2025_2029         0.745161
Capacity          0.635484
2025_2025_diff    0.464516
2025_2026_diff    0.241935
2025_2027_diff    0.174194
2025_2028_diff    0.145161
2025_2029_diff    0.129032
dtype: float64

In [37]:
# num of Subzones with x years of fulfillment 
(df_final_tmp > 0).iloc[:, -5:].sum(axis=1).sort_values().value_counts()

0    166
1     69
5     40
2     21
3      9
4      5
dtype: int64

In [None]:
# top subzones with most gap
df_tmp = df_final_tmp[['2025_2025_diff', '2025_2026_diff',	'2025_2027_diff',	'2025_2028_diff',	'2025_2029_diff']].sum(axis=1).sort_values()
df_tmp.head(10)

Subzone
Central                 -67046.0
Tampines West           -51916.0
Sungei Serangoon West   -35873.0
Tampines East           -33441.0
Hong Kah                -29746.0
Buangkok                -29065.0
Woodlands East          -27360.0
Yishun East             -26526.0
Sengkang Town Centre    -23668.0
Boon Lay                -22798.0
dtype: float64

In [39]:
# top subzones with least gap
df_tmp.tail(10)

Subzone
Tai Seng                       2409.0
Katong                         2785.0
International Business Park    3000.0
Sennett                        3084.0
Xilin                          3558.0
Jelebu                         3802.0
Pasir Ris Central              3930.0
Kallang Bahru                  4000.0
Yuhua East                     4776.0
Matilda                        6424.0
dtype: float64

In [47]:
df_final_tmp[['2025_2025_diff', '2025_2026_diff',	'2025_2027_diff',	'2025_2028_diff',	'2025_2029_diff']] 

Unnamed: 0_level_0,2025_2025_diff,2025_2026_diff,2025_2027_diff,2025_2028_diff,2025_2029_diff
Subzone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Balestier,0.0,0.0,-208.0,-355.0,-355.0
Plantation,0.0,-202.0,-202.0,-202.0,-202.0
Admiralty,382.0,-256.0,-894.0,-1455.0,-1943.0
Airport,0.0,0.0,0.0,0.0,0.0
Alexandra Hill,607.0,228.0,-126.0,-421.0,-665.0
...,...,...,...,...,...
Yishun South,105.0,-1282.0,-2503.0,-3534.0,-4390.0
Yishun West,-265.0,-2079.0,-3774.0,-5206.0,-6399.0
Yuhua,-1153.0,-2260.0,-3281.0,-4154.0,-4890.0
Yuhua East,1000.0,1000.0,1000.0,888.0,888.0


In [None]:
# ]].sort_values(by=['2025_2025_diff']) #,