In [36]:
import warnings

import pandas as pd
import numpy as np

from jre_utils.datapath import (
    model_ready_data_paths,
    model_output_data_paths,
)

from jre_utils.visualize import plot_time_series
from jre_utils.process import get_cumulative_growth, get_cumulative_growth_from_base


# Suppress every Python warning for the rest of the kernel session.
# NOTE(review): this also hides pandas/NumPy warnings that may point at real
# problems — consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")
# Remove the column cap so wide DataFrames are displayed without truncation.
pd.set_option("display.max_columns", None)

In [37]:
# Human-readable metric key -> unit-price column name.
metrics = {
    "weighted_mean": "unit_price_wmean",
    "weighted_median": "unit_price_wmedian",
    "mean": "unit_price_mean",
    "median": "unit_price_median",
}
# Every raw metric has a smoothed counterpart: both the key and the column name
# carry a "_smoothed" suffix. The comprehension is fully evaluated before
# update() runs, so the raw entries come first, followed by the smoothed ones
# in the same order.
metrics.update(
    {f"{key}_smoothed": f"{column}_smoothed" for key, column in metrics.items()}
)

# Columns identifying one area, and one (area, year) observation.
granularity_columns = ["area", "area_code"]
group_by_columns = [*granularity_columns, "year"]
display_columns = ["unit_price", "total_traded_area", "count"]

# Metric used throughout the notebook, plus the column names derived from it.
metric_key = "weighted_median_smoothed"
metric = metrics[metric_key]
metric_pct_chg = f"{metric}_pct_chg"
upcoming_metric = f"upcoming_{metric}"
normalized_metric_pct_chg = f"normalized_{metric}_pct_chg"

In [61]:
# Year bounds. The evaluation years are 2020, 2021 and 2022.
start_year = 2006
eval_start_year, eval_end_year = 2020, 2022

# Pieces of the dataset name used to look up the input and output file paths.
dataset_key = "transactions"
years_ahead = 2
dataset_name = f"sequence_{dataset_key}_{metric_key}_{years_ahead}"
# The model-output dataset name additionally carries the first evaluation year.
output_dataset_name = f"{dataset_name}_{eval_start_year}"

# Resolve both paths from the project registries before reading anything.
model_ready_data_path = model_ready_data_paths[dataset_name]
model_output_data_path = model_output_data_paths[output_dataset_name]

# output_df: model output CSV; core_df: model-ready CSV.
output_df = pd.read_csv(model_output_data_path)
core_df = pd.read_csv(model_ready_data_path)

In [39]:
# What do I want to see with tooling? What is the argument I want to make?

# The model outputs normalized returns for each area for each eval year
# I want to pick out one target year, e.g. 2022
# I want to sort the relative returns for that year
# I want to compare several areas
# I want to see the cumulative factors for each area, this will help me identify trends

# Once this is done, write a narrative for my teammates

In [62]:
# Inspect every model-ready row whose area_code is 22205.
core_df.loc[core_df["area_code"].eq(22205)]

Unnamed: 0,unit_price_wmedian_smoothed_pct_chg,unit_price_wmedian_smoothed,year,years_since_crisis,count,total_traded_area,population,taxpayer_count,taxable_income,taxable_income_per_taxpayer,taxable_income_growth,taxable_income_per_taxpayer_growth,total_tax,total_tax_growth,new_dwellings,existing_dwellings,net_migration_ratio,new_dwellings_ratio,migrations_is_available,taxable_income_is_available,dwellings_is_available,total_tax_is_available,area_code,area
18055,-0.380711,48804.375141,2009,1,66.0,24475.0,32971.0,18858.0,57521629.0,3050.250769,-0.084796,-0.075283,10489161.0,-0.047428,117.0,36941.0,0.000607,0.003167,1,1,1,1,22205,Shizuoka-ken Atami-shi
18056,-0.437615,35947.712418,2010,2,81.0,34870.0,32991.0,18391.0,52691475.0,2865.068512,-0.083971,-0.060711,10502168.0,0.00124,328.0,37058.0,0.002425,0.008851,1,1,1,1,22205,Shizuoka-ken Atami-shi
18057,-0.355986,31430.709603,2011,3,51.0,17320.0,33071.0,17776.0,50732226.0,2853.97311,-0.037183,-0.003873,10226135.0,-0.026283,341.0,37386.0,0.004929,0.009121,1,1,1,1,22205,Shizuoka-ken Atami-shi
18058,-0.179854,29482.359962,2012,4,80.0,28630.0,33234.0,17216.0,48997548.0,2846.047165,-0.034193,-0.002777,9791024.0,-0.042549,101.0,37727.0,0.004453,0.002677,1,1,1,1,22205,Shizuoka-ken Atami-shi
18059,-0.040344,30162.666029,2013,5,77.0,34195.0,33382.0,17136.0,48967696.0,2857.59197,-0.000609,0.004056,9793374.0,0.00024,110.0,37828.0,0.003475,0.002908,1,1,1,1,22205,Shizuoka-ken Atami-shi
18060,0.150013,33905.109157,2014,6,67.0,27145.0,33498.0,17165.0,50435446.0,2938.272415,0.029974,0.028234,9967010.0,0.01773,384.0,37938.0,-0.000478,0.010122,1,1,1,1,22205,Shizuoka-ken Atami-shi
18061,0.293594,39018.25236,2015,7,68.0,27400.0,33482.0,16940.0,50358761.0,2972.772196,-0.00152,0.011742,9762576.0,-0.020511,142.0,38322.0,0.00451,0.003705,1,1,1,1,22205,Shizuoka-ken Atami-shi
18062,0.270834,43087.769156,2016,8,74.0,28440.0,33633.0,17044.0,51725102.0,3034.798287,0.027132,0.020865,9757556.0,-0.000514,63.0,38464.0,0.004936,0.001638,1,1,1,1,22205,Shizuoka-ken Atami-shi
18063,0.061243,41407.846745,2017,9,81.0,31130.0,33799.0,17137.0,51994130.0,3034.027543,0.005201,-0.000254,9754325.0,-0.000331,103.0,38527.0,0.010562,0.002673,1,1,1,1,22205,Shizuoka-ken Atami-shi
18064,-0.168853,35812.25781,2018,10,66.0,22720.0,34156.0,17367.0,56631471.0,3260.866644,0.08919,0.074765,9898571.0,0.014788,59.0,38630.0,0.000937,0.001527,1,1,1,1,22205,Shizuoka-ken Atami-shi


In [58]:
target_year = 2022
# Compared against area_code // 1000 below. Examples: Tokyo = 13, Hokkaido = 1.
prefecture_code = 22

# All predictions for the target year, best predicted normalized return first.
results_df = (
    output_df.loc[output_df["year"].eq(target_year)]
    .sort_values("predicted_normalized_return", ascending=False)
)

# Keep only the areas whose code, integer-divided by 1000, equals the chosen
# prefecture code; the descending sort order is preserved.
prefectural_results_df = results_df.loc[
    results_df["area_code"].astype(int).floordiv(1000).eq(prefecture_code)
]

print(len(prefectural_results_df))
prefectural_results_df.head(10)

34


Unnamed: 0,year,area_code,predicted_normalized_return,unit_price_wmedian_smoothed_pct_chg,normalized_unit_price_wmedian_smoothed_pct_chg
128,2022,22301,1.043257,0.27562,0.643081
182,2022,22304,0.835727,-0.024923,-0.245449
222,2022,22222,0.702759,0.463096,1.197337
247,2022,22207,0.614778,-0.04858,-0.315389
381,2022,22205,0.330175,0.508921,1.332816
387,2022,22223,0.326005,-0.09203,-0.443847
396,2022,22208,0.312025,0.101057,0.127
401,2022,22214,0.306811,0.359625,0.891435
479,2022,22210,0.180489,0.146675,0.261864
569,2022,22206,0.084148,0.137625,0.23511


In [41]:
# prefectural_results_df is sorted by predicted_normalized_return in descending
# order, so the last ten rows are the areas with the lowest predicted returns.
prefectural_results_df.tail(10)

Unnamed: 0,year,area_code,predicted_normalized_return,unit_price_wmedian_smoothed_pct_chg,normalized_unit_price_wmedian_smoothed_pct_chg
857,2022,26205,-0.245691,-0.208192,-0.78727
892,2022,26214,-0.294226,-0.098804,-0.463873
939,2022,26213,-0.361289,-0.141129,-0.589003
1065,2022,26202,-0.562107,-0.09214,-0.444173
1121,2022,26211,-0.654612,-0.318021,-1.111968
1142,2022,26407,-0.689627,-0.259672,-0.939466
1154,2022,26465,-0.704094,-0.2717,-0.975025
1252,2022,26303,-0.911691,-0.335992,-1.165099
1267,2022,26203,-0.942309,-0.252415,-0.918012
1325,2022,26463,-1.078612,-0.412271,-1.390612


In [55]:
def _plot_cumulative_series(frame, column, highlight_range):
    """Plot one column of ``frame`` over time with a highlighted year range.

    Thin wrapper around ``plot_time_series`` that fixes the grouping columns,
    the figure size and the "<column> over time" title shared by every chart
    in this cell.

    Args:
        frame: DataFrame holding ``column`` plus the grouping columns.
        column: Name of the column to plot.
        highlight_range: ``(first_year, last_year)`` tuple passed through as
            ``highlight_range``.
    """
    plot_time_series(
        frame,
        column,
        group_by_columns,
        granularity_columns,
        f"{column} over time",
        width=1200,
        height=600,
        highlight=True,
        highlight_range=highlight_range,
    )


# Rows for the selected area(s), in chronological order.
area_codes = [22205]
area_df = core_df.loc[core_df["area_code"].isin(area_codes)].sort_values("year")

# Cumulative growth of the price metric, plotted with 2020-2022 highlighted.
area_df[f"cumulative_{metric}_growth"] = get_cumulative_growth_from_base(
    area_df.copy(), metric
)
_plot_cumulative_series(area_df, f"cumulative_{metric}_growth", (2020, 2022))

# Factors already expressed as a per-year rate or ratio.
growth_factors = [
    "taxable_income_growth",
    "total_tax_growth",
    "net_migration_ratio",
    "new_dwellings_ratio",
]

# Factors stored as levels; their growth is measured with the "from base" helper.
base_factors = ["taxpayer_count"]

# Names of the cumulative columns created below, in plotting order.
cumulative_factors = [
    *(f"cumulative_{factor}" for factor in growth_factors),
    *(f"cumulative_{factor}_growth" for factor in base_factors),
]

# Turn exact zeros into NaN in both factor groups.
# NOTE(review): presumably zeros mark missing data rather than genuine zero
# growth — confirm against the data preparation step.
for factor_columns in (growth_factors, base_factors):
    area_df[factor_columns] = area_df[factor_columns].replace({0: np.nan})

# Each helper receives a copy of the frame and returns the new cumulative column.
for factor in growth_factors:
    area_df[f"cumulative_{factor}"] = get_cumulative_growth(area_df.copy(), factor)

for factor in base_factors:
    area_df[f"cumulative_{factor}_growth"] = get_cumulative_growth_from_base(
        area_df.copy(), factor
    )

# One chart per cumulative factor, with 2015-2020 highlighted.
for cumulative_factor in cumulative_factors:
    _plot_cumulative_series(area_df, cumulative_factor, (2015, 2020))



In [59]:
# Show the per-area frame, including the cumulative_* columns computed in the
# previous cell.
area_df

Unnamed: 0,unit_price_wmedian_smoothed_pct_chg,unit_price_wmedian_smoothed,year,years_since_crisis,count,total_traded_area,population,taxpayer_count,taxable_income,taxable_income_per_taxpayer,taxable_income_growth,taxable_income_per_taxpayer_growth,total_tax,total_tax_growth,new_dwellings,existing_dwellings,net_migration_ratio,new_dwellings_ratio,migrations_is_available,taxable_income_is_available,dwellings_is_available,total_tax_is_available,area_code,area,cumulative_unit_price_wmedian_smoothed_growth,cumulative_taxable_income_growth,cumulative_total_tax_growth,cumulative_net_migration_ratio,cumulative_new_dwellings_ratio,cumulative_taxpayer_count_growth
18055,-0.380711,48804.375141,2009,1,66.0,24475.0,32971.0,18858.0,57521629.0,3050.250769,-0.084796,-0.075283,10489161.0,-0.047428,117.0,36941.0,0.000607,0.003167,1,1,1,1,22205,Shizuoka-ken Atami-shi,,0.915204,0.952572,1.000607,1.003167,
18056,-0.437615,35947.712418,2010,2,81.0,34870.0,32991.0,18391.0,52691475.0,2865.068512,-0.083971,-0.060711,10502168.0,0.00124,328.0,37058.0,0.002425,0.008851,1,1,1,1,22205,Shizuoka-ken Atami-shi,0.736567,0.838354,0.953753,1.003033,1.012046,0.975236
18057,-0.355986,31430.709603,2011,3,51.0,17320.0,33071.0,17776.0,50732226.0,2853.97311,-0.037183,-0.003873,10226135.0,-0.026283,341.0,37386.0,0.004929,0.009121,1,1,1,1,22205,Shizuoka-ken Atami-shi,0.644014,0.807181,0.928685,1.007977,1.021277,0.942624
18058,-0.179854,29482.359962,2012,4,80.0,28630.0,33234.0,17216.0,48997548.0,2846.047165,-0.034193,-0.002777,9791024.0,-0.042549,101.0,37727.0,0.004453,0.002677,1,1,1,1,22205,Shizuoka-ken Atami-shi,0.604093,0.779581,0.889171,1.012465,1.024011,0.912928
18059,-0.040344,30162.666029,2013,5,77.0,34195.0,33382.0,17136.0,48967696.0,2857.59197,-0.000609,0.004056,9793374.0,0.00024,110.0,37828.0,0.003475,0.002908,1,1,1,1,22205,Shizuoka-ken Atami-shi,0.618032,0.779106,0.889384,1.015984,1.026989,0.908686
18060,0.150013,33905.109157,2014,6,67.0,27145.0,33498.0,17165.0,50435446.0,2938.272415,0.029974,0.028234,9967010.0,0.01773,384.0,37938.0,-0.000478,0.010122,1,1,1,1,22205,Shizuoka-ken Atami-shi,0.694715,0.802459,0.905153,1.015498,1.037384,0.910224
18061,0.293594,39018.25236,2015,7,68.0,27400.0,33482.0,16940.0,50358761.0,2972.772196,-0.00152,0.011742,9762576.0,-0.020511,142.0,38322.0,0.00451,0.003705,1,1,1,1,22205,Shizuoka-ken Atami-shi,0.799483,0.801239,0.886587,1.020078,1.041228,0.898293
18062,0.270834,43087.769156,2016,8,74.0,28440.0,33633.0,17044.0,51725102.0,3034.798287,0.027132,0.020865,9757556.0,-0.000514,63.0,38464.0,0.004936,0.001638,1,1,1,1,22205,Shizuoka-ken Atami-shi,0.882867,0.822978,0.886131,1.025113,1.042933,0.903807
18063,0.061243,41407.846745,2017,9,81.0,31130.0,33799.0,17137.0,51994130.0,3034.027543,0.005201,-0.000254,9754325.0,-0.000331,103.0,38527.0,0.010562,0.002673,1,1,1,1,22205,Shizuoka-ken Atami-shi,0.848445,0.827258,0.885838,1.035941,1.045722,0.908739
18064,-0.168853,35812.25781,2018,10,66.0,22720.0,34156.0,17367.0,56631471.0,3260.866644,0.08919,0.074765,9898571.0,0.014788,59.0,38630.0,0.000937,0.001527,1,1,1,1,22205,Shizuoka-ken Atami-shi,0.733792,0.901041,0.898938,1.036911,1.047319,0.920935


In [None]:
# Features: Sources of Taxable Income