In [31]:
import warnings

import pandas as pd

from jre_utils.datapath import factor_data_paths, get_derived_csv_path
from jre_utils.config import asset_types
from jre_utils.visualize import plot_time_series
from jre_utils.process import (
    get_most_active_municipalities,
    get_highest_growth_municipalities,
    get_cumulative_growth,
)

warnings.filterwarnings("ignore")
pd.set_option("display.max_columns", None)

In [79]:
# Factor under study and the name of the column holding its growth series.
factor = "taxable_income"
factor_column = f"{factor}_growth"

In [80]:
# Asset class to analyse; it selects the derived CSV and the display label.
asset_type = "building"

# Columns identifying a municipality, and the same key extended with "year".
# (A coarser alternative is granularity_columns = ["area"].)
granularity_columns = ["area", "area_code"]
group_by_columns = [*granularity_columns, "year"]
display_columns = ["unit_price", "total_traded_area", "count"]

label = asset_types[asset_type]["label"]

# Price metric used throughout the notebook. Other candidates:
# "unit_price_wmean", "unit_price_mean", "unit_price_median".
metric = "unit_price_wmedian"

# Names of the derived columns: the metric's percentage change, and the
# "upcoming_" (next-row) versions of the metric and of its percentage change.
metric_pct_chg = f"{metric}_pct_chg"
upcoming_metric = f"upcoming_{metric}"
upcoming_metric_pct_chg = f"upcoming_{metric_pct_chg}"

In [86]:
# Locate the derived price table for this asset type and the processed
# municipality table for the chosen factor.
core_df_path = get_derived_csv_path(asset_type)
factor_path = factor_data_paths["processed"][factor]["municipality"]

df = pd.read_csv(core_df_path)
factor_df = pd.read_csv(factor_path)

# time box (both bounds inclusive)
start_year = 2010
end_year = 2022
within_time_box = df["year"].between(start_year, end_year)

# Left-join the factor columns onto every price row, then order the rows by
# year so the group-wise pct_change / shift below run forward in time.
df = (
    df[within_time_box]
    .merge(factor_df, on=group_by_columns, how="left")
    .sort_values(by=["year", "area_code"])
)

# prepare metrics
# NOTE(review): pandas >= 2.1 deprecates fill_method values other than None in
# pct_change — confirm the installed pandas version before upgrading.
pct_chg = df.groupby(granularity_columns)[metric].pct_change(fill_method="ffill")
df[metric_pct_chg] = pct_chg.fillna(0)  # fill na with 0 for visualizations

# set up target variables: the next row's value within each municipality
next_metric = df.groupby(granularity_columns)[metric].shift(-1)
df[upcoming_metric] = next_metric

next_pct_chg = df.groupby(granularity_columns)[metric_pct_chg].shift(-1)
df[upcoming_metric_pct_chg] = next_pct_chg

In [87]:
# Spot-check one municipality: area_code 13101 is Tokyo-to Chiyoda-ku.
df.loc[df["area_code"].eq(13101)]

Unnamed: 0,year,area_code,area,unit_price_wmean,unit_price_wmedian,unit_price_mean,unit_price_median,total_traded_area,count,taxable_income,taxpayer_count,taxable_income_per_taxpayer,taxable_income_growth,taxable_income_per_taxpayer_growth,unit_price_wmedian_pct_chg,upcoming_unit_price_wmedian,upcoming_unit_price_wmedian_pct_chg
2434,2010,13101,Tokyo-to Chiyoda-ku,4694486.0,3333333.0,2883100.0,2083035.0,10700.0,46.0,217272181.0,28171.0,7712.618686,-0.034085,-0.044303,0.0,2000000.0,-0.4
2379,2011,13101,Tokyo-to Chiyoda-ku,2348582.0,2000000.0,1944476.0,1545455.0,13575.0,61.0,233634297.0,28440.0,8214.989346,0.075307,0.065136,-0.4,1911478.0,-0.044261
2322,2012,13101,Tokyo-to Chiyoda-ku,2684736.0,1911478.0,1964714.0,1488889.0,10810.0,57.0,219420257.0,28761.0,7629.089983,-0.060839,-0.071321,-0.044261,2916571.0,0.52582
2263,2013,13101,Tokyo-to Chiyoda-ku,3266448.0,2916571.0,2702967.0,2162162.0,12190.0,63.0,233174299.0,29730.0,7843.064211,0.062684,0.028047,0.52582,3460407.0,0.186464
2205,2014,13101,Tokyo-to Chiyoda-ku,3575147.0,3460407.0,2645518.0,2037500.0,15330.0,68.0,280560533.0,31214.0,8988.291568,0.203222,0.146018,0.186464,3000000.0,-0.13305
2148,2015,13101,Tokyo-to Chiyoda-ku,3761074.0,3000000.0,2865819.0,2124060.0,11355.0,70.0,279917309.0,32993.0,8484.142364,-0.002293,-0.05609,-0.13305,3562819.0,0.187606
2090,2016,13101,Tokyo-to Chiyoda-ku,3597246.0,3562819.0,3001553.0,2785714.0,10530.0,64.0,314359478.0,34324.0,9158.591015,0.123044,0.079495,0.187606,4118628.0,0.156003
2032,2017,13101,Tokyo-to Chiyoda-ku,4685343.0,4118628.0,4216382.0,3699248.0,11530.0,66.0,333664476.0,35326.0,9445.294571,0.061411,0.031304,0.156003,3158306.0,-0.233166
1975,2018,13101,Tokyo-to Chiyoda-ku,4059672.0,3158306.0,3767976.0,3061189.0,11580.0,66.0,362690825.0,36299.0,9991.758037,0.086993,0.057856,-0.233166,4417898.0,0.398819
1917,2019,13101,Tokyo-to Chiyoda-ku,5296680.0,4417898.0,3891026.0,3095455.0,10995.0,64.0,412894018.0,38175.0,10815.822344,0.138419,0.082474,0.398819,4112103.0,-0.069217


In [88]:
# Area codes passed as `keep` to every selection helper below.
# NOTE(review): presumably these are retained regardless of ranking — confirm
# against jre_utils.process.
comparables = [
    13102,  # Tokyo-to Chuo-ku
    13103,  # Tokyo-to Minato-ku
    15461,  # Niigata-ken Yuzawa-Machi
    20321,  # Nagano-ken Karuisawa-machi
]

# Selection sizes handed to the helpers as `n`.
frequency_n, return_n, factor_n = 500, 10, 10

# Arguments shared by both growth rankings.
ranking_kwargs = {"end_year": 2021, "keep": comparables}

frequency_df = get_most_active_municipalities(
    df.copy(),
    n=frequency_n,
    keep=comparables,
)

# Ranking by the price metric's percentage change, with the factor's
# cumulative growth attached alongside.
return_df = get_highest_growth_municipalities(
    frequency_df.copy(),
    column=metric_pct_chg,
    cumulative_column="return_growth",
    n=return_n,
    **ranking_kwargs,
)
return_factor_growth = get_cumulative_growth(return_df.copy(), factor_column)
return_df["factor_growth"] = return_factor_growth

# Ranking by the factor's growth, with the price metric's cumulative growth
# attached alongside.
factor_growth_df = get_highest_growth_municipalities(
    frequency_df.copy(),
    column=factor_column,
    cumulative_column="factor_growth",
    n=factor_n,
    **ranking_kwargs,
)
factor_return_growth = get_cumulative_growth(factor_growth_df.copy(), metric_pct_chg)
factor_growth_df["return_growth"] = factor_return_growth

# Tokyo Area Codes are 13000 + n. Therefore, if area_code // 1000 == 13, it is in Tokyo
in_tokyo = frequency_df["area_code"].floordiv(1000).eq(13)
tokyo_df = frequency_df[in_tokyo]

In [90]:
# Plot the "return_growth" column of factor_growth_df, i.e. cumulative price
# growth for the municipalities selected by growth of `factor_column`.
plot_time_series(
    factor_growth_df.copy(),
    "return_growth",
    group_by_columns,
    granularity_columns,
    # factor_growth_df was built with n=factor_n, so the title must report
    # factor_n (not return_n) to stay correct if the two sizes ever differ.
    f"Cumulative Price Growth for top and bottom {factor_n} municipalities",
    visible="legendonly",
)

In [85]:
# Plot the "factor_growth" column of factor_growth_df, i.e. cumulative growth
# of `factor_column` for the municipalities selected on that same factor.
plot_time_series(
    factor_growth_df.copy(),
    "factor_growth",
    group_by_columns,
    granularity_columns,
    # factor_growth_df was built with n=factor_n, so the title must report
    # factor_n (not return_n) to stay correct if the two sizes ever differ.
    f"cumulative {factor_column} for top and bottom {factor_n} municipalities",
    # Optional: pass visible="legendonly" as in the price-growth plot.
)

In [58]:
# Plot the "count" column for the municipalities in factor_growth_df.
plot_time_series(
    factor_growth_df.copy(),
    "count",
    group_by_columns,
    granularity_columns,
    # factor_growth_df was built with n=factor_n, so the title must report
    # factor_n (not return_n) to stay correct if the two sizes ever differ.
    f"Count for top and bottom {factor_n} municipalities",
    # Optional: pass visible="legendonly" as in the price-growth plot.
)

In [12]:
# TODO: explore whether a liquid long-short equity strategy can be built using REITs

# MVP
# Validating