In [1]:
import warnings

import pandas as pd

from jre_utils.datapath import factor_data_paths, get_derived_csv_path
from jre_utils.config import asset_types, statistics, area_levels, period_cols
from jre_utils.visualize import plot_time_series
from jre_utils.process import (
    get_most_active_municipalities,
    get_highest_growth_municipalities,
    get_cumulative_growth,
)

# Notebook-wide output settings.
# Suppress every warning so library messages do not clutter the cell output.
warnings.filterwarnings("ignore")
# Render all columns when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [2]:
# Factor examined in this notebook and the column used downstream to measure it.
factor, factor_column = "migration", "net_migration_ratio"

In [13]:
# Slice of the dataset analysed in this notebook.
period, area_level = "yearly", "municipality"
asset_type, statistic = "building", "median"

# Column sets looked up from the project config for the chosen slice:
# the area-identifying columns, and those columns plus the period column.
granularity_columns = area_levels[area_level]["columns"]
group_by_columns = granularity_columns + [period_cols[period]]
display_columns = [*statistics[statistic].keys()]

# Asset-specific label and metric column names, read from a single config entry.
asset_spec = asset_types[asset_type]
label = asset_spec["label"]
metric = asset_spec["metric"]
metric_pct_chg = asset_spec["metric_pct_chg"]

# Names of the next-period ("Upcoming") target columns derived from the metrics.
upcoming_metric = "Upcoming" + metric
upcoming_metric_pct_chg = "Upcoming" + metric_pct_chg

In [14]:
# Path of the processed factor table for the selected factor and area level.
factor_path = factor_data_paths["processed"][factor][area_level]
# Path of the derived CSV for the selected period / area level / asset type / statistic.
core_df_path = get_derived_csv_path(period, area_level, asset_type, statistic)

In [15]:
# Load the derived core table and the processed factor table.
df = pd.read_csv(core_df_path)
factor_df = pd.read_csv(factor_path)

# time box (both bounds inclusive)
start_year = 2005
end_year = 2022
df = df[df["year"].between(start_year, end_year)]

# Attach the factor values; a left join keeps every core row even when the
# factor table has no matching entry.
df = df.merge(factor_df, on=group_by_columns, how="left")

# Keep one row per merge key. The key list is the one used for the merge rather
# than a hard-coded column list, so the two cannot drift apart. `.copy()`
# detaches the result so the column assignments below write to an independent
# frame instead of a slice.
df = df.drop_duplicates(subset=group_by_columns).copy()

# prepare metrics
# Forward-fill gaps within each group explicitly and then compute the change
# with fill_method=None: passing fill_method="ffill" to pct_change is
# deprecated since pandas 2.1, and the result of the two-step form is the same.
# NOTE(review): pct_change and shift are positional, so rows are assumed to be
# in chronological order within each group — confirm for the input CSV.
group_keys = [df[column] for column in granularity_columns]
filled_metric = df.groupby(granularity_columns)[metric].ffill()
df[metric_pct_chg] = (
    filled_metric.groupby(group_keys).pct_change(fill_method=None).fillna(0)
)  # fill na with 0 for visualizations

# set up target variables: the next row's value within the same group
df[upcoming_metric] = df.groupby(granularity_columns)[metric].shift(-1)
df[upcoming_metric_pct_chg] = df.groupby(granularity_columns)[metric_pct_chg].shift(-1)

In [16]:
# Spot-check the prepared rows for one municipality (Prefecture "Tokyo", Municipality "Chiyoda").
df.loc[df["Prefecture"].eq("Tokyo") & df["Municipality"].eq("Chiyoda")]

Unnamed: 0,Prefecture,Municipality,year,TradePricePerArea,UnitPrice,Count,net_migration_ratio,TradePricePctChg,UpcomingTradePricePerArea,UpcomingTradePricePctChg
2450,Tokyo,Chiyoda,2005,1680000.0,,47,0.019842,0.0,2111111.0,0.256614
2451,Tokyo,Chiyoda,2006,2111111.0,,99,0.019416,0.256614,2400000.0,0.136842
2452,Tokyo,Chiyoda,2007,2400000.0,,76,0.008594,0.136842,2333333.0,-0.027778
2453,Tokyo,Chiyoda,2008,2333333.0,,59,0.010735,-0.027778,1920000.0,-0.177143
2454,Tokyo,Chiyoda,2009,1920000.0,,53,0.019982,-0.177143,2083035.0,0.084914
2455,Tokyo,Chiyoda,2010,2083035.0,,46,0.012579,0.084914,1545455.0,-0.258075
2456,Tokyo,Chiyoda,2011,1545455.0,,61,0.013117,-0.258075,1488889.0,-0.036601
2457,Tokyo,Chiyoda,2012,1488889.0,,57,0.019949,-0.036601,2162162.0,0.452198
2458,Tokyo,Chiyoda,2013,2162162.0,,63,0.035358,0.452198,2037500.0,-0.057656
2459,Tokyo,Chiyoda,2014,2037500.0,,68,0.043446,-0.057656,2124060.0,0.042484


In [17]:
# Municipalities passed as `keep=` to every selection helper below.
# Alternative set: ["Tokyo_Minato", "Tokyo_Chuo"]
comparables = ["Niigata_Yuzawa"]

# Sizes handed to the selection helpers as `n`.
frequency_n, return_n, factor_n = 500, 10, 10

# Keyword options shared by both growth-ranking calls.
ranking_options = {"end_year": 2021, "keep": comparables}

# Step 1: restrict to the most active municipalities.
frequency_df = get_most_active_municipalities(
    df.copy(),
    keep=comparables,
    n=frequency_n,
)

# Step 2a: rank by the price-change column, then add the cumulative factor column.
return_df = get_highest_growth_municipalities(
    frequency_df.copy(),
    n=return_n,
    column=metric_pct_chg,
    cumulative_column="return_growth",
    **ranking_options,
)
return_df["factor_growth"] = get_cumulative_growth(return_df.copy(), factor_column)

# Step 2b: rank by the factor column, then add the cumulative price-change column.
factor_growth_df = get_highest_growth_municipalities(
    frequency_df.copy(),
    n=factor_n,
    column=factor_column,
    cumulative_column="factor_growth",
    **ranking_options,
)
factor_growth_df["return_growth"] = get_cumulative_growth(
    factor_growth_df.copy(),
    metric_pct_chg,
)

# Rows of the active set whose Prefecture is "Tokyo".
tokyo_df = frequency_df.loc[frequency_df["Prefecture"].eq("Tokyo")]

In [18]:
# Plot the "return_growth" column for the municipalities in factor_growth_df.
# That frame was selected with n=factor_n, so the title reports factor_n
# (it previously interpolated return_n, which only matched by coincidence).
plot_time_series(
    factor_growth_df.copy(),
    "return_growth",
    group_by_columns,
    granularity_columns,
    f"Cumulative Price Growth for top and bottom {factor_n} municipalities",
    visible="legendonly",
)

In [19]:
# Plot the "factor_growth" column for the municipalities in factor_growth_df.
# That frame was selected with n=factor_n, so the title reports factor_n
# (it previously interpolated return_n, which only matched by coincidence).
plot_time_series(
    factor_growth_df.copy(),
    "factor_growth",
    group_by_columns,
    granularity_columns,
    f"cumulative {factor_column} for top and bottom {factor_n} municipalities",
    # visible="legendonly",
)

In [10]:
# NOTE(review): ad-hoc scratch arithmetic; the result is displayed but not assigned to any name.
1 - 0.3 / 3

0.9

In [11]:
# Plot the "Count" column for the municipalities in factor_growth_df.
# That frame was selected with n=factor_n, so the title reports factor_n
# (it previously interpolated return_n, which only matched by coincidence).
plot_time_series(
    factor_growth_df.copy(),
    "Count",
    group_by_columns,
    granularity_columns,
    f"Count for top and bottom {factor_n} municipalities",
    visible="legendonly",
)

In [12]:
# TODO: see whether a liquid long-short equity strategy can be built using REITs

# MVP
# Validating