In [1]:
import warnings

import pandas as pd

from jre_utils.datapath import (
    factor_data_paths,
    get_derived_csv_path,
    get_derived_lpa_path,
    get_derived_plps_path,
)
from jre_utils.config import asset_types
from jre_utils.visualize import plot_time_series
from jre_utils.process import (
    get_most_active_municipalities, 
    get_highest_growth_municipalities,
    get_cumulative_growth,
)

warnings.filterwarnings("ignore")
pd.set_option("display.max_columns", None)

In [2]:
# Asset class whose derived transaction CSV backs the "main" dataset path.
asset_type = "building"

# Factor dataset to look up, and the column from it that is analysed.
factor = "lfs_revenue_breakdown"
factor_column = "total_tax_growth"

# Short aggregation name -> unit-price column name.
metrics = dict(
    weighted_mean="unit_price_wmean",
    weighted_median="unit_price_wmedian",
    mean="unit_price_mean",
    median="unit_price_median",
)

# Available core dataset paths, keyed by short name.
dataset_paths = dict(
    main=get_derived_csv_path(asset_type),
    lpa=get_derived_lpa_path(),
    plps=get_derived_plps_path(),
)

In [3]:
# Key columns: the area identifiers alone, and the identifiers plus "year".
granularity_columns = ["area", "area_code"]
group_by_columns = [*granularity_columns, "year"]
display_columns = ["unit_price", "total_traded_area", "count"]

# Selected price metric and the names of the columns derived from it.
metric = metrics["weighted_median"]
metric_pct_chg = f"{metric}_pct_chg"
upcoming_metric = f"upcoming_{metric}"
upcoming_metric_pct_chg = f"upcoming_{metric_pct_chg}"

In [4]:
# Load the core price dataset and the municipality-level factor table.
core_df_path = dataset_paths["plps"]
factor_path = factor_data_paths["processed"][factor]["municipality"]

df = pd.read_csv(core_df_path)
factor_df = pd.read_csv(factor_path)

# time box
# start_year = 1975
start_year = 2010
end_year = 2022
df = df[(df["year"] >= start_year) & (df["year"] <= end_year)]

# Attach the factor columns; a left join keeps every core row even when no
# factor row matches.
df = df.merge(factor_df, on=group_by_columns, how="left")
# Year ordering matters: the per-area pct_change / shift below work on row order.
df = df.sort_values(by=["year", "area_code"])

# prepare metrics
# Forward-fill gaps within each area explicitly, then take the percent change
# with fill_method=None. This yields the same values as the former
# pct_change(fill_method="ffill"), whose fill_method argument is deprecated
# in recent pandas releases.
filled_metric = df.groupby(granularity_columns)[metric].ffill()
df[metric_pct_chg] = (
    filled_metric.groupby([df[column] for column in granularity_columns])
    .pct_change(fill_method=None)
    .fillna(0)  # fill na with 0 for visualizations
)

# set up target variables
# shift(-1) within each area pulls the value from that area's next row
# (the following observation in year order).
df[upcoming_metric] = df.groupby(granularity_columns)[metric].shift(-1)
df[upcoming_metric_pct_chg] = df.groupby(granularity_columns)[metric_pct_chg].shift(-1)

In [5]:
# Spot check for a single municipality: Tokyo-to Chiyoda-ku (area_code 13101).
chiyoda_columns = [
    "year",
    "area_code",
    "area",
    factor_column,
    metric_pct_chg,
    upcoming_metric_pct_chg,
]
df.loc[df["area_code"] == 13101, chiyoda_columns]

Unnamed: 0,year,area_code,area,total_tax_growth,unit_price_wmedian_pct_chg,upcoming_unit_price_wmedian_pct_chg
613,2010,13101,Tokyo-to Chiyoda-ku,-0.033539,0.0,-0.031281
2311,2011,13101,Tokyo-to Chiyoda-ku,0.085495,-0.031281,-0.153962
4014,2012,13101,Tokyo-to Chiyoda-ku,-0.024638,-0.153962,1.827193
5721,2013,13101,Tokyo-to Chiyoda-ku,0.044327,1.827193,-0.099529
7430,2014,13101,Tokyo-to Chiyoda-ku,0.080608,-0.099529,0.30174
9140,2015,13101,Tokyo-to Chiyoda-ku,0.046389,0.30174,0.101614
10851,2016,13101,Tokyo-to Chiyoda-ku,0.057706,0.101614,0.075592
12564,2017,13101,Tokyo-to Chiyoda-ku,0.034839,0.075592,0.081266
14280,2018,13101,Tokyo-to Chiyoda-ku,0.038361,0.081266,0.076747
15996,2019,13101,Tokyo-to Chiyoda-ku,0.092478,0.076747,-0.04347


In [6]:
# Area codes passed as `keep=` to the municipality selection helpers:
#   13102  Tokyo-to Chuo-ku
#   13103  Tokyo-to Minato-ku
#   15461  Niigata-ken Yuzawa-Machi
#   20321  Nagano-ken Karuisawa-machi
comparables = [13102, 13103, 15461, 20321]

# Values passed as `n` to the activity, return-growth and factor-growth selections.
frequency_n, return_n, factor_n = 500, 10, 10

# Base universe: output of get_most_active_municipalities for n=frequency_n,
# with the comparables passed as keep=.
frequency_df = get_most_active_municipalities(df.copy(), n=frequency_n, keep=comparables)

# Keyword arguments shared by both growth rankings.
ranking_options = {"end_year": 2021, "keep": comparables}

# Ranking on the price metric's percent change, with cumulative factor growth
# added as a second column.
return_df = get_highest_growth_municipalities(
    frequency_df.copy(),
    column=metric_pct_chg,
    cumulative_column="return_growth",
    n=return_n,
    **ranking_options,
)
return_df["factor_growth"] = get_cumulative_growth(return_df.copy(), factor_column)

# Ranking on the factor column, with cumulative price growth added as a
# second column.
factor_growth_df = get_highest_growth_municipalities(
    frequency_df.copy(),
    column=factor_column,
    cumulative_column="factor_growth",
    n=factor_n,
    **ranking_options,
)
factor_growth_df["return_growth"] = get_cumulative_growth(
    factor_growth_df.copy(), metric_pct_chg
)

# Tokyo Area Codes are 13000 + n. Therefore, if area_code // 1000 == 13, it is in Tokyo
# The explicit .copy() makes tokyo_df an independent frame, so the column
# assignments below do not target a filtered slice of frequency_df (which
# triggers pandas' SettingWithCopyWarning).
tokyo_df = frequency_df[frequency_df["area_code"] // 1000 == 13].copy()
tokyo_df["return_growth"] = get_cumulative_growth(tokyo_df.copy(), metric_pct_chg)
tokyo_df["factor_growth"] = get_cumulative_growth(tokyo_df.copy(), factor_column)

In [7]:
# Frame rendered by the plotting cells; taken as an independent deep copy of
# the Tokyo subset.
plot_df = tokyo_df.copy(deep=True)

In [8]:
# Cumulative price growth over time for the frame selected in plot_df.
# NOTE(review): the title describes a top/bottom-N ranking, while plot_df is
# currently built from tokyo_df — confirm the wording matches the selection.
return_growth_title = f"Cumulative Price Growth for top and bottom {return_n} municipalities"

plot_time_series(
    plot_df,
    "return_growth",
    group_by_columns,
    granularity_columns,
    return_growth_title,
    visible="legendonly",
)

In [9]:
# Cumulative factor growth over time for the frame selected in plot_df.
# NOTE(review): the title interpolates return_n although the factor ranking is
# sized by factor_n, and plot_df is currently built from tokyo_df — confirm.
factor_growth_title = f"cumulative {factor_column} for top and bottom {return_n} municipalities"

plot_time_series(
    plot_df,
    "factor_growth",
    group_by_columns,
    granularity_columns,
    factor_growth_title,
    # visible="legendonly",
)

In [10]:
# The "count" column over time for the frame selected in plot_df.
# NOTE(review): the title describes a top/bottom-N ranking, while plot_df is
# currently built from tokyo_df — confirm the wording matches the selection.
count_title = f"Count for top and bottom {return_n} municipalities"

plot_time_series(
    plot_df,
    "count",
    group_by_columns,
    granularity_columns,
    count_title,
    # visible="legendonly",
)

In [11]:
# Idea: see whether a liquid long-short equity strategy can be built using REITs

# MVP
# Validating