In [87]:
import warnings

import numpy as np
import pandas as pd

from jre_utils.datapath import (
    factor_data_paths,
    model_ready_data_paths,
    get_derived_csv_path,
)
from jre_utils.config import asset_types

warnings.filterwarnings("ignore")
pd.set_option("display.max_columns", None)

In [88]:
asset_type = "combined"
dataset_key = "transactions"
years_ahead = 2

metrics = {
    "median": "unit_price_median",
    "gmean": "unit_price_gmean",
}

granularity_columns = ["area", "area_code"]
group_by_columns = granularity_columns + ["year"]

metric_key = "gmean"
metric = metrics[metric_key]
metric_pct_chg = metric + "_pct_chg"


In [89]:
population_path = factor_data_paths["processed"]["population"]["municipality"]
migration_path = factor_data_paths["processed"]["migration"]["municipality"]
taxable_income_path = factor_data_paths["processed"]["taxable_income"]["municipality"]
new_dwellings_path = factor_data_paths["processed"]["new_dwellings"]["municipality"]
lfs_revenue_path = factor_data_paths["processed"]["lfs_revenue_breakdown"]["municipality"]

dataset_name = f"sequence_{dataset_key}_{asset_type}_{metric_key}_{years_ahead}"
model_ready_data_path = model_ready_data_paths[dataset_name]

In [90]:
# concal all asset types horizontally here
# Once creating time series dataset,
# create 3 datasets and concat them vertically
# To do that, loop 3 times with different metrics

In [91]:
id_columns = ["area_code", "area", "year"]

factor_log_normalize_columns = [
    "in_migrations",
    "out_migrations",
    "population",
    "taxpayer_count",
    "taxable_income",
    "taxable_income_per_taxpayer",
    "total_tax",
    "new_dwellings",
    "existing_dwellings",

    # Log normalizing seems to be good for everything!
    "total_tax_growth",
    "taxable_income_growth",
    "taxable_income_per_taxpayer_growth",
    "net_migration_ratio",
    "new_dwellings_ratio",
    "taxpayer_count_growth",
]

factor_normalize_columns = [
    "total_tax_growth",
    "taxable_income_growth",
    "taxable_income_per_taxpayer_growth",
    "net_migration_ratio",
    "new_dwellings_ratio",
    "taxpayer_count_growth",
]

factor_maintain_columns = [
    "migrations_is_available",
    "taxable_income_is_available",
    "dwellings_is_available",
    "total_tax_is_available",
]

factor_columns = (
    [f"{column}_log_normalized_yearly" for column in factor_log_normalize_columns]
    + [f"{column}_normalized_yearly" for column in factor_normalize_columns]
    + factor_maintain_columns
)

final_factor_columns = list(set(factor_normalize_columns + factor_log_normalize_columns + factor_columns))

In [92]:
core_log_normalize_columns = ["count", metric, "yearly_price_growth", metric_pct_chg]
core_normalize_columns = ["count_growth", "yearly_price_growth", metric_pct_chg]
core_maintain_columns = ["metric_pct_chg_is_available"]

core_columns = (
    [f"{column}_log_normalized_yearly" for column in core_log_normalize_columns]
    + [f"{column}_normalized_yearly" for column in core_normalize_columns]
    + core_maintain_columns
)

final_core_columns = list(set(core_normalize_columns + core_log_normalize_columns + core_columns))

combined_final_core_columns = [f"{asset_type}_{column}" for column in final_core_columns for asset_type in asset_types]

In [93]:
combined_final_core_columns

['land_yearly_price_growth_log_normalized_yearly',
 'building_yearly_price_growth_log_normalized_yearly',
 'condo_yearly_price_growth_log_normalized_yearly',
 'land_yearly_price_growth_normalized_yearly',
 'building_yearly_price_growth_normalized_yearly',
 'condo_yearly_price_growth_normalized_yearly',
 'land_yearly_price_growth',
 'building_yearly_price_growth',
 'condo_yearly_price_growth',
 'land_unit_price_gmean_pct_chg_log_normalized_yearly',
 'building_unit_price_gmean_pct_chg_log_normalized_yearly',
 'condo_unit_price_gmean_pct_chg_log_normalized_yearly',
 'land_count_growth',
 'building_count_growth',
 'condo_count_growth',
 'land_unit_price_gmean',
 'building_unit_price_gmean',
 'condo_unit_price_gmean',
 'land_unit_price_gmean_log_normalized_yearly',
 'building_unit_price_gmean_log_normalized_yearly',
 'condo_unit_price_gmean_log_normalized_yearly',
 'land_count_log_normalized_yearly',
 'building_count_log_normalized_yearly',
 'condo_count_log_normalized_yearly',
 'land_unit_

In [94]:
final_columns = id_columns + combined_final_core_columns + final_factor_columns

In [95]:
derived_dfs = {}

for asset_type in asset_types:
    df =  pd.read_csv(get_derived_csv_path(asset_type))
    df = df.sort_values(by=group_by_columns, ascending=True)
    df = df[group_by_columns + [metric, "count"]]

    # prepare main metric
    df[metric_pct_chg] = df.groupby(granularity_columns)[metric].pct_change(periods=years_ahead)

    # prepare additional factors
    df["count_growth"] = df.groupby(granularity_columns)["count"].pct_change()
    df["yearly_price_growth"] = df.groupby(granularity_columns)[metric].pct_change()
    df["metric_pct_chg_is_available"] = df[metric_pct_chg].notnull().astype(int)

    for column in core_log_normalize_columns:
        df[f"{column}_log"] = df[column].apply(lambda x: np.log10(1 + x))
        df[f"{column}_log_normalized_yearly"] = df.groupby("year")[f"{column}_log"].transform(
            lambda x: (x - x.mean()) / x.std()
        )

    for column in core_normalize_columns:
        df[f"{column}_normalized_yearly"] = df.groupby("year")[column].transform(
            lambda x: (x - x.mean()) / x.std()
        )

    df = df[group_by_columns + final_core_columns]
    
    derived_dfs[asset_type] = df.rename(
        columns={column: f"{asset_type}_{column}" for column in final_core_columns}
    )

combined_derived_dfs = (
    derived_dfs["building"].merge(
        derived_dfs["land"],
        on=["year", "area_code", "area"],
        how="outer",
    ).merge(
        derived_dfs["condo"],
        on=["year", "area_code", "area"],
        how="outer",
    )
)

In [96]:
combined_derived_dfs

Unnamed: 0,area,area_code,year,building_yearly_price_growth_log_normalized_yearly,building_yearly_price_growth_normalized_yearly,building_yearly_price_growth,building_unit_price_gmean_pct_chg_log_normalized_yearly,building_count_growth,building_unit_price_gmean,building_unit_price_gmean_log_normalized_yearly,building_count_log_normalized_yearly,building_unit_price_gmean_pct_chg,building_count_growth_normalized_yearly,building_metric_pct_chg_is_available,building_count,building_unit_price_gmean_pct_chg_normalized_yearly,land_yearly_price_growth_log_normalized_yearly,land_yearly_price_growth_normalized_yearly,land_yearly_price_growth,land_unit_price_gmean_pct_chg_log_normalized_yearly,land_count_growth,land_unit_price_gmean,land_unit_price_gmean_log_normalized_yearly,land_count_log_normalized_yearly,land_unit_price_gmean_pct_chg,land_count_growth_normalized_yearly,land_metric_pct_chg_is_available,land_count,land_unit_price_gmean_pct_chg_normalized_yearly,condo_yearly_price_growth_log_normalized_yearly,condo_yearly_price_growth_normalized_yearly,condo_yearly_price_growth,condo_unit_price_gmean_pct_chg_log_normalized_yearly,condo_count_growth,condo_unit_price_gmean,condo_unit_price_gmean_log_normalized_yearly,condo_count_log_normalized_yearly,condo_unit_price_gmean_pct_chg,condo_count_growth_normalized_yearly,condo_metric_pct_chg_is_available,condo_count,condo_unit_price_gmean_pct_chg_normalized_yearly
0,Aichi-ken Agui-cho,23441,2007,,,,,,67590.233575,0.066006,-1.205808,,,0.0,6.0,,,,,,,54723.779572,0.674598,-0.544187,,,0.0,20.0,,,,,,,,,,,,,,
1,Aichi-ken Agui-cho,23441,2008,2.521607,3.158972,1.105567,,1.333333,142315.742351,0.968399,-0.674282,,0.497956,0.0,14.0,,-0.742933,-0.581869,-0.250691,,0.500000,41004.994789,0.511063,-0.290165,,0.118376,0.0,30.0,,,,,,,,,,,,,,
2,Aichi-ken Agui-cho,23441,2009,0.348771,0.151198,0.024848,2.699531,1.071429,145852.056278,1.103548,-0.183325,1.157887,1.470449,1.0,29.0,3.510887,1.343899,1.331892,0.473600,0.596489,0.233333,60424.944041,0.964866,-0.023743,0.104181,0.479061,1.0,37.0,0.359541,,,,,,,,,,,,,
3,Aichi-ken Agui-cho,23441,2010,0.008386,-0.133281,-0.019251,0.337939,0.413793,143044.196187,1.082660,0.055760,0.005119,0.690053,1.0,41.0,0.093175,0.153491,-0.019356,0.044494,1.475480,0.459459,63113.475459,1.015879,0.231435,0.539166,0.515716,1.0,54.0,1.443527,,,,,,,,,,,,,
4,Aichi-ken Agui-cho,23441,2011,0.063796,-0.085690,0.010080,0.071647,-0.024390,144486.087473,1.102463,0.100640,-0.009365,0.004318,1.0,40.0,-0.082755,0.338276,0.159122,0.098042,0.444827,-0.203704,69301.255966,1.110362,0.057424,0.146898,-0.533401,1.0,43.0,0.204269,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21230,Yamanashi-ken Oshino-mura,19424,2019,,,,,,,,,,,,,,1.912959,1.490448,1.101751,2.088592,-0.181818,23205.212513,0.237574,-1.285960,1.366668,-0.415074,1.0,9.0,1.809799,,,,,,,,,,,,,
21231,Yamanashi-ken Oshino-mura,19424,2020,,,,,,,,,,,,,,-0.585510,-0.585584,-0.214747,1.184518,0.333333,18221.953987,0.046115,-1.091118,0.650406,0.438089,1.0,12.0,0.756029,,,,,,,,,,,,,
21232,Yamanashi-ken Oshino-mura,19424,2021,,,,,,,,,,,,,,1.152940,0.929838,0.612252,0.557395,-0.250000,29378.375210,0.418723,-1.340108,0.266025,-0.791044,1.0,9.0,0.299287,,,,,,,,,,,,,
21233,Yamanashi-ken Oshino-mura,19424,2022,,,,,,,,,,,,,,-1.679992,-1.211575,-0.497405,-0.516146,-0.111111,14765.418036,-0.144001,-1.345537,-0.189691,-0.132958,1.0,8.0,-0.549031,,,,,,,,,,,,,


In [97]:
df = combined_derived_dfs

population_df = pd.read_csv(population_path)
migration_df = pd.read_csv(migration_path)
taxable_income_df = pd.read_csv(taxable_income_path)
new_dwellings_df = pd.read_csv(new_dwellings_path)
lfs_revenue_df= pd.read_csv(lfs_revenue_path)

df = (
    df.merge(population_df, on=group_by_columns, how="left")
    .merge(migration_df, on=group_by_columns, how="left")
    .merge(taxable_income_df, on=group_by_columns, how="left")
    .merge(new_dwellings_df, on=group_by_columns, how="left")
    .merge(lfs_revenue_df, on=group_by_columns, how="left")
)

In [98]:
df["migrations_is_available"] = df["net_migration_ratio"].notnull().astype(int)
df["taxable_income_is_available"] = df["taxable_income"].notnull().astype(int)
df["total_tax_is_available"] = df["total_tax"].notnull().astype(int)
df["dwellings_is_available"] = df["new_dwellings"].notnull().astype(int)

In [99]:
taxable_income_df

Unnamed: 0,year,area_code,area,taxable_income,taxpayer_count,taxpayer_count_growth,taxpayer_count_log,taxpayer_count_log_normalized_yearly,taxpayer_count_growth_log,taxpayer_count_growth_log_normalized_yearly,taxpayer_count_growth_normalized_yearly,taxable_income_per_taxpayer,taxable_income_growth,taxable_income_per_taxpayer_growth,taxable_income_log,taxable_income_log_normalized_yearly,taxable_income_per_taxpayer_log,taxable_income_per_taxpayer_log_normalized_yearly,taxable_income_growth_log,taxable_income_growth_log_normalized_yearly,taxable_income_per_taxpayer_growth_log,taxable_income_per_taxpayer_growth_log_normalized_yearly,taxable_income_growth_normalized_yearly,taxable_income_per_taxpayer_growth_normalized_yearly
0,2021,1100,Hokkaido Sapporo-shi,2.971366e+09,906411.0,0.003444,5.957326,2.853349,0.001493,0.335186,0.336425,3278.166013,0.032400,0.028857,9.472956,2.776101,3.515763,0.702540,0.013848,0.150340,0.012355,-0.008686,0.131738,-0.026130
1,2021,1202,Hokkaido Hakodate-shi,3.194402e+08,107534.0,-0.015292,5.031550,1.505360,-0.006693,-0.681591,-0.705828,2970.597039,0.016612,0.032400,8.504390,1.435003,3.472990,0.037166,0.007155,-0.242067,0.013848,0.087863,-0.260901,0.069790
2,2021,1203,Hokkaido Otaru-shi,1.243333e+08,46190.0,-0.013582,4.664557,0.970996,-0.005939,-0.587980,-0.610690,2691.778827,0.028630,0.042793,8.094587,0.867582,3.430201,-0.628452,0.012259,0.057170,0.018198,0.369178,0.037965,0.351160
3,2021,1204,Hokkaido Asahikawa-shi,4.193510e+08,142560.0,0.000681,5.154001,1.683656,0.000296,0.186437,0.182719,2941.575168,0.038224,0.037518,8.622578,1.598649,3.468728,-0.029138,0.016291,0.293581,0.015996,0.226747,0.276577,0.208349
4,2021,1205,Hokkaido Muroran-shi,1.052086e+08,34835.0,-0.010875,4.542028,0.792585,-0.004749,-0.440141,-0.460104,3020.199455,0.009337,0.020434,8.022051,0.767148,3.480179,0.149004,0.004036,-0.424940,0.008785,-0.239547,-0.441825,-0.254155
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64371,1985,47361,Okinawa-ken Kumejima-cho,3.992455e+06,1819.0,,3.260071,-1.095376,,,,2194.862562,,,6.601240,-1.037738,3.341605,-0.076708,,,,,,
64372,1985,47362,Okinawa-ken Yaese-cho,9.023653e+06,4997.0,,3.698796,-0.386018,,,,1805.814088,,,6.955382,-0.499835,3.256913,-1.225738,,,,,,
64373,1985,47375,Okinawa-ken Tarama-son,4.395900e+05,196.0,,2.294466,-2.656629,,,,2242.806122,,,5.643049,-2.493124,3.350985,0.050555,,,,,,
64374,1985,47381,Okinawa-ken Taketomi-cho,1.525627e+06,663.0,,2.822168,-1.803406,,,,2301.096531,,,6.183449,-1.672317,3.362124,0.201669,,,,,,


In [100]:
df

Unnamed: 0,area,area_code,year,building_yearly_price_growth_log_normalized_yearly,building_yearly_price_growth_normalized_yearly,building_yearly_price_growth,building_unit_price_gmean_pct_chg_log_normalized_yearly,building_count_growth,building_unit_price_gmean,building_unit_price_gmean_log_normalized_yearly,building_count_log_normalized_yearly,building_unit_price_gmean_pct_chg,building_count_growth_normalized_yearly,building_metric_pct_chg_is_available,building_count,building_unit_price_gmean_pct_chg_normalized_yearly,land_yearly_price_growth_log_normalized_yearly,land_yearly_price_growth_normalized_yearly,land_yearly_price_growth,land_unit_price_gmean_pct_chg_log_normalized_yearly,land_count_growth,land_unit_price_gmean,land_unit_price_gmean_log_normalized_yearly,land_count_log_normalized_yearly,land_unit_price_gmean_pct_chg,land_count_growth_normalized_yearly,land_metric_pct_chg_is_available,land_count,land_unit_price_gmean_pct_chg_normalized_yearly,condo_yearly_price_growth_log_normalized_yearly,condo_yearly_price_growth_normalized_yearly,condo_yearly_price_growth,condo_unit_price_gmean_pct_chg_log_normalized_yearly,condo_count_growth,condo_unit_price_gmean,condo_unit_price_gmean_log_normalized_yearly,condo_count_log_normalized_yearly,condo_unit_price_gmean_pct_chg,condo_count_growth_normalized_yearly,condo_metric_pct_chg_is_available,condo_count,condo_unit_price_gmean_pct_chg_normalized_yearly,population,population_log,population_log_normalized_yearly,in_migrations,out_migrations,net_migration_ratio,in_migrations_log,in_migrations_log_normalized_yearly,out_migrations_log,out_migrations_log_normalized_yearly,net_migration_ratio_log,net_migration_ratio_log_normalized_yearly,net_migration_ratio_normalized_yearly,taxable_income,taxpayer_count,taxpayer_count_growth,taxpayer_count_log,taxpayer_count_log_normalized_yearly,taxpayer_count_growth_log,taxpayer_count_growth_log_normalized_yearly,taxpayer_count_growth_normalized_yearly,taxable_income_per_taxpayer,taxable_income_growth,taxable_income_per_taxpayer_growth,taxable_income_log,taxable_income_log_normalized_yearly,taxable_income_per_taxpayer_log,taxable_income_per_taxpayer_log_normalized_yearly,taxable_income_growth_log,taxable_income_growth_log_normalized_yearly,taxable_income_per_taxpayer_growth_log,taxable_income_per_taxpayer_growth_log_normalized_yearly,taxable_income_growth_normalized_yearly,taxable_income_per_taxpayer_growth_normalized_yearly,new_dwellings,existing_dwellings,new_dwellings_ratio,new_dwellings_log,new_dwellings_log_normalized_yearly,existing_dwellings_log,existing_dwellings_log_normalized_yearly,new_dwellings_ratio_log,new_dwellings_ratio_log_normalized_yearly,new_dwellings_ratio_normalized_yearly,total_tax,total_tax_growth,total_tax_log,total_tax_log_normalized_yearly,total_tax_growth_normalized_yearly,total_tax_growth_log,total_tax_growth_log_normalized_yearly,migrations_is_available,taxable_income_is_available,total_tax_is_available,dwellings_is_available
0,Aichi-ken Agui-cho,23441,2007,,,,,,67590.233575,0.066006,-1.205808,,,0.0,6.0,,,,,,,54723.779572,0.674598,-0.544187,,,0.0,20.0,,,,,,,,,,,,,,,24979.282722,4.397597,-0.062208,901.0,743.690086,0.006298,2.955207,-0.064155,2.871976,-0.309758,0.002726,1.249193,1.250895,41184220.0,11648.0,0.021754,4.066289,0.091212,0.009347,1.014555,1.022130,3535.733173,0.030296,0.008360,7.614731,0.211655,3.548602,1.247030,0.012962,0.647252,0.003615,0.238545,0.337012,0.101139,,,,,,,,,,,3892521.0,0.100463,6.590231,0.180393,0.862704,0.041576,0.909770,1,1,1,0
1,Aichi-ken Agui-cho,23441,2008,2.521607,3.158972,1.105567,,1.333333,142315.742351,0.968399,-0.674282,,0.497956,0.0,14.0,,-0.742933,-0.581869,-0.250691,,0.500000,41004.994789,0.511063,-0.290165,,0.118376,0.0,30.0,,,,,,,,,,,,,,,25136.592636,4.400324,-0.060310,940.0,702.479726,0.009449,2.973590,-0.025579,2.847252,-0.330671,0.004084,1.608139,1.618172,41348658.0,11650.0,0.000172,4.066363,0.092896,0.000075,0.143918,0.136834,3549.241030,0.003993,0.003820,7.616461,0.217377,3.550258,1.306662,0.001731,0.266139,0.001656,0.220941,0.288081,0.260584,,,,,,,,,,,3957657.0,0.016734,6.597438,0.181904,0.214068,0.007207,0.371623,1,1,1,0
2,Aichi-ken Agui-cho,23441,2009,0.348771,0.151198,0.024848,2.699531,1.071429,145852.056278,1.103548,-0.183325,1.157887,1.470449,1.0,29.0,3.510887,1.343899,1.331892,0.473600,0.596489,0.233333,60424.944041,0.964866,-0.023743,0.104181,0.479061,1.0,37.0,0.359541,,,,,,,,,,,,,,25374.112910,4.404408,-0.050007,1013.0,692.112910,0.012646,3.006038,0.021508,2.840804,-0.322509,0.005458,2.054499,2.068705,40698843.0,11682.0,0.002747,4.067554,0.100264,0.001191,0.557229,0.548504,3483.893426,-0.015716,-0.018412,7.609582,0.221770,3.542190,1.318973,-0.006879,0.247883,-0.008071,-0.142679,0.235759,-0.160658,,,,,,,,,,,3776007.0,-0.045898,6.577033,0.160284,-0.550798,-0.020405,-0.532664,1,1,1,0
3,Aichi-ken Agui-cho,23441,2010,0.008386,-0.133281,-0.019251,0.337939,0.413793,143044.196187,1.082660,0.055760,0.005119,0.690053,1.0,41.0,0.093175,0.153491,-0.019356,0.044494,1.475480,0.459459,63113.475459,1.015879,0.231435,0.539166,0.515716,1.0,54.0,1.443527,,,,,,,,,,,,,,25695.000000,4.409866,-0.040709,1284.0,731.000000,0.021522,3.108903,0.193023,2.864511,-0.229163,0.009248,3.198922,3.214395,37069636.0,11535.0,-0.012583,4.062055,0.112739,-0.005500,0.792210,0.785904,3213.665886,-0.089172,-0.077565,7.569018,0.206799,3.507136,1.092016,-0.040564,-0.662735,-0.035064,-1.709576,-0.655053,-1.644556,,,,,,,,,,,3646545.0,-0.034285,6.561882,0.147278,-0.747217,-0.015151,-0.864771,1,1,1,0
4,Aichi-ken Agui-cho,23441,2011,0.063796,-0.085690,0.010080,0.071647,-0.024390,144486.087473,1.102463,0.100640,-0.009365,0.004318,1.0,40.0,-0.082755,0.338276,0.159122,0.098042,0.444827,-0.203704,69301.255966,1.110362,0.057424,0.146898,-0.533401,1.0,43.0,0.204269,,,,,,,,,,,,,,26248.000000,4.419113,-0.027112,1345.0,800.000000,0.020763,3.129045,0.229653,2.903633,-0.164861,0.008925,1.709822,1.860747,37957523.0,11696.0,0.013958,4.068074,0.123483,0.006020,0.820175,0.827341,3245.342254,0.023952,0.009857,7.579298,0.223379,3.511394,1.164292,0.010280,0.834756,0.004260,0.480175,0.832596,0.467947,,,,,,,,,,,3801941.0,0.042615,6.580005,0.174087,0.958657,0.018124,0.744964,1,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21230,Yamanashi-ken Oshino-mura,19424,2019,,,,,,,,,,,,,,1.912959,1.490448,1.101751,2.088592,-0.181818,23205.212513,0.237574,-1.285960,1.366668,-0.415074,1.0,9.0,1.809799,,,,,,,,,,,,,,9226.000000,3.965061,-0.680706,544.0,533.000000,0.001192,2.736397,-0.252062,2.727541,-0.378850,0.000517,0.623607,0.630017,26057466.0,4857.0,0.044067,3.686458,-0.456243,0.018728,2.203239,2.257485,5364.930204,0.099716,0.053300,7.415932,-0.054882,3.729645,3.934078,0.041281,2.024991,0.022552,1.213095,2.005447,1.149890,,,,,,,,,,,3033323.0,-0.211496,6.481919,0.882856,-5.744150,-0.103196,-6.618838,1,1,1,0
21231,Yamanashi-ken Oshino-mura,19424,2020,,,,,,,,,,,,,,-0.585510,-0.585584,-0.214747,1.184518,0.333333,18221.953987,0.046115,-1.091118,0.650406,0.438089,1.0,12.0,0.756029,,,,,,,,,,,,,,9237.000000,3.965578,-0.675561,517.0,553.000000,-0.003897,2.714330,-0.269892,2.743510,-0.329453,-0.001696,-0.031338,-0.036765,24199700.0,4903.0,0.009471,3.690550,-0.449730,0.004094,0.503784,0.497297,4935.692433,-0.071295,-0.080008,7.383810,-0.100771,3.693436,3.482917,-0.032122,-1.772486,-0.036216,-2.169419,-1.715346,-2.102265,,,,,,,,,,,3084718.0,0.016943,6.489216,0.877486,0.545702,0.007297,0.565027,1,1,1,0
21232,Yamanashi-ken Oshino-mura,19424,2021,,,,,,,,,,,,,,1.152940,0.929838,0.612252,0.557395,-0.250000,29378.375210,0.418723,-1.340108,0.266025,-0.791044,1.0,9.0,0.299287,,,,,,,,,,,,,,9254.000000,3.966376,-0.671414,548.0,531.000000,0.001837,2.739572,-0.229352,2.725912,-0.330701,0.000797,0.495937,0.513805,22848415.0,4818.0,-0.017336,3.682957,-0.458277,-0.007595,-0.793673,-0.819520,4742.302823,-0.055839,-0.039182,7.358856,-0.151127,3.676081,3.196396,-0.024954,-2.124641,-0.017359,-1.930213,-2.062681,-1.868131,,,,,,,,,,,3074199.0,-0.003410,6.487732,0.875223,0.022043,-0.001483,0.052214,1,1,1,0
21233,Yamanashi-ken Oshino-mura,19424,2022,,,,,,,,,,,,,,-1.679992,-1.211575,-0.497405,-0.516146,-0.111111,14765.418036,-0.144001,-1.345537,-0.189691,-0.132958,1.0,8.0,-0.549031,,,,,,,,,,,,,,9285.000000,3.967829,-0.665887,543.0,512.000000,0.003339,2.735599,-0.225061,2.710117,-0.346871,0.001448,0.558356,0.611009,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3490885.0,0.135543,6.542936,0.883383,1.305209,0.055204,1.668874,1,0,1,0


In [101]:
df = df[final_columns]

In [102]:
df.to_csv(model_ready_data_path, index=False)

In [28]:
df.describe()

Unnamed: 0,area_code,year,land_yearly_price_growth_log_normalized_yearly,building_yearly_price_growth_log_normalized_yearly,condo_yearly_price_growth_log_normalized_yearly,land_yearly_price_growth_normalized_yearly,building_yearly_price_growth_normalized_yearly,condo_yearly_price_growth_normalized_yearly,land_yearly_price_growth,building_yearly_price_growth,condo_yearly_price_growth,land_unit_price_gmean_pct_chg_log_normalized_yearly,building_unit_price_gmean_pct_chg_log_normalized_yearly,condo_unit_price_gmean_pct_chg_log_normalized_yearly,land_count_growth,building_count_growth,condo_count_growth,land_unit_price_gmean,building_unit_price_gmean,condo_unit_price_gmean,land_unit_price_gmean_log_normalized_yearly,building_unit_price_gmean_log_normalized_yearly,condo_unit_price_gmean_log_normalized_yearly,land_count_log_normalized_yearly,building_count_log_normalized_yearly,condo_count_log_normalized_yearly,land_unit_price_gmean_pct_chg,building_unit_price_gmean_pct_chg,condo_unit_price_gmean_pct_chg,land_count_growth_normalized_yearly,building_count_growth_normalized_yearly,condo_count_growth_normalized_yearly,land_metric_pct_chg_is_available,building_metric_pct_chg_is_available,condo_metric_pct_chg_is_available,land_count,building_count,condo_count,land_unit_price_gmean_pct_chg_normalized_yearly,building_unit_price_gmean_pct_chg_normalized_yearly,condo_unit_price_gmean_pct_chg_normalized_yearly,in_migrations,taxable_income_log_normalized_yearly,taxable_income_per_taxpayer_growth,new_dwellings_log_normalized_yearly,taxable_income_growth_normalized_yearly,new_dwellings_ratio,net_migration_ratio,total_tax_is_available,population,taxable_income_growth,existing_dwellings_log_normalized_yearly,out_migrations,net_migration_ratio_normalized_yearly,taxable_income_per_taxpayer_growth_normalized_yearly,new_dwellings_ratio_normalized_yearly,migrations_is_available,total_tax_log_normalized_yearly,taxable_income_per_taxpayer_log_normalized_yearly,total_tax,total_tax_growth_normalized_yearly,taxpayer_count_growth_normalized_yearly,taxpayer_count,taxpayer_count_log_normalized_yearly,taxable_income,population_log_normalized_yearly,total_tax_growth,in_migrations_log_normalized_yearly,existing_dwellings,taxpayer_count_growth,out_migrations_log_normalized_yearly,taxable_income_is_available,dwellings_is_available,taxable_income_per_taxpayer,new_dwellings
count,21235.0,21235.0,19393.0,17342.0,4911.0,19393.0,17342.0,4911.0,19393.0,17342.0,4911.0,18100.0,16179.0,4576.0,19393.0,17342.0,4911.0,20721.0,18542.0,5262.0,20721.0,18542.0,5262.0,20721.0,18542.0,5262.0,18100.0,16179.0,4576.0,19393.0,17342.0,4911.0,20721.0,18542.0,5262.0,20721.0,18542.0,5262.0,18100.0,16179.0,4576.0,21234.0,19917.0,19917.0,12175.0,19917.0,12175.0,21234.0,21235.0,21234.0,19917.0,12175.0,21234.0,21234.0,19917.0,12175.0,21235.0,21164.0,19917.0,21164.0,21141.0,19917.0,19917.0,19917.0,19917.0,21234.0,21141.0,21234.0,12175.0,19917.0,21234.0,21235.0,21235.0,19917.0,12175.0
mean,21783.687921,2014.446762,1.465565e-18,1.229171e-18,-5.787357e-18,0.0,3.277789e-18,4.340518e-18,0.051633,0.048543,0.02634,9.814126e-18,4.830935e-18,-1.2422080000000002e-17,0.170641,0.166097,0.106521,46257.97,91275.77,249273.1,-2.7432760000000002e-17,3.801415e-16,-2.59263e-16,-2.853007e-16,-1.870051e-16,-3.024735e-16,0.039459,0.036415,0.040473,-2.931131e-18,-4.916684e-18,-2.8936780000000002e-18,0.87351,0.87256,0.869631,79.942908,90.882483,152.278791,3.1405200000000002e-18,8.783519e-19,-7.763796999999999e-19,3933.123293,0.4001,0.001302,-0.108651,0.034121,0.013678,-0.002976,0.996656,97189.9,0.003273,-0.10227,3960.423098,0.099158,-0.012359,-0.082956,0.999953,0.539913,0.180698,14901160.0,0.061775,0.09268,43634.23,0.404418,146813100.0,0.291269,0.007955,0.248806,68472.53,0.001865,0.251034,0.937933,0.573346,2929.987478,1116.890021
std,13528.582464,4.654486,0.9995874,0.9995386,0.9983693,0.999587,0.9995386,0.9983693,0.505666,0.539019,0.171165,0.9995855,0.9995363,0.9983593,2.46917,3.290677,0.44842,123382.9,179222.3,169939.5,0.9995897,0.9995415,0.998383,0.9995897,0.9995415,0.998383,0.519445,0.447017,0.184228,0.9995874,0.9995386,0.9983693,0.332409,0.333475,0.336741,134.961584,205.93478,347.796617,0.9995855,0.9995363,0.9983593,11819.393312,0.790126,0.04024,1.020543,0.865655,0.007129,0.006382,0.057728,220984.2,0.049345,1.04155,11381.524408,0.71007,0.872374,0.956812,0.006862,0.73122,1.008675,42920780.0,1.129133,0.811183,100350.2,0.774262,378701300.0,0.760988,0.05783,0.784131,133830.0,0.024113,0.784397,0.241284,0.494603,574.984831,2577.655643
min,1100.0,2005.0,-8.702009,-7.364511,-7.344806,-2.397059,-2.951646,-4.135254,-0.96422,-0.938191,-0.77153,-7.184137,-7.689984,-7.702812,-0.903226,-0.855556,-0.884615,74.91156,1065.064,39475.62,-4.288099,-3.552028,-3.158544,-2.294218,-2.035521,-2.526924,-0.946739,-0.942314,-0.749036,-2.293802,-2.415549,-2.906729,0.0,0.0,0.0,3.0,3.0,3.0,-2.30614,-2.475647,-4.977319,10.0,-2.536632,-0.694693,-4.870548,-18.121708,0.0,-0.102674,0.0,387.0,-0.682739,-3.152641,11.0,-7.961075,-23.624752,-2.178107,0.0,-2.493658,-2.4012,52938.0,-15.177904,-14.862366,161.0,-2.603703,418246.0,-2.700291,-0.677251,-2.787129,2190.0,-0.590998,-2.704741,0.0,0.0,1908.27862,0.0
25%,11215.0,2010.0,-0.4335022,-0.4220999,-0.4735323,-0.449793,-0.4278431,-0.5194999,-0.164552,-0.149803,-0.062273,-0.4523124,-0.4293235,-0.4824018,-0.181818,-0.142857,-0.121495,9407.226,23911.17,152299.5,-0.6467369,-0.6967543,-0.656684,-0.721506,-0.7463328,-0.7437905,-0.199186,-0.17072,-0.065329,-0.5279406,-0.5207219,-0.5269214,1.0,1.0,1.0,18.0,15.0,19.0,-0.4745879,-0.4637616,-0.5434024,509.0,-0.193856,-0.009179,-0.826853,-0.255501,0.008991,-0.006586,1.0,17586.0,-0.010883,-0.858108,619.0,-0.306262,-0.25368,-0.724417,1.0,0.00183,-0.500812,2218915.0,-0.302737,-0.277367,7465.0,-0.176746,20154490.0,-0.279643,-0.014169,-0.333947,18040.0,-0.007437,-0.315585,1.0,0.0,2596.501415,178.0
50%,21205.0,2014.0,0.01965118,0.01762598,0.02785531,-0.131575,-0.1192099,-0.05144588,-0.013472,-0.009689,0.020164,0.02932115,0.03261167,0.04504282,0.0,0.022727,0.028037,17247.27,45359.61,197746.1,-0.1280589,-0.07905513,-0.1446921,-0.04838284,-0.1274111,-0.1154106,-0.031672,-0.01903,0.032458,-0.1709586,-0.1527531,-0.1434578,1.0,1.0,1.0,39.0,33.0,46.0,-0.1360691,-0.1226657,-0.04211167,1163.0,0.314574,0.001722,-0.169945,0.014239,0.012692,-0.003098,1.0,38092.0,0.005537,-0.317344,1298.0,0.076093,-0.031954,-0.171896,1.0,0.667247,0.021217,4867354.0,-0.00803,0.08452,16506.0,0.331129,46323230.0,0.219164,0.003698,0.163076,29789.0,0.00261,0.161054,1.0,1.0,2819.637802,407.0
75%,32528.0,2018.0,0.4577,0.4208,0.4811473,0.229421,0.1950658,0.4093882,0.158108,0.136453,0.098459,0.4660268,0.4370456,0.5168795,0.234568,0.244898,0.222222,37424.97,96713.04,272306.7,0.5539358,0.6600848,0.4713997,0.6565215,0.6647817,0.5990424,0.155405,0.139073,0.130662,0.2779252,0.2951703,0.3073336,1.0,1.0,1.0,86.0,86.0,122.0,0.2320975,0.2238496,0.4607151,2935.0,0.88636,0.0117,0.513909,0.283884,0.016838,0.000355,1.0,85745.75,0.020497,0.466554,3011.0,0.464543,0.187873,0.391977,1.0,1.001613,0.69097,12632300.0,0.32839,0.44954,38558.0,0.878663,118763400.0,0.745099,0.022335,0.725726,61801.0,0.011645,0.701415,1.0,1.0,3128.844573,960.5
max,47382.0,2022.0,7.899281,10.25022,6.169861,27.694683,32.05835,9.886444,23.478425,42.673283,1.889849,6.821257,7.880346,4.335699,206.0,324.333333,8.0,2926971.0,4076227.0,1457856.0,4.372765,4.153827,3.300096,3.720151,3.975306,3.259264,13.437224,12.175872,1.325524,16.44717,27.18723,10.01943,1.0,1.0,1.0,2419.0,4258.0,4215.0,19.60862,23.49057,5.980418,214930.0,3.424611,3.549172,3.413502,38.584813,0.12027,0.057609,1.0,3832957.0,3.608838,4.107772,236014.0,6.442511,40.093104,12.763745,1.0,3.505448,9.642084,867276500.0,53.722237,16.181699,1906224.0,3.380084,7965148000.0,3.258549,3.053763,3.327361,1916062.0,0.59417,3.462084,1.0,1.0,12667.02,42858.0


In [55]:
area_code = 6428
df[df["area_code"] == area_code][["year"] + [f"{asset_type}_yearly_price_growth" for asset_type in asset_types] + [f"{asset_type}_count" for asset_type in asset_types]]

Unnamed: 0,year,land_yearly_price_growth,building_yearly_price_growth,condo_yearly_price_growth,land_count,building_count,condo_count
17874,2008,-0.148374,,,13.0,12.0,
17875,2009,-0.263423,0.021085,,12.0,14.0,
17876,2010,0.321346,1.464304,,9.0,5.0,
17877,2011,0.11398,-0.349555,,12.0,10.0,
17878,2012,0.062973,1.106908,,17.0,5.0,
17879,2013,-0.132984,-0.630526,,17.0,13.0,
17880,2014,-0.072015,-0.760203,,13.0,4.0,
17881,2015,0.145216,3.453824,,13.0,10.0,
17882,2016,-0.124589,-0.502936,,14.0,4.0,
17883,2017,-0.074654,-0.883946,,12.0,3.0,
