#### Purpose of the notebook
To use simple statistics to answer the following questions:
- Which country have versatile return for housing investment from 2012 - 2022?
- Which country have the hightest mean return for housing investmen from 2012 - 2022?
- What is the performance of investment for each country from 2012 - 2022?

In [1]:
# import all the library
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 

In [2]:
origin = pd.read_csv("integrated_housing_data.csv")

origin = origin.copy(deep=False)
origin["quarter"] = origin.quarter.apply(lambda str1: int(str1[-1]))
origin["time"] = pd.PeriodIndex(year = origin.year, quarter = origin.quarter)

In [3]:
stat = []
for gp in origin.groupby(by="location"):
    #print(gp[0])

    group_df = gp[1]
    group_df.index = group_df.time
    if len(group_df) < 168:
        continue
    #print(len(group_df), group_df.time.iloc[60])
    # Investment peroid (2012-2022)
    group_df = group_df[group_df.index>="2012"]
    group_df = group_df.copy(deep = True)
    group_df["real_price_roc"] = group_df["real_price"].diff()/group_df["real_price"]
    stat.append([gp[0], group_df.real_price_roc.mean(), 
        group_df.real_price_roc.std(), 
        np.abs(group_df.real_price_roc.std()/group_df.real_price_roc.mean())])
stat = pd.DataFrame(stat, columns=["location","mean", "std", "CV"])
stat.index = stat.location
del stat["location"]

Step 
- Here for each country, we calculate the housing price rate of change (or investment return) in quarter 
- And then find corresponding mean and standard deviation
- And use coefficents of variance to determine high risk/ low risk countries to invest  

In [4]:
stat.sort_values(by="CV")

Unnamed: 0_level_0,mean,std,CV
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
USA,0.013435,0.007645,0.569046
OECD,0.009124,0.006061,0.664285
DEU,0.011937,0.00854,0.715406
CHE,0.008653,0.007171,0.828694
NZL,0.019891,0.0172,0.864728
ISR,0.011674,0.012253,1.049666
CAN,0.012586,0.013353,1.060983
PRT,0.012881,0.01397,1.084527
SWE,0.011846,0.012898,1.088789
GBR,0.008403,0.009925,1.181162


#### Baseline Model
- We assume investor can trade the house quarterly and freely
- And we simply use the previous quarter price movement(baseline model) to inform the trading decision(long or short)
- Finally, we calculate the final return for each countries during 2012-2022 

In [5]:
def computation(df):
    """
    Compute final long-short value using predicted return
    """
    for i in range(1,len(df)):
        if df.iloc[i,1]>=0:
            df.iloc[i,2] = df.iloc[i-1,2] * (1 + df.iloc[i,0])
        else:
            df.iloc[i,2] = df.iloc[i-1,2] * (1 - df.iloc[i,0])
    return df

def PnL(return_test, return_pred):
    # a data frame for computing and saving long_short value
    long_short_df = pd.DataFrame({
            'Return': return_test.values,
            'Predicted Return': return_pred,
            'Long-short value': np.zeros(len(return_test))
        }
    )
    # give an initial point
    initial = pd.DataFrame(np.array([0, 0, 1]).reshape(-1, 3), columns = long_short_df.columns)
    # combine df and initial point
    long_short_df = pd.concat([initial, long_short_df])
    # compute long_short value
    long_short_df_final = computation(long_short_df)

    # return final long_short value of this period
    return long_short_df_final.iloc[-1,2]

In [6]:
return_lst = []
gps = {}
for gp in origin.groupby(by="location"):
    #print(gp[0])

    group_df = gp[1]
    group_df.index = group_df.time
    if len(group_df) < 168:
        continue
    #print(len(group_df), group_df.time.iloc[60])
    # Investment peroid (2012-2022)
    group_df = group_df[group_df.index>="2012"]
    group_df = group_df.copy(deep = True)
    tmp = pd.DataFrame()
    tmp["return_test"] = group_df["real_price"].diff()/group_df["real_price"]
    tmp["return_pred"] = tmp["return_test"].shift(1)
    tmp.dropna(inplace=True)
    #gps[gp[0]] = PnL(tmp.return_test, tmp.return_pred)
    return_lst.append([gp[0], PnL(tmp.return_test, tmp.return_pred)])
    

In [7]:
return_df = pd.DataFrame(return_lst, columns=["location", "final_return"])

In [8]:
return_df.sort_values(by="final_return")

Unnamed: 0,location,final_return
10,FIN,0.988834
2,BEL,1.006616
25,ZAF,1.027141
6,DNK,1.149892
16,JPN,1.176386
17,MEX,1.19725
19,NOR,1.204889
15,ITA,1.213142
11,FRA,1.249347
4,CHE,1.251781
