In [224]:
import pandas as pd
from datetime import datetime, date, timedelta
import pytz
import numpy as np
import time
%matplotlib inline
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8-*' and
# later releases dropped the old names, so prefer the new name and only fall back
# to the legacy one on older installations.
plt.style.use('seaborn-v0_8-white' if 'seaborn-v0_8-white' in plt.style.available else 'seaborn-white')
import glob
import statistics
from scipy import stats
from scipy.stats import ttest_1samp
from scipy.stats import ttest_ind
from statsmodels.distributions.mixture_rvs import mixture_rvs
import statsmodels.api as sm
import pandas_market_calendars as mcal
from numpy.random import seed
from numpy.random import randn
from scipy.stats import mannwhitneyu
import pickle
from statsmodels.iolib.summary2 import summary_col
np.set_printoptions(suppress=True)
from empyrical import max_drawdown, alpha_beta, sharpe_ratio, alpha, annual_volatility, annual_return, cum_returns
import matplotlib.colors as mcolors



In [9]:
# Prerequisite: pd.read_spss requires the pyreadstat package. If it is missing, install it first: pip install pyreadstat

# Read Data

In [230]:
# Load the raw survey export (SPSS .sav file, read from the working directory)
# into a DataFrame; pd.read_spss relies on the pyreadstat package.
df = pd.read_spss(path='data.sav')
# Show the loaded frame as the cell output.
df

Unnamed: 0,lfdn,lastpage,quality,duration,country,country_code,eu_country,Estonia_D,Taiwan_D,China_D,...,bta_sub,bta,bta_real1a,bta_real1b,bta_real1,bta_real2a,tor,toe,bta_real2b,bta_real2
0,520.0,5931865.0,,35.0,Taiwan,3.0,0.0,0.0,1.0,0.0,...,,,,,,,,,,
1,301.0,5931889.0,,3179.0,Taiwan,3.0,0.0,0.0,1.0,0.0,...,0.32,0.1,0.0,0.32,0.0,-0.24,0.715911,0.55811,0.157801,-0.082199
2,468.0,5931889.0,,1973.0,Taiwan,3.0,0.0,0.0,1.0,0.0,...,,,,,,,,,,
3,937.0,5934355.0,,1373.0,Taiwan,3.0,0.0,0.0,1.0,0.0,...,,,,,,,,,,
4,430.0,5931865.0,,51.0,Taiwan,3.0,0.0,0.0,1.0,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3643,589.0,5931867.0,,27.0,Vitnam,5.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
3644,3413.0,5931889.0,,2995.0,Vitnam,5.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
3645,2027.0,5931889.0,,3142.0,Vitnam,5.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
3646,2483.0,5934355.0,,2603.0,Vitnam,5.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,


In [231]:
# Build the analysis frame from the answers relevant for patriotism and home bias
# (external variable names, see codebook) together with the respondent's country.
analysis_columns = [
    'country', 'job', 'parent', 'USD_ppp_income', 'wstock', 'theta', 'patient',
    'XF1', 'XF2', 'XF3', 'female', 'age',
    'Estonia_D', 'Taiwan_D', 'China_D', 'Vietnam_D', 'Germany_D', 'Japan_D',
    'XP1', 'XFD6', 'XFD7', 'XFD8', 'XFD9',
]

# Complete-case selection: a respondent is kept only if none of the columns above is NaN.
dfr = df[analysis_columns].dropna()

# Home bias: difference between XFD8 and XFD9 relative to their sum, so it lies in [-1, 1].
# Rows where XFD8 + XFD9 == 0 evaluate to NaN.
allocation_total = dfr['XFD8'] + dfr['XFD9']
dfr['homebias'] = (dfr['XFD8'] - dfr['XFD9']) / allocation_total
dfr['Home Bias'] = dfr['homebias'].round(2)

# Patriotism: reverse XP1 and rescale it linearly so that 1 -> +1 and 4 -> -1.
# NOTE: this overwrites the raw XP1 answer in dfr with the rescaled value.
# NOTE(review): presumably XP1 is a 4-point item -- confirm against the codebook.
dfr['XP1'] = ((5 - dfr['XP1']) - 1) / 3 * 2 - 1
dfr['Patriotism'] = dfr['XP1'].round(2)

# Financial literacy: share (0, 1/3, 2/3, 1) of the items XF1, XF2, XF3 that carry
# the codes 1, 3 and 2 respectively.
# NOTE(review): presumably these codes are the correct answers -- confirm against the codebook.
literacy_hits = (
    np.where(dfr['XF1'] == 1, 1, 0)
    + np.where(dfr['XF2'] == 3, 1, 0)
    + np.where(dfr['XF3'] == 2, 1, 0)
)
dfr['Financial Literacy'] = literacy_hits / 3

# CAPM share of each respondent's country, subtracted from the XFD8 share below.
# The weights are looked up case- and whitespace-insensitively: the original code
# compared exact strings with inconsistent casing ('japan' vs. 'Taiwan', 'China', ...),
# so a label differing only in case or padding silently received weight 0.
# Keys are the lower-cased labels the original matched ('vitnam' is the spelling
# found in the data). Countries without an entry keep weight 0, as before.
capm_share = {
    'taiwan': 0.0194,
    'china': 0.0936,
    'japan': 0.0783,
    'vitnam': 0.002,
    'estonia': 0.0,
    'germany': 0.0260,
}
country_key = dfr['country'].astype(str).str.strip().str.lower()
dfr['CAPM'] = country_key.map(capm_share).fillna(0.0)
dfr['Home Bias 2'] = dfr['XFD8'] / allocation_total - dfr['CAPM']

# Optional sample restriction (currently disabled): exclude Germany and Estonia,
# e.g. with dfr[~dfr['country'].isin(['Germany', 'Estonia'])].
dfr

Unnamed: 0,country,job,parent,USD_ppp_income,wstock,theta,patient,XF1,XF2,XF3,...,XFD6,XFD7,XFD8,XFD9,homebias,Home Bias,Patriotism,Financial Literacy,CAPM,Home Bias 2
0,Taiwan,7.0,0.0,4666.666504,4.0,0.533333,1.0,1.0,3.0,2.0,...,0.0,0.0,60.0,40.0,0.200000,0.20,0.33,1.000000,0.0194,0.580600
1,Taiwan,4.0,0.0,4666.666504,4.0,0.533333,1.0,1.0,3.0,2.0,...,0.0,0.0,60.0,40.0,0.200000,0.20,0.33,1.000000,0.0194,0.580600
2,Taiwan,1.0,0.0,6666.666504,3.0,1.000000,1.0,1.0,3.0,2.0,...,30.0,60.0,5.0,5.0,0.000000,0.00,-0.33,1.000000,0.0194,0.480600
3,Taiwan,7.0,0.0,1666.666626,3.0,2.666667,0.0,1.0,1.0,3.0,...,25.0,25.0,25.0,25.0,0.000000,0.00,-0.33,0.333333,0.0194,0.480600
4,Taiwan,2.0,0.0,4666.666504,4.0,2.000000,1.0,1.0,3.0,1.0,...,0.0,10.0,20.0,70.0,-0.555556,-0.56,0.33,0.666667,0.0194,0.202822
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3639,Vitnam,4.0,0.0,10769.230469,3.0,1.000000,0.0,1.0,1.0,3.0,...,50.0,5.0,35.0,10.0,0.555556,0.56,-0.33,0.333333,0.0020,0.775778
3640,Vitnam,4.0,0.0,923.076904,4.0,18.461538,0.0,1.0,3.0,2.0,...,50.0,10.0,10.0,30.0,-0.500000,-0.50,0.33,1.000000,0.0020,0.248000
3644,Vitnam,2.0,0.0,1538.461548,2.0,1.230769,0.0,1.0,3.0,2.0,...,70.0,10.0,10.0,10.0,0.000000,0.00,0.33,1.000000,0.0020,0.498000
3645,Vitnam,2.0,0.0,1230.769287,2.0,1.000000,1.0,1.0,3.0,1.0,...,40.0,40.0,10.0,10.0,0.000000,0.00,-1.00,0.666667,0.0020,0.498000


# Linear Regression

In [232]:
# OLS of home bias on patriotism, financial literacy, demographics, income and
# country dummies (China, Vietnam, Japan).
# The dependent variable is the 'Home Bias' column, i.e. homebias rounded to 2 decimals.
regressor_names = [
    'Patriotism', 'Financial Literacy', 'female', 'age', 'USD_ppp_income',
    'China_D', 'Vietnam_D', 'Japan_D',
]
design_matrix = sm.add_constant(dfr[regressor_names])  # adds the intercept column 'const'

# missing='drop' removes rows with NaN in the dependent variable or the regressors.
home_bias_model = sm.OLS(dfr['Home Bias'], design_matrix, missing='drop')

# NOTE(review): HAC standard errors with one lag depend on the row order of dfr;
# confirm this is intended for respondent-level data rather than an HC-type estimator.
home_bias_fit = home_bias_model.fit(cov_type='HAC', cov_kwds={'maxlags': 1})
home_bias_fit.summary()

0,1,2,3
Dep. Variable:,Home Bias,R-squared:,0.009
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,2.507
Date:,"Thu, 10 Sep 2020",Prob (F-statistic):,0.0104
Time:,13:38:39,Log-Likelihood:,-1843.1
No. Observations:,2104,AIC:,3704.0
Df Residuals:,2095,BIC:,3755.0
Df Model:,8,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.0618,0.072,-0.860,0.390,-0.203,0.079
Patriotism,0.0213,0.024,0.898,0.369,-0.025,0.068
Financial Literacy,-0.0352,0.046,-0.761,0.447,-0.126,0.055
female,0.0689,0.026,2.625,0.009,0.017,0.120
age,0.0045,0.002,1.988,0.047,6.31e-05,0.009
USD_ppp_income,-2.181e-06,9.46e-07,-2.305,0.021,-4.04e-06,-3.26e-07
China_D,0.0103,0.033,0.315,0.753,-0.054,0.075
Vietnam_D,0.0432,0.035,1.236,0.216,-0.025,0.112
Japan_D,-0.0028,0.055,-0.050,0.960,-0.111,0.106

0,1,2,3
Omnibus:,40.666,Durbin-Watson:,1.979
Prob(Omnibus):,0.0,Jarque-Bera (JB):,22.47
Skew:,-0.015,Prob(JB):,1.32e-05
Kurtosis:,2.495,Cond. No.,76200.0


In [233]:
# Persist the prepared analysis frame (including all derived columns) as CSV in the
# working directory, using pandas' default to_csv options.
dfr.to_csv(path_or_buf='relevantdata.csv')