In [3]:
import pandas as pd
from datetime import datetime, date, timedelta
import pytz
import numpy as np
import time
%matplotlib inline
import matplotlib.pyplot as plt
# The 'seaborn-white' style sheet was renamed to 'seaborn-v0_8-white' in
# matplotlib 3.6 and the old name was removed in later releases. Use whichever
# name this matplotlib installation provides so the cell runs on old and new versions.
plt.style.use('seaborn-v0_8-white' if 'seaborn-v0_8-white' in plt.style.available else 'seaborn-white')
import glob
import statistics
from scipy import stats
from scipy.stats import ttest_1samp
from scipy.stats import ttest_ind
from statsmodels.distributions.mixture_rvs import mixture_rvs
import statsmodels.api as sm

from numpy.random import seed
from numpy.random import randn
from scipy.stats import mannwhitneyu
import pickle
from statsmodels.iolib.summary2 import summary_col
# Print NumPy floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

import matplotlib.colors as mcolors

#Redundant libraries
#import pandas_market_calendars as mcal
#from empyrical import max_drawdown, alpha_beta, sharpe_ratio, alpha, annual_volatility, annual_return, cum_returns


In [4]:
# pd.read_spss (used below to load data.sav) depends on the optional pyreadstat
# package. Install it with the interpreter that runs this kernel (sys.executable)
# so the package lands in the kernel's own environment rather than in whatever
# `pip` happens to be first on the shell PATH.
import sys
!{sys.executable} -m pip install pyreadstat



# Read Data

In [5]:
# Load the raw SPSS data set from the working directory into `df` and display it.
df = pd.read_spss(path='data.sav')
df

Unnamed: 0,lfdn,lastpage,quality,duration,country,country_code,eu_country,Estonia_D,Taiwan_D,China_D,...,bta_sub,bta,bta_real1a,bta_real1b,bta_real1,bta_real2a,tor,toe,bta_real2b,bta_real2
0,520.0,5931865.0,,35.0,Taiwan,3.0,0.0,0.0,1.0,0.0,...,,,,,,,,,,
1,301.0,5931889.0,,3179.0,Taiwan,3.0,0.0,0.0,1.0,0.0,...,0.32,0.1,0.0,0.32,0.0,-0.24,0.715911,0.55811,0.157801,-0.082199
2,468.0,5931889.0,,1973.0,Taiwan,3.0,0.0,0.0,1.0,0.0,...,,,,,,,,,,
3,937.0,5934355.0,,1373.0,Taiwan,3.0,0.0,0.0,1.0,0.0,...,,,,,,,,,,
4,430.0,5931865.0,,51.0,Taiwan,3.0,0.0,0.0,1.0,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3643,589.0,5931867.0,,27.0,Vitnam,5.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
3644,3413.0,5931889.0,,2995.0,Vitnam,5.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
3645,2027.0,5931889.0,,3142.0,Vitnam,5.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
3646,2483.0,5934355.0,,2603.0,Vitnam,5.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,


In [6]:
#df = df[df.country != 'Germany']
#df = df[df.country != 'Estonia']
#df

In [7]:
# Build the analysis frame `dfr` from the raw data: the respondent's country,
# control variables, country dummies and the raw answers used for patriotism
# (XP1), financial literacy (XF1-XF3) and home bias (XFD8, XFD9). The X* names
# are the external variable names from the codebook.
selected_columns = [
    'country', 'job', 'parent', 'uni_degree', 'patient',
    'XF1', 'XF2', 'XF3', 'female', 'age',
    'Estonia_D', 'Taiwan_D', 'China_D', 'Vietnam_D', 'Germany_D', 'Japan_D',
    'XP1', 'XFD8', 'XFD9',
]

# Keep complete cases only. The explicit .copy() makes dfr an independent frame,
# so the column assignments below do not trigger SettingWithCopyWarning.
dfr = df[selected_columns].dropna().copy()

# Home bias: normalised difference between the two allocation answers.
# NOTE(review): presumably XFD8 is the domestic and XFD9 the foreign allocation —
# confirm against the codebook. Rows with XFD8 + XFD9 == 0 produce NaN/inf here.
dfr['homebias'] = (dfr['XFD8']-dfr['XFD9'])/(dfr['XFD8']+dfr['XFD9'])
dfr['Home Bias'] = dfr['homebias'].round(2)

# Reverse-code XP1 and rescale it linearly so that 1 -> 1 and 4 -> -1
# (assumes XP1 is answered on a 1-4 scale — TODO confirm). XP1 is overwritten.
dfr['XP1'] = ((-dfr['XP1']+5)-1)/3*2-1
dfr['Patriotism'] = dfr['XP1'].round(2)

# Financial literacy: share of the three questions answered with the value in
# the key below (presumably the correct answers — verify against the codebook).
literacy_answer_key = {'XF1': 1, 'XF2': 3, 'XF3': 2}
dfr['Financial Literacy'] = sum(
    (dfr[question] == answer).astype(int)
    for question, answer in literacy_answer_key.items()
) / len(literacy_answer_key)

# CAPM share per country, subtracted from the XFD8 share to get 'Home Bias 2'.
# Labels are matched case-insensitively: the original lookup mixed 'japan' with
# capitalised names such as 'Taiwan', so a casing mismatch with the data would
# silently give that country a share of 0. 'vitnam' follows the spelling used
# in the data. Countries without an entry get a share of 0, as before.
capm_share_by_country = {
    'taiwan': 0.0194,
    'china': 0.0936,
    'japan': 0.0783,
    'vitnam': 0.002,
    'estonia': 0.0,
    'germany': 0.0260,
}
country_key = dfr['country'].astype(str).str.strip().str.lower()
dfr['CAPM'] = country_key.map(capm_share_by_country).fillna(0.0)
dfr['Home Bias 2'] = (dfr['XFD8'])/(dfr['XFD8']+dfr['XFD9'])-dfr['CAPM']

# Exclude respondents from Germany and Estonia from the analysis sample.
dfr = dfr[~country_key.isin(['germany', 'estonia'])]
dfr

Unnamed: 0,country,job,parent,uni_degree,patient,XF1,XF2,XF3,female,age,...,Japan_D,XP1,XFD8,XFD9,homebias,Home Bias,Patriotism,Financial Literacy,CAPM,Home Bias 2
0,Taiwan,7.0,0.0,1.0,1.0,1.0,3.0,2.0,0.0,22.0,...,0.0,0.333333,60.0,40.0,0.200000,0.20,0.33,1.000000,0.0194,0.580600
1,Taiwan,4.0,0.0,1.0,1.0,1.0,3.0,2.0,0.0,22.0,...,0.0,0.333333,60.0,40.0,0.200000,0.20,0.33,1.000000,0.0194,0.580600
2,Taiwan,1.0,0.0,0.0,1.0,1.0,3.0,2.0,0.0,18.0,...,0.0,-0.333333,5.0,5.0,0.000000,0.00,-0.33,1.000000,0.0194,0.480600
3,Taiwan,7.0,0.0,1.0,0.0,1.0,1.0,3.0,0.0,25.0,...,0.0,-0.333333,25.0,25.0,0.000000,0.00,-0.33,0.333333,0.0194,0.480600
4,Taiwan,2.0,0.0,0.0,1.0,1.0,3.0,1.0,0.0,21.0,...,0.0,0.333333,20.0,70.0,-0.555556,-0.56,0.33,0.666667,0.0194,0.202822
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3639,Vitnam,4.0,0.0,1.0,0.0,1.0,1.0,3.0,1.0,34.0,...,0.0,-0.333333,35.0,10.0,0.555556,0.56,-0.33,0.333333,0.0020,0.775778
3640,Vitnam,4.0,0.0,0.0,0.0,1.0,3.0,2.0,0.0,26.0,...,0.0,0.333333,10.0,30.0,-0.500000,-0.50,0.33,1.000000,0.0020,0.248000
3644,Vitnam,2.0,0.0,0.0,0.0,1.0,3.0,2.0,0.0,21.0,...,0.0,0.333333,10.0,10.0,0.000000,0.00,0.33,1.000000,0.0020,0.498000
3645,Vitnam,2.0,0.0,0.0,1.0,1.0,3.0,1.0,1.0,21.0,...,0.0,-1.000000,10.0,10.0,0.000000,0.00,-1.00,0.666667,0.0020,0.498000


# Linear Regression

In [8]:
# OLS of the rounded home-bias measure on patriotism, financial literacy,
# gender, age and country dummies, with an intercept. Standard errors use the
# HAC covariance estimator with one lag; rows with missing values are dropped.
# NOTE(review): Germany and Estonia rows are removed from dfr before this cell,
# yet all four remaining country dummies enter together with a constant —
# confirm that a reference category is still present in the sample.
regressor_columns = [
    'Patriotism', 'Financial Literacy', 'female', 'age',
    'Taiwan_D', 'China_D', 'Vietnam_D', 'Japan_D',
]
design_matrix = sm.add_constant(dfr[regressor_columns])
home_bias_model = sm.OLS(dfr['Home Bias'], design_matrix, missing='drop')
home_bias_fit = home_bias_model.fit(cov_type='HAC', cov_kwds={'maxlags': 1})
home_bias_fit.summary()

0,1,2,3
Dep. Variable:,Home Bias,R-squared:,0.014
Model:,OLS,Adj. R-squared:,0.009
Method:,Least Squares,F-statistic:,2.852
Date:,"Sat, 19 Sep 2020",Prob (F-statistic):,0.00377
Time:,12:29:14,Log-Likelihood:,-1345.3
No. Observations:,1607,AIC:,2709.0
Df Residuals:,1598,BIC:,2757.0
Df Model:,8,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.1988,0.140,-1.424,0.154,-0.472,0.075
Patriotism,0.0540,0.027,2.022,0.043,0.002,0.106
Financial Literacy,-0.0043,0.049,-0.087,0.931,-0.101,0.093
female,0.0503,0.029,1.736,0.083,-0.007,0.107
age,0.0040,0.003,1.551,0.121,-0.001,0.009
Taiwan_D,0.0629,0.119,0.530,0.596,-0.170,0.296
China_D,0.1348,0.119,1.133,0.257,-0.098,0.368
Vietnam_D,0.1686,0.120,1.403,0.161,-0.067,0.404
Japan_D,0.1470,0.125,1.173,0.241,-0.099,0.393

0,1,2,3
Omnibus:,14.011,Durbin-Watson:,1.993
Prob(Omnibus):,0.001,Jarque-Bera (JB):,9.563
Skew:,0.026,Prob(JB):,0.00838
Kurtosis:,2.626,Cond. No.,748.0


In [9]:
# Persist the prepared analysis frame next to the notebook; the row index is
# written as the first (unnamed) column.
dfr.to_csv(path_or_buf='relevantdata.csv')