### Social Capital Variables

In [1]:
# three sources downloaded from: https://aese.psu.edu/nercrd/community/social-capital-resources
## Source 1: 1990, 1997, 2005              => Used in this code: 1990
## Source 2:       1997, 2005, 2009        => Used in this code: 1997, 2005, 2009
## Source 3:                        2014   => Used in this code: 2014

In [2]:
# Key variables: pvote, respn, nccs, assn.
## pvote: no issue.
## respn: no issue.
## nccs: underestimated in 1990. => nccs in 1990 should be corrected with time-trend method.
## assn: 12 orgs in 1990 and 1997 while 10 orgs in later years. The common 10 orgs should be considered. => assn in 1990 and 1997 should be corrected.

In [3]:
# Data point in all three sources
## assn in 90 97 05 09 14
## nccs in 90 97 05 09 14
## pvote in 88 92 96 04 08 12 (i.e., every 4 years from 1988)
## respn in 90 00 05 10 (i.e., every 5 years from 1990) 

# But, the years are not considered. Social capital variables are measured as if they are measured in 1990, 1997, 2005, 2009, and 2014.

In [4]:
# Notes
## assn is not deflated in the oldest version.
## The description of assn is different across versions.
## Deflation across different datasets should be considered carefully.

In [5]:
# References
## Hasan, I., Chun Keung Hoi, Q. Wu, and H. Zhang. 2017. Social Capital and Debt Contracting: Evidence from Bank Loans and Public Bonds. Journal of Financial and Quantitative Analysis 52 (3): 1017–1047.
## Hasan, I., Chun-Keung (stan) Hoi, Q. Wu, and H. Zhang. 2017. Does Social Capital Matter in Corporate Decisions? Evidence from Corporate Tax Avoidance. Journal of Accounting Research 55 (3): 629–668.
## Hasan, M. M., and A. Habib. 2019. Social capital and trade credit. International Review of Financial Analysis 61: 158–174.

In [12]:
nccs97= sc97[['fips', 'nccs97']]
nccs05= sc05[['fips', 'nccs05']]
nccs09= sc09[['fips', 'nccs09']]
print(nccs97.head(1),"\n","*"*100)
print(nccs05.head(1),"\n","*"*100)
print(nccs09.head(1),"\n","*"*100)
nccs= pd.merge(nccs97, nccs05, on='fips')
nccs= pd.merge(nccs, nccs09, on='fips')
nccs['nccs_growth1']=1+(nccs['nccs05']-nccs['nccs97']) / nccs['nccs97']
nccs['nccs_growth2']=1+(nccs['nccs09']-nccs['nccs05']) / nccs['nccs05']
nccs['nccs_growth_avg']= (nccs['nccs_growth1']+nccs['nccs_growth2'])/2
nccs['nccs90']= (nccs['nccs97']/nccs['nccs_growth_avg']).round(0) # nccs90 estimated
nccs.head(2)

   fips  nccs97
0  1001    94.0 
 ****************************************************************************************************
   fips  nccs05
0  1001   143.0 
 ****************************************************************************************************
   fips  nccs09
0  1001   182.0 
 ****************************************************************************************************


Unnamed: 0,fips,nccs97,nccs05,nccs09,nccs_growth1,nccs_growth2,nccs_growth_avg,nccs90
0,1001,94.0,143.0,182.0,1.521277,1.272727,1.397002,67.0
1,1003,404.0,588.0,737.0,1.455446,1.253401,1.354423,298.0


In [13]:
sc90=pd.merge(sc90, nccs[['fips','nccs90']], on="fips")
sc90[['fips','area','nccs90', 'nccs90']].head(2)

Unnamed: 0,fips,area,nccs90,nccs90.1
0,1001,"Autauga, AL",67.0,67.0
1,1003,"Baldwin, AL",298.0,298.0


### Unifying variables (1/2)

In [14]:
sc_sets= [sc90, sc97, sc05, sc09, sc14]

In [15]:
for df in sc_sets:
    df.columns= df.columns.str.strip().str.lower()
    df.columns=df.columns.str.replace('ski90pcm','sk90_unstd',regex=True)
    df.columns=df.columns.str.replace('ski90std','sk90',regex=True)
    df.columns=df.columns.str.replace('fitnes','fit',regex=True)
    df.columns=df.columns.str.replace('fitns','fit',regex=True)
    df.columns=df.columns.str.replace('religious','relig',regex=True)
    df.columns=df.columns.str.replace('business','bus',regex=True)
    df.columns=df.columns.str.replace('political','pol',regex=True)
    df.columns=df.columns.str.replace('professional','prof',regex=True)
    df.columns=df.columns.str.replace('bowling','bowl',regex=True)
    df.columns=df.columns.str.replace('recreational','fit',regex=True)
    df.columns=df.columns.str.replace('sports','sport',regex=True)

### Check raw each data (after unifying variables)

In [16]:
sc_sets[0].head(2)

Unnamed: 0,fips,area,bowl90,civic90,fit90,golf90,relig90,sport90,memspt90,pol90,prof90,bus90,labor90,memnec90,assn90,respn90,pvote88,pvote92,pop90,sk90_unstd,sk90,year,nccs90
0,1001,"Autauga, AL",0,7,1,0,21,0,1,0,0,2,5,0,37,65.0,48.337755,64.603714,34222,-0.897834,-0.474257,1990,67.0
1,1003,"Baldwin, AL",2,29,4,1,80,0,9,1,0,7,3,3,139,65.0,48.933977,63.887171,98280,-0.362414,-0.195187,1990,298.0


In [17]:
sc_sets[1].head(2)

Unnamed: 0,fips,area,bowl97,civic97,fit97,golf97,relig97,sport97,memspt97,pol97,prof97,bus97,labor97,memnec97,respn00,pvote96,pop97,nccs97,assn97,sk97,year
0,1001,"Autauga, AL",0,4,0,0,35,1,1,0,0,4,5,0,0.63,0.559864,41238,94.0,1.212474,-0.684649,1997
1,1003,"Baldwin, AL",1,20,11,7,123,1,9,0,3,9,4,1,0.54,0.528056,130164,404.0,1.452014,-0.739966,1997


In [18]:
sc_sets[2].head(2)

Unnamed: 0,fips,area,relig05,civic05,bus05,pol05,prof05,labor05,bowl05,fit05,golf05,sport05,pop05,pvote04,respn05,nccs05,assn05,sk05,year
0,1001,"Autauga, AL",40,4,3,1,0,5,1,3,2,0,49676,0.58,0.63,143.0,1.187696,-0.718073,2005
1,1003,"Baldwin, AL",149,17,9,0,3,3,1,16,9,0,162183,0.57,0.54,588.0,1.276336,-0.868675,2005


In [19]:
sc_sets[3].head(2)

Unnamed: 0,fips,area,relig09,civic09,bus09,pol09,prof09,labor09,bowl09,fit09,golf09,sport09,pop09,respn10,pvote08,nccs09,assn09,sk09,year
0,1001,"Autauga, AL",50,7,3,0,1,5,1,4,2,0,54135,0.78,0.635648,182.0,1.348481,-0.32673,2009
1,1003,"Baldwin, AL",161,21,7,0,1,2,2,18,8,0,179406,0.73,0.608996,737.0,1.226269,-0.422958,2009


In [20]:
sc_sets[4].head(2)

Unnamed: 0,fips,area,relig14,civic14,bus14,pol14,prof14,labor14,bowl14,fit14,golf14,sport14,pop14,assn14,pvote12,respn10,nccs14,sk14,year
0,1001,"Autauga, AL",53,7,3,0,1,4,1,5,2,0,55290,1.37457,0.644956,0.78,157,-0.631003,2014
1,1003,"Baldwin, AL",169,16,9,0,3,1,1,25,7,0,199713,1.15666,0.674735,0.73,718,-0.555396,2014


### Unifying variables (2/2) + Deleting years in variables

In [21]:
sc_sets[0].drop(columns=['pvote88', 'memspt90', 'memnec90'], inplace=True) # drop pvote88. i.e., keep pvote92 only.
sc_sets[0].drop(columns=['sk90_unstd'], inplace=True)
sc_sets[0]['assn90']= sc_sets[0]['assn90']/(sc_sets[0]['pop90']/1000)
sc_sets[0].head(1)

sc_sets[1].drop(columns=['memspt97', 'memnec97'], inplace=True)

for df in sc_sets:
    df.columns=df.columns.str.replace('\d+','',regex=True).str.strip()

In [22]:
for df in sc_sets:
    df['nccs']=df['nccs']/(df['pop']/10000)

### Check raw each data (after unifying variables)

In [23]:
sc_sets[0].head(1)

Unnamed: 0,fips,area,bowl,civic,fit,golf,relig,sport,pol,prof,bus,labor,assn,respn,pvote,pop,sk,year,nccs
0,1001,"Autauga, AL",0,7,1,0,21,0,0,0,2,5,1.081176,65.0,64.603714,34222,-0.474257,1990,19.578049


In [24]:
sc_sets[1].head(1)

Unnamed: 0,fips,area,bowl,civic,fit,golf,relig,sport,pol,prof,bus,labor,respn,pvote,pop,nccs,assn,sk,year
0,1001,"Autauga, AL",0,4,0,0,35,1,0,0,4,5,0.63,0.559864,41238,22.79451,1.212474,-0.684649,1997


In [25]:
sc_sets[2].head(1)

Unnamed: 0,fips,area,relig,civic,bus,pol,prof,labor,bowl,fit,golf,sport,pop,pvote,respn,nccs,assn,sk,year
0,1001,"Autauga, AL",40,4,3,1,0,5,1,3,2,0,49676,0.58,0.63,28.786537,1.187696,-0.718073,2005


In [26]:
sc_sets[3].head(1)

Unnamed: 0,fips,area,relig,civic,bus,pol,prof,labor,bowl,fit,golf,sport,pop,respn,pvote,nccs,assn,sk,year
0,1001,"Autauga, AL",50,7,3,0,1,5,1,4,2,0,54135,0.78,0.635648,33.619655,1.348481,-0.32673,2009


In [27]:
sc_sets[4].head(1)

Unnamed: 0,fips,area,relig,civic,bus,pol,prof,labor,bowl,fit,golf,sport,pop,assn,pvote,respn,nccs,sk,year
0,1001,"Autauga, AL",53,7,3,0,1,4,1,5,2,0,55290,1.37457,0.644956,0.78,28.395732,-0.631003,2014


### Calculating assn based on sum of 10 orgs

In [28]:
for df in sc_sets:
    df.drop(columns=["assn"], inplace=True)
    df['assn']= (df['relig'] + df['civic'] + df['bus'] + df['pol'] + df['prof'] + df['labor'] + df['bowl'] + df['fit'] + df['golf'] + df['sport']) / (df['pop']/1000)
    df.drop(columns= ['bowl','civic','fit','golf','relig','sport','pol','prof','bus','labor'],inplace=True)
#     df= df[['fips','area', 'year', 'pop', 'assn', 'assn_cal', 'nccs', 'pvote', 'respn', 'sk']]
    print(df[['fips','area','assn']].head(1))

   fips         area      assn
0  1001  Autauga, AL  1.051955
   fips         area      assn
0  1001  Autauga, AL  1.188224
   fips         area      assn
0  1001  Autauga, AL  1.187696
   fips         area      assn
0  1001  Autauga, AL  1.348481
   fips         area     assn
0  1001  Autauga, AL  1.37457


In [29]:
sc_sets[0].head(1)

Unnamed: 0,fips,area,respn,pvote,pop,sk,year,nccs,assn
0,1001,"Autauga, AL",65.0,64.603714,34222,-0.474257,1990,19.578049,1.051955


In [30]:
sc_sets[-1].head(1)

Unnamed: 0,fips,area,pop,pvote,respn,nccs,sk,year,assn
0,1001,"Autauga, AL",55290,0.644956,0.78,28.395732,-0.631003,2014,1.37457


### Dropping obs with missing values

In [31]:
for df in sc_sets:
    df.dropna(inplace=True)
    print(df['fips'].count())

2426
3100
3107
3106
3141


In [32]:
sc14.head()

Unnamed: 0,fips,area,pop,pvote,respn,nccs,sk,year,assn
0,1001,"Autauga, AL",55290,0.644956,0.78,28.395732,-0.631003,2014,1.37457
1,1003,"Baldwin, AL",199713,0.674735,0.73,35.951591,-0.555396,2014,1.15666
2,1005,"Barbour, AL",26815,0.665031,0.63,34.309155,-0.891036,2014,0.820436
3,1007,"Bibb, AL",22549,0.656838,0.58,23.947847,-0.906582,2014,1.020001
4,1009,"Blount, AL",57658,0.708668,0.8,18.731139,-1.01328,2014,0.849839


### PCA

In [42]:
df=sc90[['fips', 'respn','pvote','assn','nccs']].copy()
features = ['respn','pvote','assn','nccs']
df.head(1)

Unnamed: 0,fips,respn,pvote,assn,nccs
0,1001,65.0,64.603714,1.051955,19.578049


In [43]:
x = df.loc[:, features].values
# Separating out the target
y = df.loc[:,['fips']].values
# Standardizing the features
x = StandardScaler().fit_transform(x)

In [44]:
pca = PCA(n_components=1)
principalComponents = pca.fit_transform(x)
pca_df = pd.DataFrame(data = principalComponents
             , columns = ['pc1'])
pca_df.head(2)

Unnamed: 0,pc1
0,-0.35935
1,0.147071


In [45]:
df2 = pd.concat([sc90, pca_df], axis = 1)

In [46]:
df2.head(1)

Unnamed: 0,fips,area,respn,pvote,pop,sk,year,nccs,assn,pc1
0,1001.0,"Autauga, AL",65.0,64.603714,34222.0,-0.474257,1990.0,19.578049,1.051955,-0.35935


In [47]:
df2['sk'].corr(df2['pc1'])

0.2245640258309765

### Concatenating five social captial sets

In [33]:
sc= pd.concat([sc90, sc97, sc05, sc09, sc14])
print(sc.shape)

(15572, 10)


In [36]:
sc.sort_values(by=["fips", "year"]).head(10)

Unnamed: 0,fips,area,respn,pvote,pop,sk,year,nccs
0,1001,"Autauga, AL",65.0,64.603714,34222,-0.474257,1990,67.0
0,1001,"Autauga, AL",0.63,0.559864,41238,-0.684649,1997,94.0
0,1001,"Autauga, AL",0.63,0.58,49676,-0.718073,2005,143.0
0,1001,"Autauga, AL",0.78,0.635648,54135,-0.32673,2009,182.0
0,1001,"Autauga, AL",0.78,0.644956,55290,-0.631003,2014,157.0
1,1003,"Baldwin, AL",65.0,63.887171,98280,-0.195187,1990,298.0
1,1003,"Baldwin, AL",0.54,0.528056,130164,-0.739966,1997,404.0
1,1003,"Baldwin, AL",0.54,0.57,162183,-0.868675,2005,588.0
1,1003,"Baldwin, AL",0.73,0.608996,179406,-0.422958,2009,737.0
1,1003,"Baldwin, AL",0.73,0.674735,199713,-0.555396,2014,718.0


### To-do
1. PCA code from Hasan et al. (2017 JAR)
2. Code to GitHub
3. Check variable definitions again.