In [1]:
import scipy as sp
import numpy as np
import pandas as pd
import timeit
import re
import json
import pickle
import fastparquet
import os
os.chdir('/mnt/t48/bighomes-active/sfeng/patentdiffusion/')
seed = 3
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
import datetime
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.iolib.summary2 as summary2

  from pandas.core import datetools


In [2]:
pathdir = "DataStore/2018-10/Reg1016/"
reg_f = "reg_model_1016.pkl"

regs = pickle.load(open(pathdir+reg_f, "rb"))

# Model names to equations
regs["model_names_eqn"] = dict(zip(regs["model_names"].tolist(), regs["model_names"].index.tolist()))

In [14]:
# JTH Rep
jr = pd.read_csv("DataStore/2018-07-P3/JTHReg0727/JTH_res_out_0726.csv",index_col=0)

# FE results (0930 - uses full dataset) and Sim results (1003 - uses only those with sim pc, sim pc msa values)
rr = pd.read_csv("DataStore/2018-10/Reg1016/reg_pairs_out_HC1_1016.csv",index_col=0)

# Use reg_model_1002 to assign model numbers
rr["Model Num"] = rr["Model"].map(regs["model_names_eqn"])

# Get rid of models without a number (squared ones excluded from analysis)
print(len(rr))
rr = rr.loc[rr["Model Num"].notnull()]
print(len(rr))
rr["samp"] = rr["samp"].apply(eval)
rr = pd.concat([jr,rr],axis=0)
# Reset index
rr = rr.reset_index(drop=True)
# Integer model number
rr["Model Num"] = rr["Model Num"].astype(int)

2308
2308


In [15]:
rr["id"] = list(zip(rr["samp"], rr["Model Num"]))
rr["id"] = rr["id"].astype(str)

# KS dict
ksd = rr[["id", "LKS"]].drop_duplicates()
ksd = dict(zip(ksd["id"], ksd["LKS"]))

# Sample dict
sd = rr[["id", "samp"]].drop_duplicates()
sd = dict(zip(sd["id"], sd["samp"]))

In [16]:
rr.head()

Unnamed: 0,1975-85,1985-95,1995-05,2005-15,LKS,Model,Model Num,index,samp,id
0,0.0530***,0.0624***,0.0646***,0.0740***,perc_match_10,"Perc Match Targ MSA, Year FE",0,"$I(MSA \, Match)$",JTH Rep,"('JTH Rep', 0)"
1,(0.0016),(0.0011),(0.0009),(0.0012),perc_match_10,"Perc Match Targ MSA, Year FE",0,,JTH Rep,"('JTH Rep', 0)"
2,58647,107358,185154,154619,perc_match_10,"Perc Match Targ MSA, Year FE",0,$N$,JTH Rep,"('JTH Rep', 0)"
3,0.02,0.03,0.03,0.02,perc_match_10,"Perc Match Targ MSA, Year FE",0,Adjusted $R^2$,JTH Rep,"('JTH Rep', 0)"
4,0.0523***,0.0616***,0.0642***,0.0729***,perc_match_10,"Perc Match Targ MSA, PC FE",1,"$I(MSA \, Match)$",JTH Rep,"('JTH Rep', 1)"


### 1. Results with Year FE and PC Controls

In [6]:
repl1 = pickle.load(open("DataStore/2018-07-P3/reg_names_dict.pkl", "rb"))
# repl2 = {k: "\multicolumn{2}{c}{"+v+"}" if len(v)>1 else v for k,v in repl1.items() }

In [26]:
# Index by each table: Norm/Raw, KS, Sample
tab_ind = {}
tab_ind[("raw", "pct_cite", "JTH", "No FE")] = ("JTH Rep", 0)
tab_ind[("norm", "pct_cite", "JTH", "No FE")] = ("JTH Rep", 2)
tab_ind[("raw", "sim_cite", "JTH", "No FE")] = ("JTH Targ Sim", 4)
tab_ind[("norm", "sim_cite", "JTH", "No FE")] = ("JTH Targ Sim", 6)

tab_ind[("raw", "pct_cite", "JTH", "PC FE")] = ("JTH Rep", 1)
tab_ind[("norm", "pct_cite", "JTH", "PC FE")] = ("JTH Rep", 3)
tab_ind[("raw", "sim_cite", "JTH", "PC FE")] = ("JTH Targ Sim", 5)
tab_ind[("norm", "sim_cite", "JTH", "PC FE")] = ("JTH Targ Sim", 7)

for samp in ["naics_name", "primclass"]:
    if samp == "naics_name":
#         mods = ["PC FE-Year FE", "PC M-Year FE"]
        mods = ["PC FE-Year FE", "PC M-Year FE", "PC M", "CC M", "NPC M", "Inv M", "Lawyer M", "Examiner M"]
    else:
        mods = ["PC FE-Year FE", "Exam FE-Year FE"]
#     for dm in ["docvecs", "ldavecs", "tp_pct_common_cited"]:
    for dm in ["docvecs", "tp_pct_common_cited"]:
        for mod in mods:
#             tab_ind[("raw", dm, samp, mod)] = ((dm, samp), regs["model_names_eqn"][mod])
            tab_ind[("norm", dm, samp, mod)] = ((dm, samp), regs["model_names_eqn"]["N "+mod])

#### 1.1 Results by year

In [27]:
# Results by year
ygs = ["index", "1975-85", "1985-95", "1995-05", "2005-15"]
tabp = {}
for k in tab_ind.keys():
    
    i = str(tab_ind[k])
    try:
        mod = rr.loc[(rr["id"] == i), ["Model", "Model Num", "samp"]]\
        .drop_duplicates().values[0]
    except Exception:
        continue
    
    if "JTH" in k:
#         ygs = ["index", "1975-85", "1985-95", "1995-05",]
        ygs = ["index", "1975-85", "1985-95", "1995-05", "2005-15"]
    else:
        ygs = ["index", "1975-85", "1985-95", "1995-05", "2005-15"]
        
    # Select appropriate years
    tab = rr.loc[(rr["id"] == i), ygs].fillna("").set_index("index")

    # Add other columns
    if "No FE" in k:
        tab.loc["Year FE"] = [True]*len(tab.columns)
        tab.loc["PC FE"] = [False]*len(tab.columns)
    else:
        tab.loc["Year FE"] = [True]*len(tab.columns)
        tab.loc["PC FE"] = [True]*len(tab.columns)

#     # Pandas tables
    tab = tab.reset_index()
    # Replace "index"
    tab = tab.rename(columns={"index":""})
    # Replace everything else
    tab = tab.replace(repl1).copy()

    tabp[k] = tab
tab_f = tabp      

In [28]:
for k,v in tab_f.items():
    if (k[0] == "norm"):
        print("\n"+str(k)+"\n"+"\n")
        ncols = len(v.columns)
        with pd.option_context("max_colwidth", 1000):
            print(v.to_latex(index=False,escape=False, column_format="lccc"))


('norm', 'pct_cite', 'JTH', 'No FE')


\begin{tabular}{lccc}
\toprule
                   &    1975-85 &    1985-95 &    1995-05 &    2005-15 \\
\midrule
 $I(MSA \, Match)$ &  0.2448*** &  0.2881*** &  0.2983*** &  0.3417*** \\
                   &   (0.0074) &   (0.0052) &   (0.0041) &   (0.0054) \\
               $N$ &      58647 &     107358 &     185154 &     154619 \\
    Adjusted $R^2$ &       0.02 &       0.03 &       0.03 &       0.02 \\
           Year FE &       True &       True &       True &       True \\
             PC FE &      False &      False &      False &      False \\
\bottomrule
\end{tabular}


('norm', 'sim_cite', 'JTH', 'No FE')


\begin{tabular}{lccc}
\toprule
                   &    1975-85 &   1985-95 &   1995-05 &   2005-15 \\
\midrule
 $I(MSA \, Match)$ &  0.0469*** &    0.0154 &   -0.0047 &    0.0157 \\
                   &   (0.0145) &  (0.0097) &  (0.0067) &  (0.0108) \\
               $N$ &      38541 &     69612 &    122217 &     52710 \\
    Adjuste

In [25]:
# for k,v in tab_f.items():
#     if (k[0] == "raw"):
#         print("\n"+str(k)+"\n"+"\n")
#         ncols = len(v.columns)
#         with pd.option_context("max_colwidth", 1000):
#             print(v.to_latex(index=False,escape=False, column_format="lccc"))

### 2. Results with Field Sim

In [19]:
repl1 = pickle.load(open("DataStore/2018-07-P3/reg_names_dict.pkl", "rb"))
dms = ["docvecs", "ldavecs", "tp_pct_common_cited"]
# dms = ["docvecs"]
# repl2 = {k: "\multicolumn{2}{c}{"+v+"}" if len(v)>1 else v for k,v in repl1.items() }
            
# Add names for standard errors
ygs = ["index", "1975-85", "1985-95", "1995-05", "2005-15"]
rr2 = rr.copy()
se_index = rr2[ygs+["index"]].loc[rr2["index"].isnull()].index
above_var = rr2.loc[se_index-1, "index"].tolist()
rr2.loc[se_index, "index"] = ["(s.e.) "+v for v in above_var]

#### 2.1 Results by year

In [29]:
# Index by each table: Norm/Raw, KS, Sample
tab_ind = {}
for samp in ["naics_name", "primclass"]:
    mods_all = ["PC M-Sim PC", "PC M-Sim PC MSA", "PC M-Int PC", "PC M-Int PC MSA",
                "Exam FE-Sim PC MSA", "Exam FE-Int PC MSA",
                "Exam FE-Break Int PC MSA"]
    if samp == "naics_name":
        mods = mods_all[:4]
    else:
        mods = mods_all[4:]
    for dm in dms:
        for mod in mods:
#             tab_ind[("raw", dm, samp, mod)] = ((dm, samp), regs["model_names_eqn"][mod])
            tab_ind[("norm", dm, samp, mod)] = ((dm, samp), regs["model_names_eqn"]["N "+mod])

tab_ind_yr = tab_ind

In [30]:
tab_no_c = {}
tab_c = {}
no_c_ind = ["C(common_est_inv)[T.1.0]", "C(common_pat_inv)[T.True]", 
            "C(lawyer_match)[T.True]", "C(examiner_match)[T.True]",
           "C(primclass_match)[T.True]"]
no_c_ind = no_c_ind+["(s.e.) "+c for c in no_c_ind]

for k in tab_ind_yr.keys():
    
    i = str(tab_ind_yr[k])
    
    try:
        mod = rr2.loc[(rr2["id"] == i), ["Model", "Model Num", "samp"]]\
    .drop_duplicates().values[0]
    except Exception:
        continue

    tab = rr2.loc[(rr2["id"] == i), ygs].fillna("").set_index("index")

    # Add other columns
    tab.loc["Year \& PC FE"] = [True]*len(tab.columns)
    tab.loc["MSA, Inv, Lawyer,  Examiner Match \& FE"] = [True]*len(tab.columns)

    # Create gaps
    tab = tab.fillna("").reset_index()

    #1. Create table without controls
    tab2 = tab.copy()
    tab2 = tab2.loc[~(tab2["index"].isin(no_c_ind))].reset_index(drop=True)
    # Get rid of se labels
    tab2.loc[tab2["index"].apply(lambda x: "(s.e.)" in x), "index"] = ""
    tab2 = tab2.rename(columns={"index":""})
    tab2 = tab2.replace(repl1).copy()

    tab_no_c[k] = tab2

    #2. Original tables with controls
    # Get rid of se labels
    tab.loc[tab["index"].apply(lambda x: "(s.e.)" in x), "index"] = ""
    tab = tab.rename(columns={"index":""})

    # Pandas tables
    tab = tab.replace(repl1).copy()
    tab_c[k] = tab
tab_c_yr = tab_c
tab_no_c_yr = tab_no_c

In [31]:
for k,v in tab_no_c_yr.items():
    if (k[0]=='norm') & (k[1]=='docvecs'):
        print(k)
        display(v)

('norm', 'docvecs', 'naics_name', 'PC M-Sim PC')


Unnamed: 0,Unnamed: 1,1975-85,1985-95,1995-05,2005-15
0,"$I(MSA \, Match)$",0.0171**,0.0282***,0.0246***,0.0236***
1,,(0.0079),(0.0046),(0.0035),(0.0031)
2,"$sim_{DV}(pc_{i}, pc_{j})$",0.2849***,0.2949***,0.2871***,0.2941***
3,,(0.0061),(0.0038),(0.0030),(0.0029)
4,$N$,80414,226512,382447,511077
5,Adjusted $R^2$,0.12,0.12,0.13,0.09
6,Year \& PC FE,True,True,True,True
7,"MSA, Inv, Lawyer, Examiner Match \& FE",True,True,True,True


('norm', 'docvecs', 'naics_name', 'PC M-Sim PC MSA')


Unnamed: 0,Unnamed: 1,1975-85,1985-95,1995-05,2005-15
0,"$I(MSA \, Match)$",0.0100,0.0214***,0.0153***,0.0152***
1,,(0.0080),(0.0047),(0.0036),(0.0031)
2,"$sim_{DV}(pc_{i,MSA_i}, pc_{j,MSA_j})$",0.1163***,0.1145***,0.1439***,0.1576***
3,,(0.0045),(0.0027),(0.0024),(0.0024)
4,$N$,80414,226512,382447,511077
5,Adjusted $R^2$,0.10,0.10,0.11,0.07
6,Year \& PC FE,True,True,True,True
7,"MSA, Inv, Lawyer, Examiner Match \& FE",True,True,True,True


('norm', 'docvecs', 'naics_name', 'PC M-Int PC')


Unnamed: 0,Unnamed: 1,1975-85,1985-95,1995-05,2005-15
0,"$I(MSA \, Match)$",0.0174**,0.0258***,0.0233***,0.0190***
1,,(0.0078),(0.0045),(0.0035),(0.0034)
2,"$I_{MSA} * sim_{DV}(pc_{i}, pc_{j})$",-0.0009,0.0077,0.0040,0.0082**
3,,(0.0083),(0.0048),(0.0036),(0.0037)
4,"$sim_{DV}(pc_{i}, pc_{j})$",0.2851***,0.2928***,0.2860***,0.2918***
5,,(0.0065),(0.0040),(0.0031),(0.0031)
6,$N$,80414,226512,382447,511077
7,Adjusted $R^2$,0.12,0.12,0.13,0.09
8,Year \& PC FE,True,True,True,True
9,"MSA, Inv, Lawyer, Examiner Match \& FE",True,True,True,True


('norm', 'docvecs', 'naics_name', 'PC M-Int PC MSA')


Unnamed: 0,Unnamed: 1,1975-85,1985-95,1995-05,2005-15
0,"$I(MSA \, Match)$",0.0106,0.0247***,0.0176***,0.0177***
1,,(0.0087),(0.0051),(0.0039),(0.0032)
2,"$I_{MSA} * sim_{DV}(pc_{i,MSA_i}, pc_{j,MSA_j})$",0.0024,0.0134**,0.0098**,0.0200***
3,,(0.0091),(0.0053),(0.0043),(0.0044)
4,"$sim_{DV}(pc_{i,MSA_i}, pc_{j,MSA_j})$",0.1157***,0.1111***,0.1414***,0.1524***
5,,(0.0050),(0.0030),(0.0026),(0.0026)
6,$N$,80414,226512,382447,511077
7,Adjusted $R^2$,0.10,0.10,0.11,0.07
8,Year \& PC FE,True,True,True,True
9,"MSA, Inv, Lawyer, Examiner Match \& FE",True,True,True,True


('norm', 'docvecs', 'primclass', 'Exam FE-Sim PC MSA')


Unnamed: 0,Unnamed: 1,1975-85,1985-95,1995-05,2005-15
0,"$I(MSA \, Match)$",-0.0023,0.0174***,0.0205***,0.0128***
1,,(0.0098),(0.0056),(0.0041),(0.0034)
2,"$sim_{DV}(pc_{i,MSA_i}, pc_{j,MSA_j})$",0.0525***,0.0548***,0.0697***,0.0738***
3,,(0.0049),(0.0029),(0.0026),(0.0026)
4,$N$,73175,207260,355650,475340
5,Adjusted $R^2$,0.08,0.08,0.08,0.06
6,Year \& PC FE,True,True,True,True
7,"MSA, Inv, Lawyer, Examiner Match \& FE",True,True,True,True


('norm', 'docvecs', 'primclass', 'Exam FE-Int PC MSA')


Unnamed: 0,Unnamed: 1,1975-85,1985-95,1995-05,2005-15
0,"$I(MSA \, Match)$",0.0071,0.0257***,0.0320***,0.0199***
1,,(0.0134),(0.0073),(0.0054),(0.0047)
2,"$I_{MSA} * sim_{DV}(pc_{i,MSA_i}, pc_{j,MSA_j})$",-0.0101,-0.0098,-0.0150***,-0.0103**
3,,(0.0111),(0.0064),(0.0054),(0.0052)
4,"$sim_{DV}(pc_{i,MSA_i}, pc_{j,MSA_j})$",0.0543***,0.0567***,0.0729***,0.0760***
5,,(0.0053),(0.0032),(0.0028),(0.0028)
6,$N$,73175,207260,355650,475340
7,Adjusted $R^2$,0.08,0.08,0.08,0.06
8,Year \& PC FE,True,True,True,True
9,"MSA, Inv, Lawyer, Examiner Match \& FE",True,True,True,True


('norm', 'docvecs', 'primclass', 'Exam FE-Break Int PC MSA')


Unnamed: 0,Unnamed: 1,1975-85,1985-95,1995-05,2005-15
0,"$I(MSA \, Match)$",0.0059,0.0297***,0.0435***,0.0251***
1,,(0.0149),(0.0083),(0.0061),(0.0051)
2,"$I_{MSA} * I(sim_{DV,pc,MSA}>0)*sim_{DV}(pc_{i...",-0.0142,-0.0154**,-0.0307***,-0.0211***
3,,(0.0128),(0.0075),(0.0063),(0.0058)
4,"$I_{MSA} * I(sim_{DV,pc,MSA} \leq 0)*sim_{DV}(...",-0.0781,-0.0010,0.0351,-0.0135
5,,(0.0480),(0.0287),(0.0242),(0.0270)
6,"$I(sim_{DV,pc,MSA}>0)*sim_{DV}(pc_{i,MSA_i}, p...",0.0695***,0.0640***,0.0919***,0.0947***
7,,(0.0075),(0.0047),(0.0041),(0.0040)
8,"$I(sim_{DV,pc,MSA} \leq 0)*sim_{DV}(pc_{i,MSA_...",0.0197,0.0417***,0.0326***,0.0330***
9,,(0.0124),(0.0068),(0.0061),(0.0063)


In [32]:
for k,v in tab_no_c_yr.items():
    if (k[0]=='norm') & (k[1]=='docvecs'):
        print("\n"+str(k)+"\n"+"\n")
        ncols = len(v.columns)
        with pd.option_context("max_colwidth", 1000):
            print(v.to_latex(index=False,escape=False, column_format="lcccc"))


('norm', 'docvecs', 'naics_name', 'PC M-Sim PC')


\begin{tabular}{lcccc}
\toprule
                                         &    1975-85 &    1985-95 &    1995-05 &    2005-15 \\
\midrule
                       $I(MSA \, Match)$ &   0.0171** &  0.0282*** &  0.0246*** &  0.0236*** \\
                                         &   (0.0079) &   (0.0046) &   (0.0035) &   (0.0031) \\
              $sim_{DV}(pc_{i}, pc_{j})$ &  0.2849*** &  0.2949*** &  0.2871*** &  0.2941*** \\
                                         &   (0.0061) &   (0.0038) &   (0.0030) &   (0.0029) \\
                                     $N$ &      80414 &     226512 &     382447 &     511077 \\
                          Adjusted $R^2$ &       0.12 &       0.12 &       0.13 &       0.09 \\
                           Year \& PC FE &       True &       True &       True &       True \\
 MSA, Inv, Lawyer,  Examiner Match \& FE &       True &       True &       True &       True \\
\bottomrule
\end{tabular}


('norm', 'docve

#### 2.2 Collected results - not doing this yet

In [36]:
# Index by each table: Norm/Raw, KS, Sample
tab_ind = {}
for samp in ["naics_name", "primclass"]:
    mods_all = ["PC M-Sim PC", "PC M-Sim PC MSA", "PC M-Int PC", "PC M-Int PC MSA",
                "Exam FE-Sim PC MSA", "Exam FE-Int PC MSA",
                "Exam FE-Break Int PC MSA"]
    if samp == "naics_name":
        mods = mods_all[:4]
    else:
        mods = mods_all[4:]
    mods_n = dict(zip(mods_all, range(1,len(mods_all)+1)))
#     for dm in ["docvecs", "ldavecs", "tp_pct_common_cited"]:
    for dm in dms:
        for mod in mods:
#             tab_ind[("raw", dm, samp, mod)] = ((dm, samp), regs["model_names_eqn"][mod])
            tab_ind[("norm", dm, samp, mod)] = ((dm, samp), regs["model_names_eqn"]["N "+mod])

tab_ind_collected = tab_ind

In [37]:
tab_no_c = {}
no_c_ind = ["C(common_est_inv)[T.1.0]", "C(common_pat_inv)[T.True]", "C(lawyer_match)[T.True]",
            "C(primclass_match)[T.True]", "C(examiner_match)[T.True]"]
no_c_ind = no_c_ind+["(s.e.) "+c for c in no_c_ind]

for k in tab_ind_collected.keys():
    
    i = str(tab_ind_collected[k])
    
    mod = rr2.loc[(rr2["id"] == i), ["Model", "Model Num", "samp"]]\
.drop_duplicates().values[0]

    tab = rr2.loc[(rr2["id"] == i), ygs].fillna("").set_index("index")

    # Add other columns
    tab.loc["Year FE"] = [True]*len(tab.columns)
    tab.loc["PC FE"] = [True]*len(tab.columns)

    if ("All FE" in k[3]) or ("Inv FE" in k[3]):
        tab.loc["Inv \& Lawyer Match"] = [True]*len(tab.columns)
    else:
        tab.loc["Inv \& Lawyer Match"] = [False]*len(tab.columns)

    # Create gaps
    tab = tab.fillna("").reset_index()

    #1. Create table without controls
    tab2 = tab.copy()
    tab2 = tab2.loc[~(tab2["index"].isin(no_c_ind))]
    # Set back index
    tab2 = tab2.set_index("index")
    
#     #
#     # Get rid of se labels
#     tab2.loc[tab2["index"].apply(lambda x: "(s.e.)" in x), "index"] = ""
#     tab2 = tab2.rename(columns={"index":""})
#     tab2 = tab2.replace(repl1).copy()

    tab_no_c[k] = tab2


tab_collected = tab_no_c

In [38]:
tab_col = {}
tab_col["norm"] = {}
# tab_col["raw"] = {}

In [39]:
keys = {}
keys["norm"] = [i for i in list(tab_collected.keys()) if "norm" in i]
# keys["raw"] = [i for i in list(tab_collected.keys()) if "raw" in i]
for m in keys.keys():
    for i,k in enumerate(keys[m]):
        # If group matches m, and is even
        if (m in k) & (i % 2) == 0:
            # Existing index order
            ind1 = tab_collected[keys[m][i+1]].index

            # First table
            a = tab_collected[keys[m][i]].copy()
            a.columns = pd.MultiIndex.from_product([["({0})".format(mods_n[keys[m][i][3]])], a.columns.tolist()])
            b = tab_collected[keys[m][i+1]].copy()
            b.columns = pd.MultiIndex.from_product([["({0})".format(mods_n[keys[m][i+1][3]])], b.columns.tolist()])
            tab2 = pd.concat([a, b], axis=1)
            tab2 = tab2.reindex(ind1)

            # Fill nan
            tab2 = tab2.fillna("").copy()

            # Get rid of se labels
            tab2.index = ["" if "(s.e.)" in x else x for x in tab2.index ]
            # Rename index
            tab2.index.name = "$KS="+repl1[k[1]][1::]
            # Get rid of se labels
            tab2 = tab2.reset_index(col_level=1)

            tab2 = tab2.replace(repl1).copy()

            tab_col[m][k] = tab2

IndexError: list index out of range

In [38]:
m = "norm"
for k,v in tab_col[m].items():
    print(k)
    display(v)

('norm', 'docvecs', 'naics_name', 'All FE-Sim PC')


Unnamed: 0_level_0,Unnamed: 1_level_0,(1),(1),(1),(1),(2),(2),(2),(2)
Unnamed: 0_level_1,"$KS=sim_{DV}(i,j)$",1975-85,1985-95,1995-05,2005-15,1975-85,1985-95,1995-05,2005-15
0,"$I(MSA \, Match)$",0.0170,0.0390***,0.0300***,0.0274***,0.0178,0.0415***,0.0277***,0.0222***
1,,(0.0120),(0.0070),(0.0048),(0.0038),(0.0120),(0.0069),(0.0048),(0.0043)
2,"$I_{MSA} * sim_{DV}(pc_{i}, pc_{j})$",,,,,-0.0012,-0.0045,0.0050,0.0085*
3,,,,,,(0.0116),(0.0067),(0.0047),(0.0045)
4,"$sim_{DV}(pc_{i}, pc_{j})$",0.2805***,0.2913***,0.2816***,0.2932***,0.2808***,0.2925***,0.2804***,0.2908***
5,,(0.0090),(0.0055),(0.0040),(0.0036),(0.0095),(0.0057),(0.0042),(0.0039)
6,$N$,40323,110982,215861,344313,40323,110982,215861,344313
7,Adjusted $R^2$,0.12,0.13,0.13,0.08,0.12,0.13,0.13,0.08
8,Year FE,True,True,True,True,True,True,True,True
9,PC FE,True,True,True,True,True,True,True,True


('norm', 'docvecs', 'naics_name', 'All FE-Sim PC MSA')


Unnamed: 0_level_0,Unnamed: 1_level_0,(3),(3),(3),(3),(4),(4),(4),(4)
Unnamed: 0_level_1,"$KS=sim_{DV}(i,j)$",1975-85,1985-95,1995-05,2005-15,1975-85,1985-95,1995-05,2005-15
0,"$I(MSA \, Match)$",0.0109,0.0353***,0.0187***,0.0170***,0.0114,0.0357***,0.0202***,0.0194***
1,,(0.0121),(0.0071),(0.0048),(0.0038),(0.0124),(0.0073),(0.0051),(0.0039)
2,"$I_{MSA} * sim_{DV}(pc_{i,MSA_i}, pc_{j,MSA_j})$",,,,,0.0064,0.0040,0.0103*,0.0261***
3,,,,,,(0.0136),(0.0079),(0.0061),(0.0057)
4,"$sim_{DV}(pc_{i,MSA_i}, pc_{j,MSA_j})$",0.1259***,0.1259***,0.1643***,0.1830***,0.1245***,0.1250***,0.1618***,0.1764***
5,,(0.0070),(0.0041),(0.0035),(0.0031),(0.0076),(0.0045),(0.0038),(0.0034)
6,$N$,40323,110982,215861,344313,40323,110982,215861,344313
7,Adjusted $R^2$,0.11,0.11,0.12,0.08,0.11,0.11,0.12,0.08
8,Year FE,True,True,True,True,True,True,True,True
9,PC FE,True,True,True,True,True,True,True,True


('norm', 'docvecs', 'primclass', 'Inv FE-Sim PC MSA')


Unnamed: 0_level_0,Unnamed: 1_level_0,(5),(5),(5),(5),(6),(6),(6),(6)
Unnamed: 0_level_1,"$KS=sim_{DV}(i,j)$",1975-85,1985-95,1995-05,2005-15,1975-85,1985-95,1995-05,2005-15
0,"$I(MSA \, Match)$",0.0100,0.0248***,0.0316***,0.0157***,-0.0047,0.0353***,0.0413***,0.0216***
1,,(0.0143),(0.0081),(0.0054),(0.0041),(0.0213),(0.0112),(0.0075),(0.0058)
2,"$I_{MSA} * sim_{DV}(pc_{i,MSA_i}, pc_{j,MSA_j})$",,,,,0.0145,-0.0116,-0.0123,-0.0087
3,,,,,,(0.0175),(0.0099),(0.0077),(0.0068)
4,"$sim_{DV}(pc_{i,MSA_i}, pc_{j,MSA_j})$",0.0654***,0.0652***,0.0826***,0.0935***,0.0632***,0.0671***,0.0849***,0.0953***
5,,(0.0075),(0.0045),(0.0038),(0.0035),(0.0079),(0.0048),(0.0041),(0.0038)
6,$N$,38324,106152,205248,324142,38324,106152,205248,324142
7,Adjusted $R^2$,0.08,0.08,0.09,0.06,0.08,0.08,0.09,0.06
8,Year FE,True,True,True,True,True,True,True,True
9,PC FE,True,True,True,True,True,True,True,True


In [39]:
m = "norm"
for k,v in tab_col[m].items():
    print("\n"+str(k)+"\n"+"\n")
    with pd.option_context("max_colwidth", 1000):
        print(v.to_latex(index=False,escape=False, column_format="l|cccc|cccc", multicolumn_format="c"))


('norm', 'docvecs', 'naics_name', 'All FE-Sim PC')


\begin{tabular}{l|cccc|cccc}
\toprule
                                      & \multicolumn{4}{c}{(1)} & \multicolumn{4}{c}{(2)} \\
                   $KS=sim_{DV}(i,j)$ &    1975-85 &    1985-95 &    1995-05 &    2005-15 &    1975-85 &    1985-95 &    1995-05 &    2005-15 \\
\midrule
                    $I(MSA \, Match)$ &     0.0170 &  0.0390*** &  0.0300*** &  0.0274*** &     0.0178 &  0.0415*** &  0.0277*** &  0.0222*** \\
                                      &   (0.0120) &   (0.0070) &   (0.0048) &   (0.0038) &   (0.0120) &   (0.0069) &   (0.0048) &   (0.0043) \\
 $I_{MSA} * sim_{DV}(pc_{i}, pc_{j})$ &            &            &            &            &    -0.0012 &    -0.0045 &     0.0050 &    0.0085* \\
                                      &            &            &            &            &   (0.0116) &   (0.0067) &   (0.0047) &   (0.0045) \\
           $sim_{DV}(pc_{i}, pc_{j})$ &  0.2805*** &  0.2913*** &  0.2816*** &  0

In [34]:
m = "raw"
for k,v in tab_col[m].items():
    print("\n"+str(k)+"\n"+"\n")
    with pd.option_context("max_colwidth", 1000):
        print(v.to_latex(index=False,escape=False, column_format="l|cccc|cccc", multicolumn_format="c"))


('raw', 'docvecs', 'naics_name', 'All FE-Sim PC')


\begin{tabular}{l|cccc|cccc}
\toprule
                                      & \multicolumn{4}{c}{(1)} & \multicolumn{4}{c}{(2)} \\
                   $KS=sim_{DV}(i,j)$ &    1975-85 &    1985-95 &    1995-05 &    2005-15 &    1975-85 &    1985-95 &    1995-05 &    2005-15 \\
\midrule
                    $I(MSA \, Match)$ &     0.0023 &  0.0053*** &  0.0041*** &  0.0037*** &     0.0028 &  0.0072*** &     0.0020 &    -0.0000 \\
                                      &   (0.0016) &   (0.0009) &   (0.0006) &   (0.0005) &   (0.0049) &   (0.0027) &   (0.0019) &   (0.0020) \\
 $I_{MSA} * sim_{DV}(pc_{i}, pc_{j})$ &            &            &            &            &    -0.0038 &    -0.0141 &     0.0158 &    0.0270* \\
                                      &            &            &            &            &   (0.0367) &   (0.0211) &   (0.0149) &   (0.0144) \\
                $I(Primclass\,Match)$ &     0.0018 &   0.0037** &  0.0032*** &   0

#### 2.3 Localization estimate by model for norm, docvecs

In [18]:
local_est = pd.DataFrame()
for k,v in tab_no_c_yr.items():
    if (k[0] == "norm") & (k[1] == "docvecs"):
        v2 = v.loc[[0,1]]
        v2[""] = mods_n[k[3]]
        v2["var"] = ["est", "se"]
        v2["Model Name"] = k[3]
        local_est = local_est.append(v2, ignore_index=True)
        
local_est = local_est.sort_values(["", "var"]).reset_index(drop=True).drop("var",1)
local_est[""] = local_est[""].apply(lambda x: "({0})".format(x))
local_est.loc[list(range(1,len(local_est),2)), ""] = ""

KeyError: 'All FE-Sim PC MSA-Cb'

In [353]:
local_est

Unnamed: 0,Unnamed: 1,1975-85,1985-95,1995-05,2005-15,Model Name
0,(1),0.0209*,0.0407***,0.0334***,0.0287***,All FE-Sim PC
1,,(0.0112),(0.0066),(0.0046),(0.0037),All FE-Sim PC
2,(2),0.0207*,0.0404***,0.0333***,0.0287***,All FE-Sim PC Qd
3,,(0.0112),(0.0066),(0.0046),(0.0037),All FE-Sim PC Qd
4,(3),-0.0480***,-0.0183**,-0.0328***,-0.0224***,All FE-Sim PC MSA
5,,(0.0124),(0.0072),(0.0049),(0.0039),All FE-Sim PC MSA
6,(4),-0.0599***,-0.0344***,-0.0491***,-0.0381***,All FE-Sim PC MSA Qd
7,,(0.0125),(0.0073),(0.0050),(0.0039),All FE-Sim PC MSA Qd
8,(5),0.0208*,0.0401***,0.0319***,0.0219***,All FE-Int PC
9,,(0.0111),(0.0065),(0.0046),(0.0038),All FE-Int PC
