# Author Name Recon Part 2 - combine the author names with the affiliations
Following part 1, there are three main cases of ambiguity that can be resolved by affiliations and other methods; we resolve them here using a number of strategies. Given the disjointed nature of the affiliations data, I will also use this process to iteratively consolidate the two sets of affiliations, correcting misspellings, missing data, etc. present in the affiliations data, and to check the affiliations data for completeness.

Pre-requisites:
- 020
- 021

Input files:
- processed author names from 020
- subset of combined affiliations from 021

Output files:
- combined processed author names with associated affiliations

In [104]:
import pandas as pd
# from unidecode import unidecode
import re
from datetime import date
import json
# import numpy as np
import string
import time
import pprint
# from fuzzywuzzy import fuzz 
# from fuzzywuzzy import process 
# pandas display options: do not truncate the contents of wide columns, and
# show every row when a DataFrame is displayed.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)

In [105]:
# Directory holding the affiliations data. The pickle inputs and both output
# files are located relative to this path.
# NOTE(review): absolute, user-specific path — adjust when running elsewhere.
base_path="/Users/sijiawu/Work/Thesis/Data/Affiliations/"

In [106]:
# Processed author names, keyed by article URL.
with open('author_proc.json') as handle:
    proc_auths_all = json.load(handle)

# The affiliation subset lives under base_path; the merged journal data lives
# in the "Combined" folder next to it (everything before 'Aff' in base_path).
data_root = base_path.split('Aff')[0]
aff_sub = pd.read_pickle(base_path + "affiliations_combined_sub.pkl")
j_data = pd.read_pickle(data_root + "Combined/020_merged_proc_scopus_inception_with_auth_split_2020.pkl")

In [107]:
# Sanity check: the last path segment of every article URL must be unique.
# The cell evaluates to True when there are no collisions.
temp = [url.split("/")[-1] for url in proc_auths_all]

len(set(temp)) == len(temp)

True

In [108]:
# Content types that are skipped when affiliation rows are attached to authors
# (see the loop over aff_sub.index).
content_ex=['MISC',  'Discussion', 'Review', 'Review2',"Errata"]
# NOTE(review): `content` and `jid` are not referenced in the visible cells —
# presumably the content types that are kept and the journal ids covered; confirm.
content=['Article', 'Comment', 'Reply', 'Rejoinder']
jid=["aer","ecta","jpe","qje","res"]

In [109]:
# Inspect the column dtypes of the affiliation subset.
aff_sub.dtypes

id                      object
year                     int64
author                  object
auth_num                object
aff_num                 object
aff_cleaner             object
aff_cleaner_final       object
aff_main_final          object
aff_subunit_final       object
aff_department_final    object
alt_final               object
manual                  object
screened                object
country                 object
dtype: object

In [110]:
# Spot check: affiliation rows recorded for article id "1816971".
aff_sub[aff_sub["id"]=="1816971"]

Unnamed: 0,id,year,author,auth_num,aff_num,aff_cleaner,aff_cleaner_final,aff_main_final,aff_subunit_final,aff_department_final,alt_final,manual,screened,country
11232,1816971,1971,Terence F. Kelly,0,1,,,indiana university,,,,,y,United States
11667,1816971,1971,Leslie Singer,1,1,,,urban institute,,,,,y,United States


In [111]:
# Spot check: processed author record of a DOI-keyed article.
proc_auths_all['https://doi.org/10.1093/qje/qjaa012']

{'authors': {'0': {'raw': 'Enke, Benjamin',
   'init': 'benjamin enke',
   'auth_suffix': [],
   'a1': 'benjamin enke',
   'a2': 'b. enke',
   'a3': 'b. enke'}},
 'year': 2020,
 'content_type': 'Article',
 'jid': 'qje'}

In [112]:
# Spot check: processed author record of article 1816971 before any
# affiliations are attached.
proc_auths_all["https://www.jstor.org/stable/1816971"]


{'authors': {'0': {'raw': 'Terence F. Kelly',
   'init': 'terence f. kelly',
   'auth_suffix': [],
   'a1': 'terence f. kelly',
   'a2': 't. f. kelly',
   'a3': 't. kelly'},
  '1': {'raw': 'Leslie Singer',
   'init': 'leslie singer',
   'auth_suffix': [],
   'a1': 'leslie singer',
   'a2': 'l. singer',
   'a3': 'l. singer'}},
 'year': 1971,
 'content_type': 'Article',
 'jid': 'aer'}

In [113]:
# Preview the first rows of the affiliation subset.
aff_sub.head()

Unnamed: 0,id,year,author,auth_num,aff_num,aff_cleaner,aff_cleaner_final,aff_main_final,aff_subunit_final,aff_department_final,alt_final,manual,screened,country
0,1825911,1940,Otto Weinberger,0,1,,,vienna austria (city),,,,,Checked,Austria
1,1825513,1940,B. Chait,0,1,,,antwerp (city),,,,MTURK,y,Belgium
2,1883329,1940,Karl H. Niebyl,0,1,,,carleton university,,,,Scopus,,Canada
3,1909098,1940,René Roy,0,1,,,universite paris 1 pantheon-sorbonne,,,,MTURK,,France
4,1814510,1940,Frederick Pollock,0,1,,,international institute of social research,,,,Manual from PDF,y,International


In [114]:
# Manual corrections for known errors in the source data.
# Every entry targets one article ("URL") and may carry:
#   "content_type" - corrected content type (written to j_data)
#   "Affs"         - replacement affiliation names; an empty {} clears them.
#                    The key is case-sensitive: the replacement loop only
#                    looks for "Affs".
#   "author"       - restricts the "Affs" replacement to the author whose
#                    a1 name matches; without it all authors are replaced
# names are all in a1 format
repl_affs=[
    {"URL":"https://www.jstor.org/stable/1815116","author":"tjalling koopmans","Affs":{}},
    {"URL":"https://www.jstor.org/stable/1906863","author":"t. koopmans","Affs":{"League of Nations, Geneva"}},
    {"URL":"https://www.jstor.org/stable/1812749", "content_type":"Review"},
    {"URL":"https://www.jstor.org/stable/1914508","author":"edward m. bernstein","Affs":{}},
    {"URL":"https://www.jstor.org/stable/788", "content_type":"Errata"},
    # An empty "Affs" is what actually clears the affiliations; the "note"
    # key alone is ignored by the replacement loop.
    {"URL":"https://www.jstor.org/stable/1815118","Affs":{},"note":"set all affiliations to null"},
    {"URL":"https://www.jstor.org/stable/1824625","content_type":"Review"},
    {"URL":"https://www.jstor.org/stable/26160279","author":"ahmed m. mobarak", "Affs":{"yale university"}},
    {"URL":"https://www.jstor.org/stable/1912661","author":"peter morgan","Affs":{'flinders university', 'university of western ontario'}},
    {"URL":"https://www.jstor.org/stable/2296270","author":"n. kaldor", "Affs":{'cambridge united kingdom (city)'}},
    {"URL":"https://www.jstor.org/stable/2296103","author":"nicholas kaldor", "Affs":{'cambridge united kingdom (city)'}},
    {"URL":"https://www.jstor.org/stable/2296292","author":"nicholas kaldor", "Affs":{'cambridge united kingdom (city)'}},
    {"URL":"https://www.jstor.org/stable/2295991","author":"nicholas kaldor", "Affs":{'cambridge united kingdom (city)'}},
    {"URL":"https://www.jstor.org/stable/1907917", "content_type":"Errata"},
    {"URL":"https://www.jstor.org/stable/1914269", "content_type":"Errata"},
    {"URL":"https://www.jstor.org/stable/2937866", "author":"john sutton", "Affs": {"london school of economics and political science"}},
    {"URL": "https://www.jstor.org/stable/2938372", "author":"a. araujo", "Affs":{"instituto nacional de matematica pura e aplicada - impa"}},
    {"URL":"https://www.jstor.org/stable/2298003","content_type":"MISC"},
    {"URL":"https://www.jstor.org/stable/2938185","content_type":"MISC"},
    {"URL":"https://www.jstor.org/stable/2118354", "Affs":{'university of chicago'}},
    {"URL":"https://www.jstor.org/stable/2006601", "author":"roger gordon", "Affs":{'university of michigan'}},
    {"URL":"https://www.jstor.org/stable/1913405", "content_type":"Errata"},
    {"URL":"https://www.jstor.org/stable/1914028", "content_type":"Errata"},
    {"URL":"https://www.jstor.org/stable/1809686", "author":"michael visscher","Affs":{}},
    {"URL":"https://www.jstor.org/stable/1909551", "author":"lance taylor", "Affs":{"harvard university"}},
    {"URL":"https://www.jstor.org/stable/1907286","author":"herman o. a. wold","Affs":{"uppsala university"}},
    {"URL":"https://www.jstor.org/stable/2296671","author":"richard zeckhauser","Affs":{'london school of economics and political science', "harvard university"}},
    {"URL":'https://www.jstor.org/stable/1829573',"Affs":{'yale university'}, "author":"peter mieszkowski" },
    {"URL":"https://www.jstor.org/stable/1818422","Affs":{'university of wisconsin'},"author":"peter helmberger"},
    {"URL":"https://www.jstor.org/stable/1805224","Affs":{'california institute of technology'},"author":"james p. quirk"},
    {"URL":'https://www.jstor.org/stable/1830732',"Affs":{'indiana university'},"author":"george w. wilson"},
    { "URL":'https://www.jstor.org/stable/2938184',"Affs":{},'author': "daniel mcfadden", "content_type":"MISC"},
    {"URL":'https://www.jstor.org/stable/1828777',"Affs":{'university of california - los angeles'}, "author":"armen a. alchian"},
    {"URL":'https://www.jstor.org/stable/1821381', "Affs":{}, "author":"john fei"},
    {"URL": 'https://www.jstor.org/stable/1810390', "Affs":{}, "author":"charles j. hitch"},
    {"URL":'https://www.jstor.org/stable/1914080',"content_type":"Errata"},
    {"URL":"https://www.jstor.org/stable/1815263","content_type":"Errata", "Affs":{}},
    {"URL":"https://www.jstor.org/stable/1817026", "Affs":{"princeton university"}},
    {"URL":"https://www.jstor.org/stable/1879538", "author":"m. a. adelman", "Affs":{"massachusetts institute of technology - mit"}},
    {"URL":"https://www.jstor.org/stable/117014","author":"jack hirshleifer", "Affs":{"university of california - los angeles"}},
    {"URL":"https://www.jstor.org/stable/1907921","Affs":{'paris institute of statistics','ecole nationale superieure des mines de paris'}, "author":"m. allais"},
    {"URL":"https://www.jstor.org/stable/1801806", "Affs":{}, "author":"charles d. hyson"},
    {"URL":"https://www.jstor.org/stable/1807882", "Affs":{}, "author":"d. levhari"},
    {"URL":"https://www.jstor.org/stable/1905648","Affs":{'ohio state university'}, "author":"r. c. bushnell"},
    {"URL":"https://www.jstor.org/stable/1829153","Affs":{'university of chicago'},"author":"h. g. grubel"},
    {"URL":"https://www.jstor.org/stable/1909972", "content_type":"MISC"},
    {"URL":"https://www.jstor.org/stable/1911291", "Affs":{"budapest"}},
    {"URL":"https://www.jstor.org/stable/1911804","Affs":{"university of wisconsin"},"author":"a. s. goldberger"},
    {"URL":"https://www.jstor.org/stable/1812051", "Affs":{"yale university"}},
    {"URL":"https://www.jstor.org/stable/1910160","content_type":"Errata"},
    {"URL":"https://www.jstor.org/stable/1833277","author":"l. a. ihnen", "Affs":{"north carolina state university"}},
    {"URL":"https://www.jstor.org/stable/24029256","author":"david schmeidler", "Affs":{"tel aviv university"}},
    {"URL":"https://www.jstor.org/stable/1912661","Affs":{'university of canterbury', 'university of western ontario'},"author":"richard manning"},
    {"URL":"https://www.jstor.org/stable/1821511","Affs":{}, "content_type":"Errata"},
    {"URL":"https://www.jstor.org/stable/1828711","Affs":{"rand corporation"},"author":"l. s. shapley"},
    {"URL":"https://www.jstor.org/stable/23469711","Affs":{"central michigan university"},"author":"jason e. taylor"},
    {"URL":"https://www.jstor.org/stable/1816432","Affs":{},"author":"imre d. vegh"},
    {"URL":"https://www.jstor.org/stable/1806086","content_type":"Errata"},
    {"URL":"https://www.jstor.org/stable/42920923","author": "charles hokayem", "Affs":{"us census bureau"}},
    {"URL":"https://www.jstor.org/stable/10.1086/666972","content_type":"Errata"},
    {"URL":"https://www.jstor.org/stable/10.1086/666655","Affs":{"washington university - st. louis"},"author":"scott baker"},
    {"URL":"https://www.jstor.org/stable/1913118","content_type":"Errata"},
    {"URL":"https://www.jstor.org/stable/1905318","content_type":"Errata"},
    {"URL":"https://www.jstor.org/stable/1911890","Affs": {"washington dc"}},
    {"URL":"https://www.jstor.org/stable/10.1086/669673","content_type":"MISC"},
    {"URL":"https://www.jstor.org/stable/2006689","author":"arlie g. sterling", "Affs":{"marsoft incorportated, cambridge, ma"}},
    {"URL":"https://www.jstor.org/stable/1911484","content_type":"Errata"},
    {"URL":"https://www.jstor.org/stable/25592467","author":"catharine hill", "Affs":{"vassar college"}},
    # Key was lowercase "affs", which the replacement loop never reads.
    {"URL":"https://www.jstor.org/stable/117144","Affs":{"institute for international economics"}},
    {"URL":"https://www.jstor.org/stable/27871243", "author":"susan e. woodward", "Affs":{"sand hill econometrics"}},
    {"URL":"https://www.jstor.org/stable/27805049","author":"susan e. woodward", "Affs":{"sand hill econometrics"}},
    {"URL":'https://www.jstor.org/stable/1814237',"author":'arthur burns',"Affs":{},"content_type":"Errata"},
    {"URL":"https://www.jstor.org/stable/1828397","author":"r. j. wonnacott","Affs":{"university of western ontario"}},
    {"URL":"https://www.jstor.org/stable/1881797","author":"n. liviatan","Affs":{"hebrew university of jerusalem"}},
    {"URL":"https://www.jstor.org/stable/1816971","author":"leslie singer", "Affs":{"indiana university"}},
    {"URL":"https://www.jstor.org/stable/1816971","author":"terence f. kelly", "Affs":{"urban institute"}},
    {"URL":"https://www.jstor.org/stable/1880712","author":"paul darling","Affs":{"bowdoin university"}},
    {"URL":"https://www.jstor.org/stable/1817238","author":"richard a. kasten","Affs":{"congressional budget office"}},
    {"URL":"https://www.jstor.org/stable/2296306","author":"g. h. borts","Affs":{"providence r. i."}},
    {"URL":"https://www.jstor.org/stable/1907297","author":"colin clark","Affs":{"queensland (australia)"}},
    {"URL":"https://www.jstor.org/stable/2296086", "author": "r. triffin", "Affs": {"washington dc (city)"}},
    {"URL":"https://www.jstor.org/stable/117278","author": "d. g. johnson","Affs":{"university of chicago"}},
    {"URL":"https://www.jstor.org/stable/1814297","content_type":"Correction","Affs":{}},
    {"URL":"https://www.jstor.org/stable/1913335", "content_type":"Errata"},
    {"URL":"https://www.jstor.org/stable/1912690","author":"d. e. a. giles", "Affs":{"reserve bank of new zealand"}},
    {"URL":"https://www.jstor.org/stable/1912690","author":"m. l. king", "Affs":{"monash university"}},
    {"URL":"https://www.jstor.org/stable/2296717","author":"w. neuefeind","Affs":{"university of bonn", "center for operations research and econometrics - core"}},
    {"URL":"https://www.jstor.org/stable/1814042","author":"d. r. kamerschen","Affs":{},"content_type":"Correction"},
    {"URL":"https://www.jstor.org/stable/2296392","content_type":"Correction"},
    {"URL":"https://www.jstor.org/stable/2295902","content_type":"Correction"},
    {"URL":"https://www.jstor.org/stable/1911291","author":"t. liptak", "Affs":{"budapest"}},
    {"URL":"https://www.jstor.org/stable/2296251","author":"peter robson","Affs":{"cambridge united kingdom (city)"}},
    {"URL":"https://www.jstor.org/stable/1911484","content_type":"Errata"},
    {"URL":"https://www.jstor.org/stable/1814737","author":"d. c. stapleton","Affs":{"university of british columbia"}},
    {"URL":"https://www.jstor.org/stable/1823935","Affs":{}},
    {"URL":"https://www.jstor.org/stable/2296119", "author":"r. l. marris","Affs":{"cambridge united kingdom (city)"}},
    {"URL":"https://www.jstor.org/stable/1882107","author":"d. netzer","Affs":{'federal reserve bank of chicago'}},
    {"URL":"https://www.jstor.org/stable/1814167", "author":"richard v. clemence", "Affs":{'wellesley college'}},
    {"URL":"https://www.jstor.org/stable/10.1086/669673", "content_type":"MISC"},
    {"URL":"https://www.jstor.org/stable/1913335","content_type":"Errata", "Affs":{}},
    {"URL":"https://www.jstor.org/stable/1817003", "Affs":{"us air force academy"}},
    {"URL":"https://www.jstor.org/stable/1814878","Affs":{"university of wisconsin"}},
]

# Write the corrected content types back into j_data, matching rows on URL.
# Entries without a "content_type" key are left alone here.
for fix in repl_affs:
    if "content_type" not in fix:
        continue
    same_url = j_data["URL"] == fix["URL"]
    j_data.loc[same_url, "content_type"] = fix["content_type"]

In [115]:
# key_match maps the last path segment of an article URL back to the full URL.
# While walking the records, give every author an empty 'affs' dict to fill.
key_match = {}
for url, record in proc_auths_all.items():
    key_match[url.split('/')[-1]] = url
    for author in record["authors"].values():
        author['affs'] = {}

In [116]:
# Cast auth_num to int so that str(auth_num) yields keys such as '0' and '1',
# the form used for author indices in proc_auths_all.
aff_sub['auth_num']=aff_sub['auth_num'].astype(int)

In [117]:
# Preview the affiliation subset after the auth_num cast.
aff_sub.head()
# aff_sub[aff_sub["id"]=="1816971"]

Unnamed: 0,id,year,author,auth_num,aff_num,aff_cleaner,aff_cleaner_final,aff_main_final,aff_subunit_final,aff_department_final,alt_final,manual,screened,country
0,1825911,1940,Otto Weinberger,0,1,,,vienna austria (city),,,,,Checked,Austria
1,1825513,1940,B. Chait,0,1,,,antwerp (city),,,,MTURK,y,Belgium
2,1883329,1940,Karl H. Niebyl,0,1,,,carleton university,,,,Scopus,,Canada
3,1909098,1940,René Roy,0,1,,,universite paris 1 pantheon-sorbonne,,,,MTURK,,France
4,1814510,1940,Frederick Pollock,0,1,,,international institute of social research,,,,Manual from PDF,y,International


In [118]:
# Attach every affiliation row of aff_sub to the matching author record in
# proc_auths_all. Rows whose article has an excluded content type, or was
# published before 1940, are skipped.
g=[]       # article URL resolved for each row, in row order
issues=[]  # URLs of rows whose author index could not be found in the record
aff_cols=['aff_main_final', 'aff_subunit_final','aff_department_final', 'alt_final', 'manual', 'screened',"country"]
for i in aff_sub.index:
    # Resolve the article once per row instead of repeating the nested lookup.
    url=key_match[str(aff_sub.loc[i,'id'])]
    g.append(url)
    record=proc_auths_all[url]
    if record['content_type'] in content_ex:
        continue
    if record['year']<1940:
        continue
    auth_key=str(aff_sub.loc[i,'auth_num'])
    try:
        author=record['authors'][auth_key]
        auth_ob=author['affs']
    except KeyError:
        # Only a missing author index (or missing 'affs' slot) is an expected
        # data issue worth collecting; any other error should surface.
        print(record['authors'])
        print(auth_key)
        issues.append(url)
        continue
    # Keep the author name as spelled in the affiliation data for cross-checks.
    author["alt_auth"]=aff_sub.loc[i,"author"]
    auth_ob[aff_sub.loc[i, 'aff_num']]=aff_sub.loc[i,aff_cols].to_dict()
    

In [119]:
# Number of affiliation rows that could not be matched to an author (0 = all matched).
len(issues)

0

In [120]:
def _manual_aff(aff_main, country):
    """Build one hand-entered affiliation record.

    Only the main affiliation name and the country vary between records;
    every other field takes the fixed value used for manual entries.
    """
    return {
        'aff_department_final': None,
        'aff_main_final': aff_main,
        'aff_subunit_final': None,
        'alt_final': None,
        'manual': 'manual',
        'screened': 'NA',
        "country": country,
    }


# Hand-entered affiliations, structured as
#   article URL -> author index (str) -> affiliation number (int) -> record.
# Presumably these are articles without usable rows in aff_sub — confirm.
# NOTE(review): "IGIER" is the only name here that is not lower-case; check it
# against the canonical affiliation names before relying on it.
missing_data_affs={
    "https://doi.org/10.1093/qje/qjaa012": {
        "0": {
            1: _manual_aff('harvard university', "United States"),
            2: _manual_aff("national bureau of economic research - nber", "United States"),
        },
    },
    'https://www.jstor.org/stable/26921614': {
        '0': {
            1: _manual_aff("boston university", "United States"),
        },
        '1': {
            1: _manual_aff("shandong university of finance and economics", "China"),
            # Macquarie University is in Australia (was entered as "Canada").
            2: _manual_aff("macquarie university", "Australia"),
        },
        '2': {
            1: _manual_aff("university of southern california", "United States"),
            2: _manual_aff("shanghai jiaotong university", "China"),
        },
        '3': {
            1: _manual_aff("university of international business and economics", "China"),
        },
    },
    "https://www.jstor.org/stable/43821401": {
        "0": {
            # Spelled to match the names used elsewhere in this notebook
            # (was "mit" and "national bureau of economics - nber").
            1: _manual_aff("massachusetts institute of technology - mit", "United States"),
            2: _manual_aff("national bureau of economic research - nber", "United States"),
        },
        "1": {
            1: _manual_aff("bocconi university", "Italy"),
            2: _manual_aff("IGIER", "Italy"),
        },
        "2": {
            1: _manual_aff("columbia university", "United States"),
        },
    },
    "https://www.jstor.org/stable/42920997": {
        "0": {
            1: _manual_aff("stanford university", "United States"),
        },
    },
}

In [121]:
# Fill in the hand-entered affiliations for the articles in missing_data_affs.
# The URL and the record as it stood before the update are printed as a log.
for i in missing_data_affs.keys():
    print(i)
    print(proc_auths_all[i])
    manual_affs=missing_data_affs[i]
    for j, author in proc_auths_all[i]['authors'].items():
        if j in manual_affs:
            author["affs"]=manual_affs[j]
        else:
            # An author without a manual entry keeps the existing 'affs'
            # instead of aborting the whole cell with a KeyError.
            print("no manual affiliation entry for author", j, "of", i)


https://doi.org/10.1093/qje/qjaa012
{'authors': {'0': {'raw': 'Enke, Benjamin', 'init': 'benjamin enke', 'auth_suffix': [], 'a1': 'benjamin enke', 'a2': 'b. enke', 'a3': 'b. enke', 'affs': {}}}, 'year': 2020, 'content_type': 'Article', 'jid': 'qje'}
https://www.jstor.org/stable/26921614
{'authors': {'0': {'raw': 'Raymond Fisman', 'init': 'raymond fisman', 'auth_suffix': [], 'a1': 'raymond fisman', 'a2': 'r. fisman', 'a3': 'r. fisman', 'affs': {}}, '1': {'raw': 'Jing Shi', 'init': 'jing shi', 'auth_suffix': [], 'a1': 'jing shi', 'a2': 'j. shi', 'a3': 'j. shi', 'affs': {}}, '2': {'raw': 'Yongxiang Wang', 'init': 'yongxiang wang', 'auth_suffix': [], 'a1': 'yongxiang wang', 'a2': 'y. wang', 'a3': 'y. wang', 'affs': {}}, '3': {'raw': 'Weixing Wu', 'init': 'weixing wu', 'auth_suffix': [], 'a1': 'weixing wu', 'a2': 'w. wu', 'a3': 'w. wu', 'affs': {}}}, 'year': 2020, 'content_type': 'Article', 'jid': 'aer'}
https://www.jstor.org/stable/43821401
{'authors': {'0': {'raw': 'George-Marios Angeleto

In [122]:
# Spot check: record of article 1816971 after the aff_sub affiliations were attached.
proc_auths_all["https://www.jstor.org/stable/1816971"]

{'authors': {'0': {'raw': 'Terence F. Kelly',
   'init': 'terence f. kelly',
   'auth_suffix': [],
   'a1': 'terence f. kelly',
   'a2': 't. f. kelly',
   'a3': 't. kelly',
   'affs': {1: {'aff_main_final': 'indiana university',
     'aff_subunit_final': nan,
     'aff_department_final': nan,
     'alt_final': nan,
     'manual': 'NA',
     'screened': 'y',
     'country': 'United States'}},
   'alt_auth': 'Terence F. Kelly'},
  '1': {'raw': 'Leslie Singer',
   'init': 'leslie singer',
   'auth_suffix': [],
   'a1': 'leslie singer',
   'a2': 'l. singer',
   'a3': 'l. singer',
   'affs': {1: {'aff_main_final': 'urban institute',
     'aff_subunit_final': nan,
     'aff_department_final': nan,
     'alt_final': nan,
     'manual': 'NA',
     'screened': 'y',
     'country': 'United States'}},
   'alt_auth': 'Leslie Singer'}},
 'year': 1971,
 'content_type': 'Article',
 'jid': 'aer'}

In [123]:
# Replace affiliations in proc_auths_all with the manual corrections listed in
# repl_affs. An entry with "author" only touches the author whose a1 name
# matches; an entry without it replaces the affiliations of every author of
# the article. An empty "Affs" clears the affiliations.
# NOTE(review): the replacement records carry no 'country' key, unlike the
# records built from aff_sub — confirm downstream code copes with that.
for i in repl_affs:
    if "Affs" not in i:
        # content_type-only corrections carry no affiliations to replace
        continue

    # "Affs" is a set (or an empty dict), whose iteration order is not
    # reproducible between runs; sorting makes the numbering deterministic.
    aff_temp={
        str(aff_ord): {
            'aff_main_final': aff_name,
            'aff_subunit_final': None,
            'aff_department_final': None,
            'alt_final': None,
            'manual': 'MANUAL',
            'screened': 'NA',
        }
        for aff_ord, aff_name in enumerate(sorted(i["Affs"]), start=1)
    }

    authors=proc_auths_all[i["URL"]]["authors"]
    if "author" in i:
        targets=[j for j in authors if authors[j]["a1"]==i["author"]]
        if not targets:
            # Report corrections whose author name matched nobody.
            print(i)
            print(i["URL"].split("/")[-1])
            print("########")
            print(i["author"])
            print(i["Affs"])
            print("no match")
            print(proc_auths_all[i["URL"]])
            print()
    else:
        targets=list(authors)

    for j in targets:
        # Each author gets an independent copy so that a later edit to one
        # author's affiliations cannot leak into a co-author's.
        authors[j]["affs"]={k: dict(v) for k, v in aff_temp.items()}

In [124]:
# List the columns available in the merged journal data.
j_data.columns

Index(['issue_url', 'author', 'title', 'journal', 'volume', 'number', 'pages',
       'year', 'ISSN', 'abstract', 'URL', 'publisher', 'content_type', 'type',
       'jid', 'author_split', 'urldate', 'reviewed-author', 'uploaded',
       'title_10', 'URL_og', 'number_og', 'title_og', 'author_og', 'pages_og',
       'j_fix', 'scopus_jid', 'scopus_id', 'scopus_authorgroup',
       'scopus_authors', 'scopus_affiliations', 'scopus_references',
       'scopus_author_full_names', 'scopus_title', 'scopus_year',
       'scopus_source_title', 'scopus_volume', 'scopus_issue', 'scopus_art_no',
       'scopus_page_start', 'scopus_page_end', 'scopus_page_count',
       'scopus_cited_by', 'scopus_doi', 'scopus_abstract', 'scopus_publisher',
       'scopus_document_type', 'scopus_publication_stage',
       'scopus_open_access', 'scopus_source', 'scopus_eid', 'scopus_title_og',
       'scopus_volume_og', 'scopus_issue_og', 'scopus_page_start_og',
       'scopus_page_end_og', 'scopus_year_og', 's_fix', 

In [125]:
# Persist the author records with their affiliations attached. default=int is
# the fallback json uses for values it cannot serialise natively.
out_json_path = base_path + "author_proc_w_aff.json"
with open(out_json_path, "w") as outfile:
    json.dump(proc_auths_all, outfile, indent=4, default=int)

# Save j_data, including the corrected content types, in the "Combined" folder.
data_root = base_path.split('Aff')[0]
j_data.to_pickle(data_root + "Combined/022_merged_proc_scopus_inception_with_auth_split_2020_content_type_corrected.pkl")

In [126]:
# Show the first article key in proc_auths_all.
list(proc_auths_all.keys())[0]

'https://doi.org/10.1093/qje/qjaa012'

In [127]:
# Spot check: record of article 1816971 after the manual replacements were applied.
proc_auths_all["https://www.jstor.org/stable/1816971"]

{'authors': {'0': {'raw': 'Terence F. Kelly',
   'init': 'terence f. kelly',
   'auth_suffix': [],
   'a1': 'terence f. kelly',
   'a2': 't. f. kelly',
   'a3': 't. kelly',
   'affs': {'1': {'aff_main_final': 'urban institute',
     'aff_subunit_final': None,
     'aff_department_final': None,
     'alt_final': None,
     'manual': 'MANUAL',
     'screened': 'NA'}},
   'alt_auth': 'Terence F. Kelly'},
  '1': {'raw': 'Leslie Singer',
   'init': 'leslie singer',
   'auth_suffix': [],
   'a1': 'leslie singer',
   'a2': 'l. singer',
   'a3': 'l. singer',
   'affs': {'1': {'aff_main_final': 'indiana university',
     'aff_subunit_final': None,
     'aff_department_final': None,
     'alt_final': None,
     'manual': 'MANUAL',
     'screened': 'NA'}},
   'alt_auth': 'Leslie Singer'}},
 'year': 1971,
 'content_type': 'Article',
 'jid': 'aer'}