In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import tqdm
from collections import Counter
pd.set_option('display.float_format', lambda x: '%.2f' % x)
import matplotlib
import statistics 
import math
import pickle
import scipy.io as scio
import json
import os
import datetime
import sys

def save_pkl(path,obj):
    """Pickle ``obj`` and write it to ``path``, replacing any existing file."""
    with open(path, mode='wb') as sink:
        pickle.dump(obj, sink)
        
def load_pkl(path):
    """Return the object unpickled from the file at ``path``.

    Unpickling can execute arbitrary code, so only load files that this
    project wrote itself.
    """
    with open(path, mode='rb') as source:
        restored = pickle.load(source)
    return restored

# Print the entries of the current working directory.
print(os.listdir())

['5_1_PSW.R', 'draw', '3_sample_scientist.ipynb', '2_interplays.ipynb', '4_regression_result.ipynb', '.ipynb_checkpoints', '5_2_PSW_result.ipynb', '1_processed_data.ipynb']


# deal with source data

In [None]:
# Walk the APS metadata dump and collect, for every paper JSON file, its DOI,
# author names, publication date and article type. The four lists stay
# index-aligned: position k in each list describes the same paper.
paper_doi = []
authors_name = []
dates = []
paper_type =[]
for pairs in tqdm.tqdm(os.walk("/public/aps/raw_data/aps-dataset-metadata-2020/", topdown=False)):
    root = pairs[0]
    files = pairs[2]
    for name in files:
        path = os.path.join(root, name)
        # JSON text is UTF-8; state it explicitly so non-ASCII author names do
        # not depend on the platform's default encoding.
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)  # metadata of one paper

        # 'date' is a dash-separated year-month-day string.
        date_list = [int(part) for part in data['date'].split('-')]
        date = datetime.date(date_list[0], date_list[1], date_list[2])
        dates.append(date)

        paper_doi.append(data['id'])

        # Missing article type is recorded as None.
        paper_type.append(data.get('articleType'))

        if 'authors' in data:
            author_name = [author['name'] for author in data['authors']]
        else:
            # Some papers carry no author list at all.
            author_name = None
        authors_name.append(author_name)

In [None]:
# One row per paper: DOI, list of author names (or None), date, article type.
meta_paper_data = pd.DataFrame({
    'paperDoi':paper_doi,
    'authorName':authors_name,
    'date':dates,
    'type':paper_type
})
# The output folder is not guaranteed to exist; create it so the save cannot
# fail with FileNotFoundError.
os.makedirs('./data', exist_ok=True)
save_pkl('./data/meta_paper_data.pkl', meta_paper_data)

In [None]:
# Sanity check: earliest date, latest date and number of rows in the paper table.
meta_paper_data.date.min(),meta_paper_data.date.max(),len(meta_paper_data)

# count five-year citations

In [None]:
# Reload the paper table from './data/', which is where the cell that builds it
# saves it (the previous path './meta_paper_data.pkl' pointed at a file this
# notebook never writes).
meta_paper_data = load_pkl('./data/meta_paper_data.pkl')
doi_date = dict(zip(meta_paper_data.paperDoi,meta_paper_data.date))
cit_pair = pd.read_csv('/public/aps/raw_data/aps-dataset-citations-2020.csv')

# Attach the publication date of the citing paper, then of the cited paper.
cit_pair_with_time = pd.merge(
    cit_pair,
    meta_paper_data[['paperDoi','date']].rename(columns={'paperDoi':'citing_doi','date':'citing_pubdate'}),
    on='citing_doi',
    how='left',
).drop_duplicates()
cit_pair_with_time = pd.merge(
    cit_pair_with_time,
    meta_paper_data[['paperDoi','date']].rename(columns={'paperDoi':'cited_doi','date':'cited_pubdate'}),
    on='cited_doi',
    how='left',
).drop_duplicates()

# Drop pairs with any missing value, e.g. a paper whose date is not recorded.
cit_pair_with_time = cit_pair_with_time.dropna().reset_index(drop=True)

In [None]:
# Build two lookups in a single pass over the dated citation pairs:
#   citation_dict : cited DOI  -> set of DOIs that cite it within `n` years
#   reference_dict: citing DOI -> set of every DOI it cites (no time window)
n = 5
citation_dict = {}
reference_dict = {}

# `n` years are counted as n*365 days, i.e. leap days are ignored.
window_start = datetime.timedelta(days=0)
window_end = datetime.timedelta(days=n*365)

# Iterate the four columns directly instead of doing four `.iloc` lookups per
# row; the values visited and their order are unchanged.
pair_rows = zip(
    cit_pair_with_time.citing_doi,
    cit_pair_with_time.cited_doi,
    cit_pair_with_time.citing_pubdate,
    cit_pair_with_time.cited_pubdate,
)
for citing_doi_i, cited_doi_i, citing_pub_i, cited_pub_i in tqdm.tqdm(pair_rows, total=len(cit_pair_with_time)):
    delta = citing_pub_i - cited_pub_i

    # Windowed citations: the citing paper appeared no earlier than the cited
    # paper and at most `n` years after it.
    if window_start <= delta <= window_end:
        citation_dict.setdefault(cited_doi_i, set()).add(citing_doi_i)

    # References: every pair counts, regardless of the time gap.
    reference_dict.setdefault(citing_doi_i, set()).add(cited_doi_i)

# assign citations to paper data

In [None]:
# Attach to every paper the set of papers citing it inside the time window,
# the set of papers it references, and the size of each set.
meta_paper_data = meta_paper_data.drop_duplicates(subset=['paperDoi', 'date'])

citations = []
references = []
cit_count =[]
ref_count = []
for paperdoi in tqdm.tqdm(meta_paper_data.paperDoi):
    # A DOI absent from a lookup has no recorded citations/references: use an
    # empty set and a count of 0.
    cited_by = citation_dict.get(paperdoi, set())
    cites = reference_dict.get(paperdoi, set())
    citations.append(cited_by)
    cit_count.append(len(cited_by))
    references.append(cites)
    ref_count.append(len(cites))

meta_paper_data_2 = meta_paper_data.assign(
    citations=citations,
    citCount=cit_count,
    references=references,
    refCount=ref_count,
)

meta_paper_data_2 = meta_paper_data_2.drop_duplicates(subset=['paperDoi', 'date']).reset_index(drop = True)
# NOTE(review): this file lands next to the notebook, not inside './data/' like
# the other pickles - confirm which location downstream notebooks expect.
save_pkl('./data_meta_paper_data_2.pkl',meta_paper_data_2)

In [None]:
# Mean log(1 + citation count) per publication year.
# `meta_data_2` was never defined in this notebook, so derive it here from
# `meta_paper_data_2`; `assign` returns a new frame, leaving the source table
# without the two helper columns.
meta_data_2 = meta_paper_data_2.assign(
    logCit=np.log(meta_paper_data_2['citCount'] + 1),
    year=[pub_date.year for pub_date in meta_paper_data_2['date']],
)
plt.plot(meta_data_2.groupby('year').logCit.mean())

# deal with genres

In [None]:
# Load the PACS classification table; keep_default_na=False keeps empty cells
# as empty strings instead of converting them to NaN.
PACS = pd.read_csv('/public/aps/raw_data/PACS.txt', keep_default_na=False)
def get_genre(name):
    """Normalise one column of the global ``PACS`` table into short codes.

    Each cell is converted to text and stripped of dots, spaces and colons.
    A cell yields a code only if the cleaned text contains four consecutive
    digits and starts with two digits; the code is then its first six
    characters with the two Unicode dash variants replaced by '-'.
    Every other cell yields None.

    Args:
        name: column label in ``PACS`` (e.g. 'PACS1').

    Returns:
        A list with one entry (code string or None) per row of ``PACS``.
    """
    PACS_code = []
    for raw_value in tqdm.tqdm(PACS[name]):
        cleaned = str(raw_value).replace('.','').replace(' ','').replace(':','')
        p_code = re.match(r'((.*)([0-9]{4})(.*))', cleaned)
        if p_code and p_code.group(1)[:2].isdigit():
            PACS_code.append(p_code.group(1)[:6].replace('−','-').replace('–','-'))
        else:
            PACS_code.append(None)
    return PACS_code

# Parse each PACS column once (previously every column was parsed twice) and
# reuse the results for both the code set and the per-paper lists.
PACS1 = get_genre('PACS1')
PACS2 = get_genre('PACS2')
PACS3 = get_genre('PACS3')
PACS4 = get_genre('PACS4')
PACS5 = get_genre('PACS5')

# Every distinct value produced by the five columns; includes None whenever
# any cell failed to yield a code.
genres_set = set(PACS1 + PACS2 + PACS3 + PACS4 + PACS5)

# Per row of PACS: its codes in column order, with the None placeholders removed.
genres_list = [
    [code for code in row_codes if code is not None]
    for row_codes in tqdm.tqdm(zip(PACS1, PACS2, PACS3, PACS4, PACS5), total=len(PACS))
]

doi_cor_genre = dict(zip(PACS.DOI,genres_list))
# Hand-written overrides for three DOIs.
doi_cor_genre['10.1103/PhysRevA.60.R2614'] = ['0365Bz', '4250Dv', '89701c']
doi_cor_genre['10.1103/PhysRevB.66.104415'] = ['7570Pa', '71301h', '78202e']
doi_cor_genre['10.1103/PhysRevE.65.026128'] = ['05202y', '04402b', '05901m']

# assign PACS data to paper data

In [None]:
# Genre codes for every paper, in row order; None when the DOI has no PACS entry.
genres = [doi_cor_genre.get(paper) for paper in tqdm.tqdm(meta_paper_data_2.paperDoi)]

In [None]:
# New table = citation-annotated table plus a 'genres' column; `assign`
# returns a new frame, so meta_paper_data_2 is left untouched.
meta_paper_data_3 = meta_paper_data_2.assign(genres=genres)

In [None]:
# Keep only papers that have authors, a date and a DOI.
required_columns = ['authorName','date','paperDoi']
meta_paper_data_3 = meta_paper_data_3.dropna(subset=required_columns).reset_index(drop=True)

# Share of missing values per column for papers dated 1976-01-01 .. 2015-12-31.
not_too_early = meta_paper_data_3.date>=datetime.date(1976,1,1)
not_too_late = meta_paper_data_3.date<=datetime.date(2015,12,31)
part = meta_paper_data_3[not_too_early & not_too_late]
part.isnull().sum()/len(part)

In [None]:
# Persist the meta_paper_data_3 table as a pickle under ./data/.
save_pkl('./data/meta_paper_data_3.pkl',meta_paper_data_3)

# name disambiguation

## build data

In [None]:
# Second pass over the APS metadata dump, this time keeping affiliations.
# Per paper it collects the DOI, the date, a list of
# (author name, affiliation ids or None) tuples, and a dict mapping the
# paper's affiliation ids to affiliation names. Lists stay index-aligned.
paper_doi = []
authors_name = []
dates = []
paper_affs = []
for pairs in tqdm.tqdm(os.walk("/public/aps/raw_data/aps-dataset-metadata-2020/", topdown=False)):
    root = pairs[0]
    files = pairs[2]
    for name in files:
        path = os.path.join(root, name)
        # JSON text is UTF-8; state it explicitly so non-ASCII names do not
        # depend on the platform's default encoding.
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)  # metadata of one paper

        # 'date' is a dash-separated year-month-day string.
        date_list = [int(part) for part in data['date'].split('-')]
        date = datetime.date(date_list[0], date_list[1], date_list[2])
        dates.append(date)

        paper_doi.append(data['id'])

        if 'authors' in data:
            # An author without 'affiliationIds' gets None in the second slot.
            author_name = [
                (author['name'], author.get('affiliationIds'))
                for author in data['authors']
            ]
        else:
            # Some papers carry no author list at all.
            author_name = None
        authors_name.append(author_name)

        if 'affiliations' in data:
            paper_affs.append({aff['id']: aff['name'] for aff in data['affiliations']})
        else:
            paper_affs.append(None)

In [None]:
# One row per paper: DOI, (name, affiliation ids) tuples, date, and the
# paper's affiliation-id -> affiliation-name mapping.
meta_columns = {
    'paperDoi': paper_doi,
    'authorName': authors_name,
    'date': dates,
    'paperAff': paper_affs,
}
meta_data = pd.DataFrame(meta_columns)

## processing

In [None]:
# Drop every row that has a missing value in any column, then renumber the rows from 0.
meta_data = meta_data.dropna().reset_index(drop=True)

In [None]:
# Explode the paper table into one record per (paper, author). All lists below
# are index-aligned; record k receives the id str(k).
aid = []            # {own id}; grows when records are merged later
author_name = []    # cleaned, lower-cased name
alter_name = []     # {cleaned name}; set of name variants
doi = []            # {paper DOI}
ref_set = []        # DOIs referenced by the paper
coauthor_set = []   # ids of the other author records of the same paper
author_aff = []     # lower-cased affiliation names of this author
first_letter = []   # first character of the cleaned name
last_name = []      # last whitespace-separated token of the cleaned name
name_split = []     # cleaned name split on whitespace
gid = []            # own id as a plain string
journal = []        # {DOI with digits, commas, dots and slashes removed}
raw_name = []       # name exactly as found in the metadata
count = 0           # id that the next record will receive
test = 0            # number of papers cut short by an unusable author name

# A name starting with one of these characters is treated as unusable.
bad_first_chars = {'\n', '\u2008', '<', '\xa0', '.', '[', '('}

for i in tqdm.tqdm(range(len(meta_data))):
    p_doi = meta_data.paperDoi.iloc[i]
    authors_info = meta_data.authorName.iloc[i]
    # Affiliation id -> affiliation name for this paper.
    paper_aff_dict = meta_data.paperAff.iloc[i]

    # Pass 1: clean the names and decide which authors get a record. As before,
    # the first unusable name ends processing of the paper's remaining authors.
    # A name that is empty after cleaning is treated as unusable too (it used
    # to raise IndexError).
    kept_authors = []
    for a_info in authors_info:
        aname = a_info[0].lower().strip().replace('_','')
        aname_fix = re.sub(u"\\(.*\\)|\\{.*}|\\[.*]", "", aname).replace(', jr.','').replace(' jr.','').strip()
        if not aname_fix or aname[0] in bad_first_chars:
            test+=1
            break
        kept_authors.append((a_info, aname_fix))

    # Ids handed out for this paper. They are derived from the authors actually
    # kept, so a co-author id can never refer to a record of the next paper.
    pap_aids = [str(count + offset) for offset in range(len(kept_authors))]

    # Pass 2: emit one record per kept author.
    for a_info, aname_fix in kept_authors:
        own_id = str(count)
        aid.append({own_id})
        gid.append(own_id)
        author_name.append(aname_fix)
        alter_name.append({aname_fix})
        raw_name.append(a_info[0])
        sp_list = aname_fix.split()
        name_split.append(sp_list)
        first_letter.append(aname_fix[0])
        last_name.append(sp_list[-1])

        # Resolve affiliation ids to names. The id list is None when the author
        # has no 'affiliationIds'; ids missing from the paper's affiliation
        # table are skipped, which can leave the set empty.
        aff_ids = a_info[1] or []
        author_aff.append({
            paper_aff_dict[aff_id].lower()
            for aff_id in aff_ids
            if aff_id in paper_aff_dict
        })

        doi.append({p_doi})
        journal.append({re.sub(r'[\d,./]', '', p_doi)})
        # DOIs this paper references; used later for the mutual-citation rule.
        ref_set.append(reference_dict.get(p_doi, set()))
        # Co-authors are the other records of this paper.
        coauthor_set.append(set(pap_aids) - {own_id})
        count += 1

In [None]:
# One row per author record, assembled from the index-aligned lists built above.
author_paper_columns = {
    'gid': gid,
    'aid': aid,
    'rawName': raw_name,
    'authorName': author_name,
    'firstLetter': first_letter,
    'lastName': last_name,
    'alterName': alter_name,
    'nameSplit': name_split,
    'doi': doi,
    'journal': journal,
    'refSet': ref_set,
    'coauthorSet': coauthor_set,
    'authorAff': author_aff,
}
author_paper = pd.DataFrame(author_paper_columns)

In [None]:
# (firstLetter, lastName) -> sub-frame of the author records sharing that key.
sim_group = dict(tqdm.tqdm(author_paper.groupby(['firstLetter','lastName'])))

In [None]:
# Build one long string holding, for every affiliation of every paper, its
# distinct normalised terms separated by spaces. Each affiliation contributes
# "term term ... " (with a trailing space). The order of terms inside one
# affiliation follows set iteration order and is therefore not fixed.
aff_name_chunks = []
for i in tqdm.tqdm(range(len(meta_data))):
    try:
        for j in list(meta_data['paperAff'].iloc[i].values()):
            term_list = j.replace(',','').replace('.','').lower().split()
            aff_name_chunks.append(' '.join(set(term_list)))
    except AttributeError:
        # Missing affiliation table or a non-string affiliation name: skip the
        # rest of this paper. Other errors are no longer swallowed silently.
        continue

# Join once at the end instead of growing the string piece by piece.
aff_names = ' '.join(aff_name_chunks) + ' ' if aff_name_chunks else ''

In [None]:
def termFrequency(term, document):
    """Return ``document.count(term)`` divided by ``len(document)``.

    Args:
        term: item to count.
        document: sequence supporting ``count`` and ``len`` (a token list in
            this notebook).

    Raises:
        ZeroDivisionError: if ``document`` is empty.
    """
    occurrences = document.count(term)
    return occurrences / float(len(document))

def computeTf(document):
    """Return the term frequency of every distinct word in ``document``.

    The text is stripped of commas and dots, lower-cased and split on
    whitespace. Each word maps to (occurrences / number of tokens).
    Occurrences are tallied in a single pass rather than recounting the
    token list once per token.

    Args:
        document: affiliation name as a string.

    Returns:
        dict word -> frequency; empty for a document without tokens.
    """
    sentence = document.replace(',','').replace('.','').lower().split()
    total = float(len(sentence))
    return {word: occurrences / total for word, occurrences in Counter(sentence).items()}

def inverseDocumentFrequency(term, documents):
    """Return log(1348385 / df) for ``term``.

    ``df`` is ``documents.count(term)``. It is computed the first time a term
    is seen and cached in the module-level ``idf_dict``; later calls reuse the
    cached count and ignore ``documents``.

    Raises:
        ZeroDivisionError: if the count for ``term`` is 0.
    """
    global idf_dict
    if term not in idf_dict:
        idf_dict[term] = documents.count(term)
    df = idf_dict[term]
    return math.log(float(1348385) / df)
    
def computeIdf(document, documents):
    """Map every word of ``document`` to its inverse document frequency.

    ``document`` is stripped of commas and dots, lower-cased and split on
    whitespace; each token is scored with
    ``inverseDocumentFrequency(word, documents)``.

    Returns:
        dict word -> idf value.
    """
    words = document.replace(',','').replace('.','').lower().split()
    return {word: inverseDocumentFrequency(word, documents) for word in words}

def tfIdf(aff_name, aff_names):
    """Return the TF-IDF weight of every word in ``aff_name``.

    Args:
        aff_name: one affiliation name.
        aff_names: corpus passed on to ``computeIdf``.

    Returns:
        dict word -> term frequency * inverse document frequency.
    """
    term_freq = computeTf(aff_name)
    inverse_doc_freq = computeIdf(aff_name, aff_names)
    return {word: weight * inverse_doc_freq[word] for word, weight in term_freq.items()}

def calSim(aff_name1, aff_name2, aff_names):
    """Cosine similarity between the TF-IDF vectors of two affiliation names.

    Args:
        aff_name1, aff_name2: affiliation names to compare.
        aff_names: corpus passed on to ``tfIdf``.

    Returns:
        The cosine of the two vectors taken over the union of their words.
        When either vector has zero length (for example an empty name) the
        similarity is defined as 0.0. Dividing by the zero norm used to give
        NaN, and a NaN inside the caller's ``max(sims)`` makes the result
        depend on element order.
    """
    tf_idf_1 = tfIdf(aff_name1, aff_names)
    tf_idf_2 = tfIdf(aff_name2, aff_names)

    # Shared axis: every word that occurs in either name; absent words weigh 0.
    terms = set(tf_idf_1) | set(tf_idf_2)
    vec1 = np.array([tf_idf_1.get(term, 0) for term in terms], dtype=float)
    vec2 = np.array([tf_idf_2.get(term, 0) for term in terms], dtype=float)

    denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if denominator == 0:
        return 0.0
    return vec1.dot(vec2) / denominator

In [None]:
def isSameNameSet(nameset1, nameset2):
    """Compare two sets of name variants.

    Every cross pair is judged once with ``isSameName`` (previously each pair
    could be evaluated up to three times over two passes).

    Returns:
        '1'   if any pair is an exact full-name match; this wins even when
              another pair is incompatible.
        False if no pair matches exactly and at least one pair is incompatible.
        '2'   otherwise (all pairs merely compatible, or either set is empty).
    """
    found_conflict = False
    for name1 in nameset1:
        for name2 in nameset2:
            verdict = isSameName(name1, name2)
            if not verdict:
                found_conflict = True
            elif '1' in verdict:
                return '1'
    return False if found_conflict else '2'
def isSameName(name1, name2):
    """Compare two names part by part.

    A space is inserted after every dot, then each name is split on
    whitespace and hyphens. Parts are compared position by position up to the
    length of the shorter name: two unabbreviated parts must be equal; if
    either part contains a dot, only the first characters must agree.

    Returns:
        False if any compared pair of parts disagrees;
        '1'   if both names have the same number of parts and no compared
              part is abbreviated (full names identical);
        '2'   otherwise (names are compatible but not provably identical).
    """
    name1_list = re.findall(r'[^\-\s]+', name1.replace('.','. '))
    name2_list = re.findall(r'[^\-\s]+', name2.replace('.','. '))

    # Names of different length can at best be compatible.
    flag = '1' if len(name1_list) == len(name2_list) else '2'

    for part1, part2 in zip(name1_list, name2_list):
        if '.' in part1 or '.' in part2:
            # Abbreviated part: initials must agree, and the match is no
            # longer an exact full-name match.
            if part1[0] != part2[0]:
                return False
            flag = '2'
        elif part1 != part2:
            # Two spelled-out parts must be identical.
            return False
    return flag
            

In [None]:
def _merge_pass(evidence, name_ok=bool):
    """Run one greedy merge sweep over the current name group.

    For each record i, every later record j is examined. j is folded into i
    with ``update_list(i, j)`` when ``name_ok(isSameNameSet(...))`` is true
    and ``evidence(i, j)`` is true. Records folded into i are removed with
    ``del_list`` once all j have been examined, then the sweep moves to the
    next i. Because i grows while its j candidates are scanned, later
    candidates are compared against the already merged record.

    Args:
        evidence: callable (i, j) -> truthy when the two records share the
            required evidence; only called after the name check passed.
        name_ok: callable applied to the result of ``isSameNameSet``; the
            default accepts both exact ('1') and compatible ('2') names.

    Returns:
        True, always.
    """
    i = 0
    while i < len(aid_list):
        stop_set = []
        for j in range(i + 1, len(aid_list)):
            if name_ok(isSameNameSet(aname_list[i], aname_list[j])) and evidence(i, j):
                update_list(i, j)
                stop_set.append(j)
        del_list(stop_set)
        i += 1  # next record (merged-away records are already removed)
    return True


def ifSameAff():
    """Merge name-compatible records that share at least one identical affiliation name."""
    return _merge_pass(lambda i, j: bool(authorAff_list[i] & authorAff_list[j]))


def ifSameJournal():
    """Merge name-compatible records that share at least one journal key."""
    return _merge_pass(lambda i, j: bool(journal_list[i] & journal_list[j]))


def ifSimilar(sim_thres):
    """Merge name-compatible records with a sufficiently similar affiliation.

    Args:
        sim_thres: minimum ``calSim`` score; reaching it for any one pair of
            affiliation names (one from each record) is enough.
    """
    def _similar_affiliation(i, j):
        sims = [
            calSim(aff_name1, aff_name2, aff_names)
            for aff_name1 in authorAff_list[i]
            for aff_name2 in authorAff_list[j]
        ]
        # No affiliation on either side means no evidence.
        return bool(sims) and max(sims) >= sim_thres

    return _merge_pass(_similar_affiliation)


def ifCoauthor():
    """Merge name-compatible records that share at least one co-author id."""
    return _merge_pass(lambda i, j: bool(coauthorSet_list[i] & coauthorSet_list[j]))


def ifrefEachOther():
    """Merge name-compatible records whose papers cite each other.

    Both directions are required: i references a paper of j AND j references
    a paper of i. The two tests used to be combined with ``&``, which on sets
    is an intersection of the two results rather than a logical AND, so the
    rule could only fire when one DOI appeared in both intersections.
    """
    def _cite_each_other(i, j):
        i_cites_j = bool(doi_list[j] & refSet_list[i])
        j_cites_i = bool(doi_list[i] & refSet_list[j])
        return i_cites_j and j_cites_i

    return _merge_pass(_cite_each_other)


def ifSameWholeName():
    """Merge records whose name sets contain an exact full-name match ('1')."""
    return _merge_pass(lambda i, j: True, name_ok=lambda verdict: verdict == '1')

In [None]:
def update_list(i, j):
    """Fold record ``j`` into record ``i`` in the module-level group lists.

    Every set-valued attribute of i (ids, name variants, DOIs, references,
    co-authors, affiliations, journals) becomes the union of i's and j's
    sets. Record j itself is left in place; removing it is ``del_list``'s
    job. ``co_update_dict`` then records, under j's gid, the id set of the
    merged record i (extending an existing entry if one is present).
    """
    for merged in (aid_list, aname_list, doi_list, refSet_list,
                   coauthorSet_list, authorAff_list, journal_list):
        merged[i] = merged[i] | merged[j]

    # Remember which ids now stand for the absorbed record.
    absorbed_gid = gid_list[j]
    if absorbed_gid not in co_update_dict:
        co_update_dict[absorbed_gid] = aid_list[i]
    else:
        co_update_dict[absorbed_gid].update(aid_list[i])
    

def del_list(stop_set):
    """Remove the records at the positions in ``stop_set`` from every group list.

    All eight module-level lists are filtered with the same positions so they
    stay index-aligned.

    Args:
        stop_set: iterable of integer positions to drop; may be empty.
    """
    global aid_list, aname_list, gid_list, doi_list, refSet_list, coauthorSet_list, authorAff_list, co_update_dict, journal_list

    # Nothing to remove: leave the lists as they are instead of copying all of them.
    if not stop_set:
        return

    # Set membership is constant-time; testing against the original list made
    # every rebuild cost len(list) * len(stop_set).
    dropped = set(stop_set)

    def _keep(values):
        return [value for position, value in enumerate(values) if position not in dropped]

    aid_list = _keep(aid_list)
    aname_list = _keep(aname_list)
    gid_list = _keep(gid_list)
    doi_list = _keep(doi_list)
    refSet_list = _keep(refSet_list)
    coauthorSet_list = _keep(coauthorSet_list)
    authorAff_list = _keep(authorAff_list)
    journal_list = _keep(journal_list)

In [None]:
# Module-level state shared by the disambiguation helpers; running this cell
# resets all three to empty dicts.
idf_dict = {}        # term -> cached occurrence count, filled by inverseDocumentFrequency
co_update_dict = {}  # gid of an absorbed record -> id set of the record that absorbed it
pair_set_dict = {}   # listed in the helpers' global declarations; not written by the code in this notebook section

In [None]:
# Run the disambiguation over every (first letter, last name) group and
# collect the surviving merged records.
aid=[]
aname = []
gid=[]
doi=[]
refSet=[]
coauthorSet=[]
authorAff=[]

SIM_THRESHOLD = 0.15  # minimum affiliation similarity accepted by ifSimilar
count = 0

loop_group = {}


def _repeat_until_stable(merge_steps):
    """Run ``merge_steps`` in order, again and again, until a full round merges nothing.

    A round merged nothing when the number of records in the current group
    (``len(aid_list)``) is the same after the round as before it.
    """
    while True:
        size_before = len(aid_list)
        for step in merge_steps:
            step()
        if len(aid_list) == size_before:
            break


for key in tqdm.tqdm(sim_group.keys()):

    df = sim_group[key]

    if count%20000 == 0 :
        print('key=',key)

    # Working lists for this group; the merge helpers read and rebind them as
    # module-level names.
    aid_list=list(df.aid)
    aname_list=list(df.alterName)
    gid_list=list(df.gid)
    doi_list = list(df.doi)
    refSet_list = list(df.refSet)
    coauthorSet_list = list(df.coauthorSet)
    journal_list= list(df.journal)
    authorAff_list = list(df.authorAff)

    # Propagate merges made in earlier groups: a co-author id that was absorbed
    # into another record also brings in the ids of the absorbing record.
    merged_gids = co_update_dict.keys()
    for coauthors in coauthorSet_list:
        for item in coauthors & merged_gids:
            coauthors.update(co_update_dict[item])

    if len(df)!=1:  # a group with a single record has nothing to merge

        # Stage 1: affiliation, mutual-citation and co-author evidence.
        _repeat_until_stable([
            ifSameAff,
            lambda: ifSimilar(SIM_THRESHOLD),
            ifrefEachOther,
            ifCoauthor,
        ])

        # Stage 2: exact full-name matches, then shared journals.
        ifSameWholeName()
        ifSameJournal()

        # Stage 3: the records changed, so re-check citation and co-author evidence.
        _repeat_until_stable([ifrefEachOther, ifCoauthor])

    aid+=aid_list
    aname+=aname_list
    gid+=gid_list
    doi+=doi_list

    count += 1

In [None]:
# One row per disambiguated author: merged record ids, name variants,
# representative gid and the DOIs of the author's papers.
merged_author_columns = {
    'aid': aid,
    'aname': aname,
    'gid': gid,
    'doi': doi,
}
author_paper_2 = pd.DataFrame(merged_author_columns)
author_paper_2