In [158]:
# This notebook runs enriches and cleans the email metadata. Steps:
# 1. extract emails from paper metadata
# 2. enrich based on doi lookup for biorxiv repository
# 3. enrich based on doi lookup for PMC papers
# 4. match addresses to authors based on name matching
# 5. resolve conflicts where many people match one email
# 6. clean email addresses based on several observed pathologies from metadata
# 7. store output.

# code by Ben Jones (ben.jones@uta.edu) for the CoronaWhy project.


In [159]:
import pylab
import matplotlib
import pandas as pd
import numpy as np
from collections import defaultdict
import copy
import pickle
import os
import json
import re
import requests
%matplotlib inline

In [160]:
# Metadata from the CORD release
meta = pd.read_csv("metadata.csv")

# Paths to the papers (loop over these)
Paths=["./CORD-19-research-challenge/noncomm_use_subset/noncomm_use_subset/","./CORD-19-research-challenge/comm_use_subset/comm_use_subset/","./CORD-19-research-challenge/biorxiv_medrxiv/biorxiv_medrxiv/","./CORD-19-research-challenge/custom_license/custom_license/"]

In [161]:
# Get author information from papers
#  This info lives in the metadata block in each file.

AuthorFirst=[]
AuthorLast=[]
AuthorEmail=[]
AuthorFull=[]
AuthorMiddle=[]
AuthorSuffix=[]
PaperIDs=[]
for Path in Paths:
    print(Path)

    Files=os.listdir(Path)
    for p in Files:

        readfile=open(Path+p,'r')
        paper=json.load(readfile)
        for author in paper['metadata']['authors']:
            AuthorFirst.append(author['first'])
            AuthorLast.append(author['last'])
            AuthorEmail.append(author['email'])
            AuthorMiddle.append(re.sub('\W+','', str(author['middle'])))
            AuthorSuffix.append(author['suffix'])
            AuthorFull.append(AuthorLast[-1] + ", " + AuthorFirst[-1] + " " + AuthorMiddle[-1])
            PaperIDs.append(p[:-5])
authors=pd.DataFrame({'sha':PaperIDs,'first': AuthorFirst,'middle': AuthorMiddle,'last': AuthorLast,'suffix': AuthorSuffix,'email': AuthorEmail,'full': AuthorFull})
authors=authors.replace('',np.NaN)
authors.to_csv("AuthorsFromMetadata.csv")


./CORD-19-research-challenge/noncomm_use_subset/noncomm_use_subset/
./CORD-19-research-challenge/comm_use_subset/comm_use_subset/
./CORD-19-research-challenge/biorxiv_medrxiv/biorxiv_medrxiv/
./CORD-19-research-challenge/custom_license/custom_license/


In [163]:
# Enrich from Biorxiv / Medrxiv websites
# The common format here means we can be very specific on extraction syntax

Path="./CORD-19-research-challenge/biorxiv_medrxiv/biorxiv_medrxiv/"

Files=os.listdir(Path)
TheMatch=0
FileWithFullText=[]
DOI=[]
for f in Files:
    matches=meta[meta.sha==f.split('.')[0]]
    if(len(matches)>0):
        TheMatch=matches.iloc[0]
        if(TheMatch.has_full_text):
            FileWithFullText.append(f.split('.')[0])
            DOI.append(TheMatch.doi)

def PullEmailsBiorxiv(text):
    text=str(r.content)
    if("mailto" in text): 
        pos=text.find("mailto:") 
        Corresp=text[pos:-1].split(":")[1].split('"')[0]
    else:
        Corresp=""
    Auth=""
    while(True):
        if("citation_author_email" in text):
            pos=text.find("citation_author_email")
            Auth=Auth+str(text[pos:-1].split('content="')[1].split('"')[0])+";"
            text=text[pos+50:]
        else:
            break
    return(Corresp,Auth[:-1])

count=0
EmailsCollected={}
SaveCSV=[]
OutFile=open("./BiorxivData.csv","w")
for d in range(0,len(DOI)):
    addr="https://doi.org/"+DOI[d]
    r = requests.get(addr)
    text=r.content
    # We'll only grab the full list, and forget about corredponding for now
    EmailsCollected[FileWithFullText[d]]=PullEmailsBiorxiv(text)[1]
    count=count+1
    if(count%100==0):
        print("Processed "+str(count)+" of " + str(len(DOI)) + " records")
        # print(addr)
        # print(FileWithFullText[d],EmailsCollected[FileWithFullText[d]])


Processed 100 of 1053 records
Processed 200 of 1053 records
Processed 300 of 1053 records
Processed 400 of 1053 records
Processed 500 of 1053 records
Processed 600 of 1053 records
Processed 700 of 1053 records
Processed 800 of 1053 records
Processed 900 of 1053 records
Processed 1000 of 1053 records


In [164]:
PCMPapers=meta[(meta.source_x=='PMC') & meta.notna().sha &meta.notna().doi & meta.has_full_text]

In [165]:
PCMPapers.doi.values

array(['10.1186/1471-2458-3-5', '10.1186/gb-2003-4-5-213',
       '10.1186/1471-2350-4-9', ..., '10.1016/j.celrep.2015.05.012',
       '10.1016/j.ijppaw.2014.06.003', '10.1016/j.onehlt.2017.03.001'],
      dtype=object)

In [166]:
# Enrich from the PCM papers. Here was only reliably
#  extract one email per paper, for now.

def PullEmailPCM(text):
    Email=""
    try:
        if("mailto" in text): 
            pos=text.find("mailto:") 
            Email = text[pos:-1].split(":")[1].split('"')[0]
            if(("info" in Email) or ("webmaster" in Email) or ("biomedcentral" in Email) or (len(Email)>40)or ("editor" in Email)or ("journal" in Email)):
                Email=""
    except:
        return("")
    return(Email)

#Loop through dois pulling emails
count=0
for p in PCMPapers.index:
    addr="https://doi.org/"+PCMPapers.doi[p]
    try:
        r = requests.get(addr)
    except:
        continue
        
    text=r.content
    EmailsCollected[PCMPapers.sha[p]]=PullEmailPCM(str(text))
    count=count+1
    if(count%100==0):
        print("Processed "+str(count)+" of " + str(len(PCMPapers.index)) + " records")
    #    print(addr)
    #    print(PCMPapers.sha[p],EmailsCollected[PCMPapers.sha[p]])


Processed 100 of 14729 records
Processed 200 of 14729 records
Processed 300 of 14729 records
Processed 400 of 14729 records
Processed 500 of 14729 records
Processed 600 of 14729 records
Processed 700 of 14729 records
Processed 800 of 14729 records
Processed 900 of 14729 records
Processed 1000 of 14729 records
Processed 1100 of 14729 records
Processed 1200 of 14729 records
Processed 1300 of 14729 records
Processed 1400 of 14729 records
Processed 1500 of 14729 records
Processed 1600 of 14729 records
Processed 1700 of 14729 records
Processed 1800 of 14729 records
Processed 1900 of 14729 records
Processed 2000 of 14729 records
Processed 2100 of 14729 records
Processed 2200 of 14729 records
Processed 2300 of 14729 records
Processed 2400 of 14729 records
Processed 2500 of 14729 records
Processed 2600 of 14729 records
Processed 2700 of 14729 records
Processed 2800 of 14729 records
Processed 2900 of 14729 records
Processed 3000 of 14729 records
Processed 3100 of 14729 records
Processed 3200 of

In [167]:
# Now we need to run through matching emails to names



#We'll distinguish between 4 strengths of match:
# 4 - only one author and only one email exists
# 3 - match both first and last name in email address
# 2 - match last name
# 1 - match first name
# in case of conflicts, the highest stength option wins.
meta=meta[pd.notna(meta.sha) & meta.has_full_text]
strengths=np.zeros(len(authors.index))
authors.insert(4,'strengths',strengths)

for a in authors.index:
    record=authors.loc[a]
    paper=record.sha
    if(pd.isna(record.email) and (paper in EmailsCollected.keys()) and EmailsCollected[paper]!=''):  # only try to fill a gap if there is a gap to fill

        emailaddresses=EmailsCollected[paper].split(";")
        # use these to count matches
        matchesboth=-1
        matchesfirst=-1
        matcheslast=-1
        match=-1
        
        first=str(record['first']).lower()
        last=str(record['last']).lower()
       
        for ei in range(0,len(emailaddresses)):
            e=emailaddresses[ei].lower()
            
            # matching first and last name
            if((first in e) and ( last in e )and (len(last)>1)and (len(first)>1)):
                if(matchesboth>0):
                    print("   Uh oh... two double matches!")
                    print("   they are " + emailaddresses[ei]+", "+emailaddresses[matchesboth])
                matchesboth=ei
            # matching surname only
            elif ((last in e )and (len(last)>2) and pd.notna(record['last'])):
                if(matchesboth>0):
                    continue
                matcheslast=ei
            # matching first name only
            elif ((first in e ) and (len(first)>2) and pd.notna(record['first'])):
                if((matchesboth>0) or(matcheslast>0)):
                    continue
                matchesfirst=ei
                
            
            # find strongest match
            strength=0

            if((len(emailaddresses)==1) and (sum(authors.sha==paper)==1)):
                match=0
                strength=4 # unique combination
            elif(matchesboth>-1):
                match=matchesboth
                strength=3 # first and last name
            elif(matcheslast>-1):
                match=matcheslast
                strength=2 # last only
            elif(matchesfirst>-1):
                match=matchesfirst
                strength=1 # first only
                
                
        #If weg got a match, store it in the authors table
        if(match>-1):
            print(strength, authors.sha.loc[a], authors['first'].loc[a], authors['last'].loc[a], emailaddresses[match])
            authors.email.loc[a]=emailaddresses[match]
            authors.strengths.loc[a]=strength

2 01aa247284f0baf43293d8ea335b70781365a985 Heungsup Sung sung@amc.seoul.kr


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


1 041adbe00a7ca71fcbc38ff4406297ce86e93a6a Rossetto nan erossetto@usp.br
2 054831a6a11886b5ad44cac71d17e367721a2b5b Julian Hiscox j.a.hiscox@leeds.ac.uk
2 0612bb6d157e8cd266943d84daf110c46a56aa23 Sooyeon Suh alysuh@sungshin.ac.kr
1 07241235ee553a5b165ed19b2091101128eca805 Helen O&apos;shea helen.oshea@cit.ie
1 0aa3eda0fb09bf9f55864fc2b9157b9317f62323 Robson Bruniera-Oliveira robson.bruniera@gmail.com
2 0dc8d11784da63b899dbb2b404be4efd330e4ac3 Dongming Zhou dmzhou@sibs.ac.cn
2 0e78935692851b0466f5a9234adb024e7735c8a3 nan Chua chuakb@tll.org.sg
4 0f6091029a2cc8ddd6a5c6ee18de7478b73daab2 Emilio Espínola emilioespinola@hotmail.com
2 104e60eede9e4016c243f20f4541a33852ea3d1c Jin-Hwa Moon jinhwamoon@hanyang.ac.kr
4 105b993dee9f9e44637daa2d43e66f3ba3039fbf Esteban Domingo edomingo@cbm.uam.es
2 107ff91ec95dea35a80145aebbe5322c08c57e5e N Bellei nbellei@uol.com.br
2 11ebffa99e4b093c6304549246b534983012f909 Ji-Eun Ban jeban@naver.com
2 129fe5d1e963bc655146edf42a382819deca0ce6 Adekunle Adeoti Kadeo

2 92aca69f518fc394bd00b525898264893d9550af Young Jeon yjjeon@cnu.ac.kr
2 93f3546142cf477c77ef113ed4ff98a881a2c8c1 Piper Treuting treuting@uw.edu
4 953ce6c11451011ff271b730ac3d2bdadda75830 Ock-Joo Kim ockjoo.kim@gmail.com
2 9547b74fb56cd1288d9af6facbd337148cf45895 Matos Soeiro alexandre.soeiro@bol.com.br
2 9665e16e8fd0e3198298f64f70cc9180d1cea36a Biswendu Goswami bisgoswami@gmail.com
3 972a4e182f215c25fe41c9e0716a2f14ffd2ed9c Jianguo Xu xujianguo@icdc.cn
3 9b2f83db4c307ae17c146bb6625ddb01474bbd9a Rong Kuan kuanrong.chan@duke-nus.edu.sg
2 9b2f83db4c307ae17c146bb6625ddb01474bbd9a nan Chan kuanrong.chan@duke-nus.edu.sg
2 9b2f83db4c307ae17c146bb6625ddb01474bbd9a Eugenia Ong kuanrong.chan@duke-nus.edu.sg
2 9f7ffa3a1bbf4a6ad9e71f2377fb45564589d415 Ji-Youl Jung jungjy1982@jejunu.ac.kr
2 a2add35b84a53cbad3301093f4191d5edadf1d0e Ki Hahm hahmkb@cha.ac.kr
4 a70f7904950bba73ba43df71226c57344b28aa33 São Paulo tterezinha@uol.com.br
4 aad248c894ac1b35e2adad9e23c9cf40dc3c20aa Margaret Gardel theriot@st

1 03797126b6e6689cc02878e641e5271d3b9a5d3b Chun Id jjhchung@snu.ac.kr
1 03ca47766e6f3e8d5d71fa25d86803f82d690b38 Sun nan gbsun@implad.ac.cn
2 03ca47766e6f3e8d5d71fa25d86803f82d690b38 Xiao-Bo Sun gbsun@implad.ac.cn
2 048b240dcabbd623a6cda8c4236ea50d7961315c Vincent Nijman vnijman@brookes.ac.uk
2 04abbb8d91bd1a15a6c70ce3c0bd339f92468265 Charlanne Burke cburke@rockfound.org
2 04bc03c90437934a75fc6fdc228817234ef84c3a Zhenhua Dai zdai2009&#x00040
2 04d638b3c1d8b8fcd771e569e439b364d4ac2c1a Ty Hang hangvtt@oucru.org
1 04d638b3c1d8b8fcd771e569e439b364d4ac2c1a Han Ny hangvtt@oucru.org
3 05257a2230897ea006b3f68dbf0d71e1e7216f55 Paul Schnitzler Paul_Schnitzler@med.uni-heidelberg.de
3 0542a6ae26f57a5ffa35dcfebdec5004ff45688c Ruth Lynfield Ruth.Lynfield@state.mn.us
2 05774f9508a8c3399ffb20e758f933ffadc40fbd Ioannis Nezis I.Nezis@warwick.ac.uk
1 058b0cc7cecd43d80ed47196cb5376fa10efb58a Miriam Cebey-López Miriam.cebey.lopez@sergas.es
2 05a94d5f5083ff58320c659cd519536d3e6aa4a3 H Wodrich harald.wodrich

2 176274e5fb44083ebc8c91d87e013d624533c7f6 J August taugust@jhmi.edu
2 1763110ecc1742f6158f410a0c4f0b923118fd79 Yusuke Ohba yohba@med.hokudai.ac.jp
2 178363fb71ad0ae91e621e6e51fc03bb521f5695 Hee-Won Park heewon.park@utoronto.ca
2 17d92d371f5552921b86de6fb6cd7e65344ddb2e Soon Lee solee5301@gmail.com
3 17fe16cf66ebbe693a2e75dda11d14513fec7519 Thi Thanh thuy.nguyenthithanh&#x00040
3 17fe16cf66ebbe693a2e75dda11d14513fec7519 Thuy Nguyen thuy.nguyenthithanh&#x00040
1 17fe16cf66ebbe693a2e75dda11d14513fec7519 Thi Tram thuy.nguyenthithanh&#x00040
1 17fe16cf66ebbe693a2e75dda11d14513fec7519 Anh Tran thuy.nguyenthithanh&#x00040
2 17fe16cf66ebbe693a2e75dda11d14513fec7519 Van Nguyen thuy.nguyenthithanh&#x00040
2 18073920e12ee20d939dee5116c586da8accfd82 David Alland allandda@rutgers.edu
3 18176d1030fff6410b83e0df1591b3617fc739cb Gary Whittaker gary.whittaker@cornell.edu
1 18421f153f74c6b0f8f9efe88c2aad6b70547b99 Fuad Iraqi fuadi@post.tau.ac.il
3 184c8c4637d343cd81dddd0d3f5e6c922f538afe Christophe Mar

2 295d0e530c6b6d9df5c46872c36bf15dc85507fa Xi Han zhangyunzhi1818@163.com
1 295d0e530c6b6d9df5c46872c36bf15dc85507fa Yunzhi Zhangid zhangyunzhi1818@163.com
3 298ebce4397c9735f69cb5fcf9ad82881eead18f Yechiel Shai Yechiel.Shai@weizmann.ac.il
4 2990612686e9b3dc6819d62a2daeec9c857a98e9 Graham Pawelec graham.pawelec@uni-tuebingen.de
3 29ada2d0f89dbeee105c3f95b7df0ab204d9a444 David Warrilow David_Warrilow@health.qld.gov.au
2 29baaba300edea006f7b4d287edd3426d6573a26 Philippa Matthews p.matthews@doctors.org.uk
2 2a8e26038c98efac0a61629aa6b768fefad5a573 Jingguang Wei weijg@scau.edu.cn
1 2a8e26038c98efac0a61629aa6b768fefad5a573 Wei nan weijg@scau.edu.cn
1 2a8e26038c98efac0a61629aa6b768fefad5a573 Wei nan weijg@scau.edu.cn
2 2a8e26038c98efac0a61629aa6b768fefad5a573 Shina Wei weijg@scau.edu.cn
2 2aa31ef3d9d80509a965846122911a3fc53f7a99 Menegazzi Gotte giovanni.gotte&#x00040
2 2ab24bbc71562ee67437db9fee7f83edb40f4de9 Masayuki Saijo msaijo@nih.go.jp
1 2b244041ab6f2ab167b76c5b17332c5598b56431 Ishtiaq 

2 3ebbe746b0f09acf2bef30ee83a8fb7bb9cdc537 Arup Banerjee banerjeea@thsti.res.in
2 3ec7aa1d4381bbaa7f5fa69fa8eb7cc1d90a39f1 Xiuhua Guo guoxiuh@ccmu.edu.cn
3 3ed1483725e4ea6abcdbf93585eeccde903202fd Moran Ki moranki@naver.com
2 3ed400293b6f0963dcffb081e94de5229fb42cc7 Y Chan ykchan@fas.harvard.edu
1 3f1a7bb419c2658dfd71663cc87efd03a38f1716 Yin Li muziyin08@163.com
2 3fbbb5c2047570f4c33a88cb927a6f54e83de658 S Dube simant.dube@fluidigm.com
3 3fe4e2a98af36485a9a4dc93c30eef62522af0e0 Pei Liu syliupei2003@yahoo.com.cn
1 405e67def78a29e92a4de5e016d0334550153142 Sunil Kumar sunilks@mail.sysu.edu.cn
3 409e8fa9c8b69d7982ac0ecc28437a2e8a246d2a Moran Ki moranki@naver.com
2 40a57ffe8adda95a13b552b630b13d76344f666c Cassandra Jones jonesc@ksu.edu
2 40d0c093a36d66c91a2b8cd24e13c14d7f0c7359 Leonie-Sophie Hecht leonie.hecht@altona-diagnostics.com
2 412833ef524797198177f582425f82e45c0f7a06 P Nagy pdnagy2@uky.edu
2 417006f8744a4d8068ce146b06db09bbd48eaad2 J-F Wang jfwang8113@sjtu.edu.cn
2 423e1f15afb860120

1 54c8da2f34d0e830818e213cfa74e105e10dac38 Philippe Parola philippe.gautret@club-internet.fr
3 54c8da2f34d0e830818e213cfa74e105e10dac38 Philippe Gautret philippe.gautret@club-internet.fr
3 54cbbaf44335fdc1995cb19e5a08779929fd0966 Heide Reil heide.reil@viro.med.uni-erlangen.de
1 5503464f5c8ac4db987ce87b05f224a7aaafc1a7 Chen nan chenganchun&#x00040
1 5503464f5c8ac4db987ce87b05f224a7aaafc1a7 Cheng nan chenganchun&#x00040
2 5503464f5c8ac4db987ce87b05f224a7aaafc1a7 Shun Chen chenganchun&#x00040
2 5538bf904bcd32abf4ca1dbe9fc7e6c7514ebeda K Mcintyre k.m.mcintyre@liv.ac.uk
2 5566d234b7245b606a50f808fe09b5e3a4f04711 A Peel alisonpeel@gmail.com
2 5581ec03ebdca073e13b0df6329779940908f038 Berend-Jan Bosch b.j.bosch@uu.nl
3 55b7d9b5320f9763edab4a3d32586bbf5dc2c487 Jimmy Twin jimmy.twin@mcri.edu.au
3 56948f3e281e7271f952181ed0c3bd83d1d70f8e Nan nan nanyuchen2015&#x00040
2 56bea8bc53d2703d7d33244508932aa26d1ad442 Yijun Ruan ruanyj@gis.a-star.edu.sg
2 56fb26305485a7d9c7844df6b25e30c8f1af4363 Soon Lee 

3 68c13cb464cbe2b35102008464c5c196f7c122c1 William Valdar william.valdar@unc.edu
2 690ab99a21dca381b585a6a7d32f1e5c01f1e52c Lucy Reiss kreiss@ukaachen.de
2 692088c942db684cd4e1ec03c927eb3ae6f6caf6 Li-Ping Zhu zhulp@fudan.edu.cn
1 693a04c2c05485c6f19c6fc281ebf65dc4fe06dc Nan Zhang nanyuchen2015@nwsuaf.edu.cn
2 693d0f533004851d03a1f38a336f6aa943ea2733 D Liu dxliu@ntu.edu.sg
3 69a50ad03d5fb9a5129663062a9b718217b62665 Hergen Spits Hergen.Spits@amc.uva.nl
2 69bce356ec4ad3fcc2a0d7ba9ecfaf0d90d8e423 A Kinjo akinjo@protein.osaka-u.ac.jp
2 69ed0332751ba504d01cf6d6c266c9ae499ceb38 J Tang jwtang49@hotmail.com
2 6a0f4aac2dfa46df489b653a06df4b5036e452ea Sumana Sanyal sanyal@hku.hk
3 6a0f4aac2dfa46df489b653a06df4b5036e452ea Sanyal@hku Hk sanyal@hku.hk
2 6a2d0c94c7467bc93eab84eb348c458e85738aa3 Gyubyung Park parky@kangwon.ac.kr
2 6a2d0c94c7467bc93eab84eb348c458e85738aa3 Yung Park parky@kangwon.ac.kr
2 6a776cdff97d90eae72e7f8be666aef3a14224e2 Shan-Lu Liu liushan@missouri.edu
3 6aaa4563100d784e74766eac

2 7862ffdcf951e5860a7e3dc66c9d3b07f5fbc281 Chen Shen shenad16@hotmail.com
2 7862ffdcf951e5860a7e3dc66c9d3b07f5fbc281 A-Dong Shen shenad16@hotmail.com
2 7875c1a9783593b90bb90b99f285583067a3f433 Stephan Becker becker@staff.uni-marburg.de
1 78b06344bf1d834407a41bceb4999839085b0082 Xia nan xiachun&#x00040
3 78b06344bf1d834407a41bceb4999839085b0082 Chun Xia xiachun&#x00040
2 78f939545e7217684295ab63900119ab1ebdb173 Heinz Feldmann feldmannh@niaid.nih.gov
3 79316a90d0cd339b0d8d40407555b253994fd833 Patrick Fraering Patrick.fraering@epfl.ch
2 795bd84388973214e4b97ea23b80a9dc4e481117 Jacob Yount yount.37@osu.edu
1 796664228da20a93c66ea8c1956207c7704dca06 Nagao nan pnagao@uol.com.br
1 7a16fe9d867203bf83dfe50ef6dd93b8e1179db7 Blanco Jcg j.blanco@sigmovir.com
2 7a32e94a4c479957e6c98108ac3eaa344bc07f7d Kevin Lafferty lafferty@lifesci.ucsb.edu
2 7a5478dfe79fbf67551ac9261f190d899ba719b3 Wu-Chun Cao caowc@nic.bmi.ac.cn
2 7a5e362c102b0d9d73857a7e10df920d3a5e6334 M Poritz mark_poritz@idahotech.com
2 7aec

3 8c701ddbdb052d08eabd99191bd51916a2656ef4 Bin Du dubin98@gmail.com
2 8cd344d423cfae8a07936b58eb1e172fa62bcc5d Thomas Lane tlane@uci.edu
1 8d0367ee4b4bcd6fea276d8daa9bd2f50e401a34 Tatsuya Nagata tatsuya@unb.br
2 8d095d0275e474dbb9d9b63a75591ff2c0667d73 Vincent Deubel vdeubel@sibs.ac.cn
2 8dd14ffdb898c891d284cff71623b3e43ec0b0fe Zhiyuan Wen zywen7@yahoo.com
2 8deca5b29c3d3c0b11f1c9032a981f95dc8d1619 S Hoppe sebastian.hoppe@ibmt.fraunhofer.de
3 8e15c84010f5ea9e602ae6e51f9ac12ee754a9c9 Ramin Asgary ramin.asgary@caa.columbia.edu
2 8e4a1132b0301964add6af40ca83b222d7f6d9e3 S Zhang syzhang@bio.ecnu.edu.cn
3 8eefe017d2a4fedeac3243fb76f3b417b16023f2 Mathieu Bourgarel mathieu.bourgarel@cirad.fr
2 8f3221b42c66b835706134994f7b71f10f9b104d Denae Wagner dcwagner@ucdavis.edu
2 8f5fc8690f47c0c30dd99bd8d84f9e80f25fdcc8 Michael Chan mchan@hkucc.hku.hk
3 8fa2be5d5abb0473dda3892cf6f42a318f87b39b Xianjin Wu hhwuxianjin@163.com
2 9027b4d1324273bed34f667f3d61d2536e9fd316 Y Han shan.lu@umassmed.edu
2 90624629

2 a2c46da8970ae5d4e6b3b1f4d4d30a2ea426d1ff Brenda Coleman bcoleman@mtsinai.on.ca
4 a2ea85a02fee49f55f485a2b5d808636cb38a0bb nan Opriessnig veterinary.research@jouy.inra.fr
2 a30fd6139467d0ef97598274d3d926ddf4623e88 Suneth Agampodi sunetagampodi@yahoo.com
2 a3f75f7f759c60426c7a47b9cd2470a70ef244c0 Dongwan Yoo dyoo@illinois.edu
3 a401eee90cc270520c65bc001f31a617f4edb7df Sean Ekins ekinssean@yahoo.com
2 a445e4cc0146046d191344e3b61cd8dd4f33eb83 Christian Drosten drosten@virology-bonn.de
2 a447f42b4f4ed16afea798513f8c1409ee2252fa F Jean fjean@mail.ubc.ca
1 a4995af42a012dd1b7f8da3936d79d78d0e24405 Han Zhang yanghanchun1@cau.edu.cn
1 a4995af42a012dd1b7f8da3936d79d78d0e24405 Yan Li yanghanchun1@cau.edu.cn
3 a4995af42a012dd1b7f8da3936d79d78d0e24405 Hanchun Yang yanghanchun1@cau.edu.cn
2 a4e2f812c3232e60ac1af78fed10d292e28906d1 Sylvain Delisle sdelisle@umaryland.edu
2 a4ffcadecc4b60c30df8f699c480724523272e62 Ziwen Wang wang98h@263.net
2 a4ffcadecc4b60c30df8f699c480724523272e62 Lizhong Wang wang9

1 b6c38211730ff7826aa6a52aff9e3be848f8e738 Bao nan wbbao&#x00040
2 b6c38211730ff7826aa6a52aff9e3be848f8e738 Wenbin Bao wbbao&#x00040
4 b6eed04e43ad653c06247b10ab05d03bdccf04bc Christophe Fraser c.fraser@imperial.ac.uk
2 b708da1945a7a42cd61526e043d527592d4d9518 D Brighty d.w.brighty@dundee.ac.uk
2 b710adc78b56eb091aa40bc4cf5bc015e3f567e4 Y Voronin yvoronin@fhcrc.org
1 b7240857da81a0bee7cfb964a7e838f7f1120b46 Miguel Ngel Muñoz-Alía alia.miguel@mayo.edu
3 b72fc0df422070e077cfd05d8db51dfb059b9fd7 Anthony Kicic anthony.kicic@telethonkids.org.au
2 b786e66932955124f073e42aec53ea4a0f0cbb9b C Lanata clanata@iin.sld.pe
2 b7b122d6d4cc409f7a22bec63936640b4cd0f334 Ding Liu dxliu0001@163.com
2 b7bd823d7175ee551d3ba8508e5457d650056fd6 X Liu dxliu@imcb.a-star.edu.sg
2 b7bd823d7175ee551d3ba8508e5457d650056fd6 D Liu dxliu@imcb.a-star.edu.sg
2 b7cf054fd2a9f9366458e8c251a37bda87fca328 S Elena santiago.elena@csic.es
4 b7de4f4a99e8da86891ed28bca52afcbcbdabfa1 nan Gerber veterinary.research@jouy.inra.fr
2 b7

2 c82205e91bbd1135020b4fdf0d9df2654c66650a K White kawhite@yorku.ca
2 c848bce30b9e60bccfd15a5534af1d0cea54c686 Zhaozhong Zhu zhuxibysy@163.com
3 c848bce30b9e60bccfd15a5534af1d0cea54c686 Xi Zhu zhuxibysy@163.com
1 c851a889568efd469f724ebacfe3270946f3e04f Linda Faye Van Dyk Linda.VanDyk@uchsc.edu
2 c851e8a17951dc6f713c2a832e6a516f72154a79 T Lane tlane@uci.edu
2 c8978a8074a9df4bf07e49393821886d0b80979f Claire Midgley cmidgley@cdc.gov
2 c933e09cb9262b2edc2394b1a7d86357da840493 Patrick Woo pcywoo@hku.hk
3 c933e09cb9262b2edc2394b1a7d86357da840493 Pcywoo@hku Hk pcywoo@hku.hk
3 c933e09cb9262b2edc2394b1a7d86357da840493 Woo Pcy pcywoo@hku.hk
2 c93e251c48b43dfc3e162039968b8ce59810c410 R Savan savanram&#x00040
3 c99be1ffda777de5b7f9e0ed6c04fd70ce0bc3e1 Volker Thiel volker.thiel@kssg.ch
2 c9e24b269c1f1772454a1e93cc72c85251aeda6b Wenjie Tan tanwj28@163.com
2 ca1f90fcd49058d95c081f507c5496b6b962c982 Gajendra Raghava raghava@iiitd.ac.in
4 ca2a9474b1355a82b175767d68aaba4995e60681 Mark Denison ?subject=

1 da7fb6a91b2595d009f5e9f60a45cc0a45fceac7 Nan Zhang nanyuchen2015@nwsuaf.edu.cn
2 da81f0d3a12ab7faa09148acb6564271474e9e02 Zheng-Li Shi zlshi@wh.iov.cn
2 da867b3ea92b5191c4fbdc4d09c0712a73da7766 Jonna Mazet jkmazet@ucdavis.edu
3 daa3f7d4838adebf0aac1be3cda2924fc1d2106a Gavin Oudit gavin.oudit@ualberta.ca
1 db2710799be5aaa4eacbc75a921c6d4cc9364538 Ahmed Kandeil mohamedahmedali2004@yahoo.com
1 db2710799be5aaa4eacbc75a921c6d4cc9364538 Ahmed El-Taweel mohamedahmedali2004@yahoo.com
1 db2710799be5aaa4eacbc75a921c6d4cc9364538 Ahmed Kayed mohamedahmedali2004@yahoo.com
1 db2710799be5aaa4eacbc75a921c6d4cc9364538 Ahmed Mostafa mohamedahmedali2004@yahoo.com
1 db2710799be5aaa4eacbc75a921c6d4cc9364538 Ahmed Saad mohamedahmedali2004@yahoo.com
3 db2710799be5aaa4eacbc75a921c6d4cc9364538 Mohamed Ali mohamedahmedali2004@yahoo.com
2 db493d400b682be0385bd1ff034fa718d0c398cb Saad Omer somer@emory.edu
3 db7abfe9ffd34b558951907efa3be430d7bbf28b Ting Wu wuting@xmu.edu.cn lzscdclmq@126.com
2 db7b6440cfdfd2bbf7

1 ecfcf1c18796bfdf5dfd83193448b5c2f9fbdb81 Wan nan xcwan@ahau.edu.cn
2 ecfcf1c18796bfdf5dfd83193448b5c2f9fbdb81 Xiaochun Wan xcwan@ahau.edu.cn
2 ee019cb6808f738c245cc9de12b08a19951ef594 Fang Guo ju-tao.guo@bblumberg.org
1 ee019cb6808f738c245cc9de12b08a19951ef594 Guo nan ju-tao.guo@bblumberg.org
3 ee6d70a53e3262cea6f85bd8b226f6b4c8b5f64b Koussay Dellagi koussay.dellagi@ird.fr
2 eec9198400dee7d7ec27088377bced55c01a126a H Virgin virgin@wustl.edu
1 eef4cc12b84be0a4217da828b75f06529ba5d01b Fernando Pardo-Manuel De Villena fernando@med.unc.edu
2 ef720b08c80cb574e3db1e48be65750ebb4b5104 Cecilia Johansson c.johansson&#x00040
2 ef872b80cf38917f64c42bfa52a57beb4399897a Gary Nabel gnabel@nih.gov
2 efaa556b484fbcd9cc34832ffac53ef3e834e9c0 Jae-Hoon Kim cjkim@cnu.ac.kr
2 efaa556b484fbcd9cc34832ffac53ef3e834e9c0 Tae-Hwan Kim cjkim@cnu.ac.kr
2 efb09a669665f5e0e097c2d5e94c499a6e43bd9e Richard Zhao rzhao@som.umaryland.edu
1 efbd0dfc426da5dd25ce29411d6fa37571623773 Juan De La Torre juanct@scripps.edu
3 e

1 0139ea4ca580af99b602c6435368e7fdbefacb03 Anshu Bhardwaj anshu@imtech.res.in
3 013d9d1cba8a54d5d3718c229b812d7cf91b6c89 Shengjie Lai shengjie.lai@soton.ac.uk
1 013d9d1cba8a54d5d3718c229b812d7cf91b6c89 Isaac Bogoch isaac@bluedot.global
2 013d9d1cba8a54d5d3718c229b812d7cf91b6c89 Nick Ruktanonchai n.ruktanonchai@soton.ac.uk
1 013d9d1cba8a54d5d3718c229b812d7cf91b6c89 Alexander Watts alexander@bluedot.global
1 013d9d1cba8a54d5d3718c229b812d7cf91b6c89 Xin Lu xin_lyu@sina.com
1 013d9d1cba8a54d5d3718c229b812d7cf91b6c89 Kamran Khan kamran@bluedot.global
2 013d9d1cba8a54d5d3718c229b812d7cf91b6c89 Andrew Tatem a.j.tatem@soton.ac.uk
2 01d162d7fae6aaba8e6e60e563ef4c2fca7b0e18 Chunming Zhang cl.zhang@philrivers.com
3 01d162d7fae6aaba8e6e60e563ef4c2fca7b0e18 Yubo Zhai zhaiyubo17g@ict.ac.cn
2 01d162d7fae6aaba8e6e60e563ef4c2fca7b0e18 Zhonghai Zhang cl.zhang@philrivers.com
2 01d162d7fae6aaba8e6e60e563ef4c2fca7b0e18 Chunli Zhang cl.zhang@philrivers.com
2 01d162d7fae6aaba8e6e60e563ef4c2fca7b0e18 Yiqing X

2 0fbb18050e29ca78191625d42576b1c574027377 Yinhua Zhang zhangy@neb.com
2 0fbb18050e29ca78191625d42576b1c574027377 Nathan Tanner tanner@neb.com
1 0fd300aefb704c20f32152b97b6194015f1c74e7 Huijuan Zhou huijuan@stat.tamu.edu
3 0fd300aefb704c20f32152b97b6194015f1c74e7 Guannan Gao gaoguannan@tamu.edu
3 0fd300aefb704c20f32152b97b6194015f1c74e7 Linglin Xie linglin.xie@tamu.edu
2 0fd300aefb704c20f32152b97b6194015f1c74e7 Ke Zhang kurtzhang00@gmail.com
2 0ff4ad5359ee2df3568605385a8616d8da66bf2c Benjamin Bolker bbolker@gmail.com
3 0ff4ad5359ee2df3568605385a8616d8da66bf2c David Champredon david.champredon@gmail.com
2 0ff4ad5359ee2df3568605385a8616d8da66bf2c David Earn earn@math.mcmaster.ca
3 0ff4ad5359ee2df3568605385a8616d8da66bf2c Michael Li wzmichael.li@gmail.com
2 0ff4ad5359ee2df3568605385a8616d8da66bf2c Joshua Weitz jsweitz@gatech.edu
2 0ff4ad5359ee2df3568605385a8616d8da66bf2c Bryan Grenfell grenfell@princeton.edu
2 0ff4ad5359ee2df3568605385a8616d8da66bf2c Jonathan Dushoff dushoff@mcmaster.ca
2

2 18dc6a67045d2687d2b5c11c85797f42824ed243 Qingwu Jiang jiangqw@fudan.edu.cn
2 1983cbc32412a9a7b1601b6778605f9e98ab69e0 Nicole King nking@berkeley.edu
2 19bc9648d12dc6b7ca757f82dcd182c03111691e Zhangfu Fang upyifang@163.com
3 19bc9648d12dc6b7ca757f82dcd182c03111691e Kang Wu kangwu410@foxmail.com
2 19bc9648d12dc6b7ca757f82dcd182c03111691e Kefang Lai klai@163.com
1 19bc9648d12dc6b7ca757f82dcd182c03111691e Xizhuo Sun xizhuomd@163.com
1 1a12cc7d49d8521bec5d447ebb413cbcf5aca8f4 Jun Xing xujun0304@163.com
3 1a12cc7d49d8521bec5d447ebb413cbcf5aca8f4 Jun Xu xujun0304@163.com
2 1a9d52bf91d858565ab0ccc8cab03e919ef6e5c3 nan Cheng chengpk1002@163.com
2 1a9d52bf91d858565ab0ccc8cab03e919ef6e5c3 Wen Chen yaokaichen@hotmail.com
3 1a9d52bf91d858565ab0ccc8cab03e919ef6e5c3 Yaokai Chen yaokaichen@hotmail.com
2 1a9d52bf91d858565ab0ccc8cab03e919ef6e5c3 Junjie Chen yaokaichen@hotmail.com
3 1a9d52bf91d858565ab0ccc8cab03e919ef6e5c3 Da Huo huoda_tmmu@163.com
3 1a9d52bf91d858565ab0ccc8cab03e919ef6e5c3 Dayu Sun da

2 2634ab4b2f262ef28c5cdb34759ff3cae6c6187f Krzysztof Pyrc k.a.pyrc@uj.edu.pl
3 264a816e7099246c13a5dad3a581a474d85d50bd Dong Chen chendong_wz@126.com
3 264a816e7099246c13a5dad3a581a474d85d50bd Qifa Song qifasong@126.com
3 264a816e7099246c13a5dad3a581a474d85d50bd Feifei Su feifeisuzs@163.com
3 264a816e7099246c13a5dad3a581a474d85d50bd Jianyi Dai daijianyi2008@163.com
3 267729947ca478946d5a4bffb8e13d50c3545120 Yihui Xu xuyihuih@163.com
2 26e75d3c815aae7fd9b094c3e5c74d3f7132ca13 E Mullins edward.mullins@imperial.ac.uk
2 26e75d3c815aae7fd9b094c3e5c74d3f7132ca13 D Evans david.evans@nbt.nhs.uk
2 26e75d3c815aae7fd9b094c3e5c74d3f7132ca13 R Viner r.viner@ucl.ac.uk
2 26e75d3c815aae7fd9b094c3e5c74d3f7132ca13 E Morris epmorris@rcog.org.uk
   Uh oh... two double matches!
   they are bingzhao@fudan.edu.cn, bingzhao@fudan.edu.cn
   Uh oh... two double matches!
   they are bingzhao@fudan.edu.cn, bingzhao@fudan.edu.cn
3 2757708c4f72260abefb9d74302d868165cbc1a4 Bing Zhao bingzhao@fudan.edu.cn
2 2757708c4

2 2ea102f58147dab02e4dea90eb90dbc67149f678 Janet Chen gcheng@mednet.ucla.edu
1 2ea102f58147dab02e4dea90eb90dbc67149f678 Taijiao Jiang taijiao@ibms.pumc.edu.cn
2 2ea102f58147dab02e4dea90eb90dbc67149f678 Wenjie Tan tanwj@ivdc.chinacdc.cn
3 2eac7de1c407d9eab13a6d538142cd21a814ee7b Jessica Manning jessica.manning@nih.gov
2 2eac7de1c407d9eab13a6d538142cd21a814ee7b Erik Karlsson ekarlsson@pasteur-kh.org
2 2f6efc4edc0c9fffd3acc810efd8616e7772b28a Giorgio Colombo g.colombo@unipv.it
3 2fe6b550f737baa47a5f2c8ab64cc3d9271c308a Alberto Aleta albertoaleta@gmail.com
3 2fe6b550f737baa47a5f2c8ab64cc3d9271c308a Yamir Moreno yamir.moreno@gmail.com
3 3028628066ec2401f3981f4e70c5b1acd4cef573 Lauren Tindale lauren.tindale@bccdc.ca
3 3028628066ec2401f3981f4e70c5b1acd4cef573 Michelle Coombe michelle.coombe.vet@gmail.com
3 3028628066ec2401f3981f4e70c5b1acd4cef573 Jessica Stockdale jessica.stockdale@sfu.ca
3 3028628066ec2401f3981f4e70c5b1acd4cef573 Emma Garlock emma.s.garlock@gmail.com
2 3028628066ec2401f3981f

3 38bab3f17b186bd8ee289e5d135bb7d500ef500b Alexis Robert alexis.robert@lshtm.ac.uk
3 38bab3f17b186bd8ee289e5d135bb7d500ef500b Adam Kucharski adam.kucharski@lshtm.ac.uk
3 38bab3f17b186bd8ee289e5d135bb7d500ef500b Sebastian Funk sebastian.funk@lshtm.ac.uk
2 38e796bbec2f90b1b802c14dc922102e96f6361e Yicang Zhou zhouyc@xjtu.edu.cn
3 395e8569a0e6fc129029cb0b8cda1f7381954976 Michelle Wille michelle.wille@influenzacentre.org
   Uh oh... two double matches!
   they are edward.holmes@sydney.edu.au, edward.holmes@sydney.edu.au
3 395e8569a0e6fc129029cb0b8cda1f7381954976 Edward Holmes edward.holmes@sydney.edu.au
3 396c98994ac34692a5fa7f8502fb33cebc54445a Fan Zhang zhangfansmmu@163.com
1 396c98994ac34692a5fa7f8502fb33cebc54445a Haiying Ma haiying199901@163.com
2 396c98994ac34692a5fa7f8502fb33cebc54445a Luna Sun sunzhuoer@foxmail.com
3 396c98994ac34692a5fa7f8502fb33cebc54445a Zhuoer Sun sunzhuoer@foxmail.com
4 397bb0bda9dfe7504abf8ba98bf628e3368f9329 James Yeongjun ypark1@hsph.harvard.edu
   Uh oh... 

3 4602afcb8d95ebd9da583124384fd74299d20f5b Rebecca Dutch rebecca.dutch@uky.edu
   Uh oh... two double matches!
   they are gary.whittaker@cornell.edu, gary.whittaker@cornell.edu
3 4602afcb8d95ebd9da583124384fd74299d20f5b Gary Whittaker gary.whittaker@cornell.edu
3 4609a2dd3568a7241548b7303e0d836e33d4d5ca Yuzhang Wu wuyuzhang@tmmu.edu.cn
1 4609a2dd3568a7241548b7303e0d836e33d4d5ca Yongwen Chen yongwench@163.com
2 462cbb326ccd8587cae7a3538c8c6712d9013698 Xu Chen chenyan99727@csu.edu.cn
3 462cbb326ccd8587cae7a3538c8c6712d9013698 Cheng Lei leicheng@csu.edu.cn
2 462cbb326ccd8587cae7a3538c8c6712d9013698 Xianglin Zhou zhouzhiguo1217@163.com
2 462cbb326ccd8587cae7a3538c8c6712d9013698 Dixuan Jiang jjiang317@163.com
3 462cbb326ccd8587cae7a3538c8c6712d9013698 Ping Chen pingchen0731@csu.edu.cn
3 462cbb326ccd8587cae7a3538c8c6712d9013698 Yan Chen chenyan99727@csu.edu.cn
3 462cbb326ccd8587cae7a3538c8c6712d9013698 Hong Peng penghong66@csu.edu.cn
3 462cbb326ccd8587cae7a3538c8c6712d9013698 Zhiguo Zhou zh

3 4da8a87e614373d56070ed272487451266dce919 Sofia Morfopoulou sofia.morfopoulou.10@ucl.ac.uk
2 4e2bc97a6164191ba3d53abfa49ccd973aa80326 Zhichao Feng fengzc2016@163.com
1 4e2bc97a6164191ba3d53abfa49ccd973aa80326 Wei Nie cjr.wangwei@vip.163.com
3 4e2bc97a6164191ba3d53abfa49ccd973aa80326 Wei Wang cjr.wangwei@vip.163.com
2 4e7cd4e923777d6caaa76bb2b93f6121bcd5b6a3 Galina Karganova karganova@bk.ru
3 4eb8b7fd0032816e4a29d65b06939266d6446624 Alexander Fleischmann alexander.fleischmann@college-de-france.fr
1 4f53e43ba1bfb84611eaa839994f9297cdd65dc9 David Heslop drdrdavid@protonmail.com
1 4f53e43ba1bfb84611eaa839994f9297cdd65dc9 Raina Macintyre rainam@protonmail.com
3 4f8d24c531d2c334969e09e4b5aed66dcc925c4b Mingli Yuan yuanmingli.good@163.com
3 4f8d24c531d2c334969e09e4b5aed66dcc925c4b Yi Hu huyi_@163.com
2 4ffcd1d8e0f56c5a238bbd4040ec23410ef71a1b Young Choe ychoe@hallym.ac.kr
1 50688ca74af7fb4395c03e3cf7ec3bddc84e6165 Witting nan Michael.witting@helmholtz-muenchen.de
1 50688ca74af7fb4395c03e3cf7

1 598d3eb737dfa7701ce8c29c86bc9f6589d8a581 Man nan bshuyman@gmail.com
2 598d3eb737dfa7701ce8c29c86bc9f6589d8a581 Nguyen Phong bsphong70@oucru.org
2 598d3eb737dfa7701ce8c29c86bc9f6589d8a581 Tran Hien hientt@oucru.org
2 598d3eb737dfa7701ce8c29c86bc9f6589d8a581 Nguyen Truong bsthanhtruong@gmail.com
2 598d3eb737dfa7701ce8c29c86bc9f6589d8a581 Guy Thwaites gthwaites@oucru.org
2 5a5a7b6d40c4b752aabf69f69bbd7c71e2658e8e Isobel Routledge i.routledge15@imperial.ac.uk
1 5a843f4299b04090ff76451decfe9f0b025cfe48 Hayden Metsky hayden@mit.edu
2 5a843f4299b04090ff76451decfe9f0b025cfe48 Katherine Siddle kjsiddle@broadinstitute.org
3 5a9164999237a46d3d6aed53bd59b17d91a9e6d0 Jing Yu yujingrt@163.com
1 5a9164999237a46d3d6aed53bd59b17d91a9e6d0 Wen Ouyang wen19860213@163.com
2 5a9164999237a46d3d6aed53bd59b17d91a9e6d0 Conghua Xie chxie_65@whu.edu.cn
3 5aa0306fa361d31085f6df03d7e71307534b34c6 Han Xiao han.xiao@aalto.fi
3 5b11e063f09adbfdaf7fd515ecc6c09a83674263 Han Ke hanke301@163.com
3 5b11e063f09adbfdaf7fd5

2 681c5099b2afb08fe5cb2f58d92e9c6e4873bec2 Padmini Rangamani prangamani@eng.ucsd.edu
2 683416e300a4f4075835852558855b07c342bd72 Siddappa Byrareddy sid.byrareddy@unmc.edu
2 68e509d42a7d3349550512d80eca4e2b2123613f Wei Zuo zuow@tongji.edu.cn
3 69f473aee22a4ac8df15a54a6109b70b1ae9b7e7 Alanna Schepartz alanna.schepartz@yale.edu
2 6a8d26d75bd44c7666c9b7ed782c13a57f409122 Thomas Mariani Tom_Mariani@urmc.rochester.edu
2 6ae20454d1a9f228864de24660c2460becbc8151 Guo-Wei Wei wei@math.msu.edu
1 6bdd6867205d9b09f833f01c65f1bbfdae469164 Song Ying songy75@mail2.sysu.edu.cn
2 6bdd6867205d9b09f833f01c65f1bbfdae469164 nan Zheng zhengshj9@mail.sysu.edu.cn
3 6bdd6867205d9b09f833f01c65f1bbfdae469164 Li Liang liliang_082@163.com
2 6bdd6867205d9b09f833f01c65f1bbfdae469164 Yutian Chong chongyt@mail.sysu.edu.cn
3 6bdd6867205d9b09f833f01c65f1bbfdae469164 Jun Shen shenjun@mail.sysu.edu.cn
3 6bdd6867205d9b09f833f01c65f1bbfdae469164 Yunfei Zha zhayunfei999@126.com
2 6bdd6867205d9b09f833f01c65f1bbfdae469164 Yuedon

1 74b00f19c3af87d1081644f02490ba250f57b7ca Mujahed Mustafa mujahedibrahim44@gmail.com
2 74bee8bf3229f4c28991a7231c02d911d24770e9 Gustavo Barra gbbarra@gmail.com
3 74bee8bf3229f4c28991a7231c02d911d24770e9 Ticiane Henriques ticianehenriques@gmail.com
1 74bee8bf3229f4c28991a7231c02d911d24770e9 Pedro Mesquita pedrogm@gmail.com
1 74bee8bf3229f4c28991a7231c02d911d24770e9 Rafael Henriques Jácomo rafaeljacomo@sabin.com.br
1 754315299d847600d6c5d414665c728d40bf731d Ming 1# wkming@connect.hku.hk
3 75e68869f9b65bca661e768402395d9dede6de2c Shilei Zhao zhaoshilei2018d@big.ac.cn
2 7607a4ad84452e29998a80f44bfd6bb2f5f68a7f Chuang Liu liulei3322@aliyun.com
2 7607a4ad84452e29998a80f44bfd6bb2f5f68a7f Congcong Liu liulei3322@aliyun.com
2 7607a4ad84452e29998a80f44bfd6bb2f5f68a7f Weilong Liu liulei3322@aliyun.com
2 7607a4ad84452e29998a80f44bfd6bb2f5f68a7f Yingxia Liu liulei3322@aliyun.com
2 7607a4ad84452e29998a80f44bfd6bb2f5f68a7f Zheng Liu liulei3322@aliyun.com
   Uh oh... two double matches!
   they are z

1 83cca3779f8aeea47c5426e1d0fa65f1e9567e0c Amit Kumar amitk@iiti.ac.in
2 8575c06c64fdd0bb43efc46cd994f557698c755f Xingwang Jia jiaxingw301@163.com
2 8575c06c64fdd0bb43efc46cd994f557698c755f Pengjun Zhang zhangpj301@126.com
2 8575c06c64fdd0bb43efc46cd994f557698c755f Yaping Tian tianyp61@gmail.com
3 8575c06c64fdd0bb43efc46cd994f557698c755f Junli Wang wangjunli0110@163.com
3 8575c06c64fdd0bb43efc46cd994f557698c755f Jun Wang wangjunli0110@163.com
2 8575c06c64fdd0bb43efc46cd994f557698c755f Jiao Liu docliu18@qq.com
3 8575c06c64fdd0bb43efc46cd994f557698c755f Zeyan Chen chenzeyan8@163.com
2 8575c06c64fdd0bb43efc46cd994f557698c755f Lijun Zhang zhangpj301@126.com
3 8575c06c64fdd0bb43efc46cd994f557698c755f Haihong He hehaihong@163.com
2 8575c06c64fdd0bb43efc46cd994f557698c755f Yajie Liu docliu18@qq.com
3 85e00fe8099f838d9eb455a7f62440d1bf7bc639 Hongyi Chen chenhongyi@163.com
3 85e00fe8099f838d9eb455a7f62440d1bf7bc639 Xiaodong Li xiaodong.li@ascletis.com
3 85e00fe8099f838d9eb455a7f62440d1bf7bc639 

1 90e28ff462882ca7a9329beb879f73c2e99430e4 Jeremy nan Jeremy.Reiter@ucsf.edu
2 90e28ff462882ca7a9329beb879f73c2e99430e4 nan Reiter Jeremy.Reiter@ucsf.edu
3 91098f6fe46a21565bf0cb06fe960cdb2c3f5e38 Qunhong Wu wuqunhong@163.com
3 91098f6fe46a21565bf0cb06fe960cdb2c3f5e38 Xi Chen xi.chen@yale.edu
2 91098f6fe46a21565bf0cb06fe960cdb2c3f5e38 Zhuo Chen zchen1@uga.edu
3 91098f6fe46a21565bf0cb06fe960cdb2c3f5e38 Haridah Alias haridahalias@gmail.com
3 91098f6fe46a21565bf0cb06fe960cdb2c3f5e38 Mingwang Shen mingwangshen521@xjtu.edu.cn
3 91098f6fe46a21565bf0cb06fe960cdb2c3f5e38 Shiwei Duan duanshiwei@nbu.edu.cn
3 91098f6fe46a21565bf0cb06fe960cdb2c3f5e38 Jinjie Zhang zhangjinjie@nbu.edu.cn
1 9211b1703c76e20afdee8fcb9f7996edadef0e9e Aman 2&amp; saudaman@hotmail.com
3 9231e9d6dd98dca26e67eaf02eea8c0ad0bcce3b Louise Smith louise.e.smith@kcl.ac.uk
3 9231e9d6dd98dca26e67eaf02eea8c0ad0bcce3b Rebecca Webster rebecca.webster@kcl.ac.uk
3 9231e9d6dd98dca26e67eaf02eea8c0ad0bcce3b Dale Weston dale.weston@phe.gov.

2 9ad252c633b7596a4b64ca9f1699a0db5ce99c05 Jiayou Zhang zhangx@shsmu.edu.cn
2 9ad252c633b7596a4b64ca9f1699a0db5ce99c05 Lianghao Zhang zhangx@shsmu.edu.cn
2 9ad252c633b7596a4b64ca9f1699a0db5ce99c05 Zhi Zhang zhangx@shsmu.edu.cn
2 9ad252c633b7596a4b64ca9f1699a0db5ce99c05 Wei Zhang zhangx@shsmu.edu.cn
2 9ad252c633b7596a4b64ca9f1699a0db5ce99c05 Xinxin Zhang zhangx@shsmu.edu.cn
3 9ae476404f7ef1ec1ede965f0b898f31a5bf5a81 Jie Liu liu_jie0823@163.com
3 9ae476404f7ef1ec1ede965f0b898f31a5bf5a81 Liu Ouyang ouyangliu211@hust.edu.cn
2 9ae476404f7ef1ec1ede965f0b898f31a5bf5a81 Pi Guo pguo@stu.edu.cn
3 9ae476404f7ef1ec1ede965f0b898f31a5bf5a81 Haisheng Wu wuhaisheng01@163.com
1 9ae476404f7ef1ec1ede965f0b898f31a5bf5a81 Peng Fu shuyi.peng@foxmail.com
3 9ae476404f7ef1ec1ede965f0b898f31a5bf5a81 Xiaoyu Han xiaoyuhan1123@163.com
3 9ae476404f7ef1ec1ede965f0b898f31a5bf5a81 Yukun Cao caoyukun@foxmail.com
3 9ae476404f7ef1ec1ede965f0b898f31a5bf5a81 Osamah Alwalid dr.osamah_alwalid@yahoo.com
3 9ae476404f7ef1ec1ede

1 a388cca261e8af7936cf3b79c35f43442f65706b Liangzhi Xie liangzhi@yahoo.com
2 a400b0cfaa60df27b2ee3beea37dd58157912c34 Mattia Sanna msanna@tmu.edu.tw
2 a400b0cfaa60df27b2ee3beea37dd58157912c34 Chi Wen cwengood@nhri.edu.tw
3 a42ff48d50aa3a0ebfe840d46ed2204c49955442 Long Huang longhuang19@126.com
3 a42ff48d50aa3a0ebfe840d46ed2204c49955442 Fuming Xu fumingxu@126.com
3 a44334e676e43c1889d282b0f0a8365f0f1e0c52 Adam Kucharski adam.kucharski@lshtm.ac.uk
3 a44334e676e43c1889d282b0f0a8365f0f1e0c52 Timothy Russell timothy.russell@lshtm.ac.uk
3 a44334e676e43c1889d282b0f0a8365f0f1e0c52 Charlie Diamond charlie.diamond@lshtm.ac.uk
3 a44334e676e43c1889d282b0f0a8365f0f1e0c52 Yang Liu yang.liu@lshtm.ac.uk
3 a44334e676e43c1889d282b0f0a8365f0f1e0c52 John Edmunds john.edmunds@lshtm.ac.uk
3 a44334e676e43c1889d282b0f0a8365f0f1e0c52 Sebastian Funk sebastian.funk@lshtm.ac.uk
2 a44334e676e43c1889d282b0f0a8365f0f1e0c52 Rosalind Eggo r.eggo@lshtm.ac.uk
1 a48bc1745fa15021b49890385022bbe7c4b1076b Neeltje Van Dorema

3 ac4a3bbe27272cd6780e7168906b0bf1413fc749 Brett Lindenbach brett.lindenbach@yale.edu
3 ac81102667b0d56edeb8ab0044765dc49a19f374 Volker Thiel Volker.thiel@vetsuisse.unibe.ch
2 acb0e11e5763dbf7588ec435751a2ab705a783f1 Arunachalam Ramaiah aramaiah@uci.edu
2 acb0e11e5763dbf7588ec435751a2ab705a783f1 Vaithilingaraja Arumugaswami VArumugaswami@mednet.ucla.edu
3 ad146e228bda4e5a3d56b28a3a1acbd01e0b323e Marco Traini marcoclaudio.traini@unitn.it
2 adb3a6501ad731eda95d0a1a182a793a0dcd58b0 Aimee Taylor ataylor@hsph.harvard.edu
2 ae6fc64042b050df93ebb8f8045892952f18510f Andreas Handel ahandel@uga.edu
2 aef0734eb2538d3624d650a4ae20c3f66cace5f2 Vania Prado mprado@robarts.ca
2 aef0734eb2538d3624d650a4ae20c3f66cace5f2 Marco Prado mprado@robarts.ca
3 af266fac8970a7960e96630a67d91bec5dda0335 Ganyani Tapiwa tapiwa.ganyani@uhasselt.be
2 b0746b57a6c8f3cd9e52a27b0d708d069134b502 Léa Joffrin lea.joffrin@gmail.com
   Uh oh... two double matches!
   they are camille.lebarbenchon@univ-reunion.fr, camille.lebarb

1 b9c285b9be5524a8d241c0a32a1cb1ef9df155f6 Albertine Weber albertinecar@gmail.com
3 b9c285b9be5524a8d241c0a32a1cb1ef9df155f6 Flavio Iannelli iannelli.flavio@gmail.com
2 b9d429419f94f203f63439ffd95e3cac2a958fa6 David Enard denard@email.arizona.edu
3 b9dbb79c9e06164c3b7fefce67c11c5d6caf1fcd Jakub Bartoszewicz jakub.bartoszewicz@hpi.de
   Uh oh... two double matches!
   they are bernhard.renard@hpi.de, bernhard.renard@hpi.de
3 b9dbb79c9e06164c3b7fefce67c11c5d6caf1fcd Bernhard Renard bernhard.renard@hpi.de
3 ba1c4c0de19352d0b9ed7db6199803b621f0a8f2 Mengcen Qian qianmengcen@fudan.edu.cn
3 ba1c4c0de19352d0b9ed7db6199803b621f0a8f2 Qianhui Wu wuqianhui1994@foxmail.com
3 ba1c4c0de19352d0b9ed7db6199803b621f0a8f2 Peng Wu wupeng06@gmail.com
2 ba1c4c0de19352d0b9ed7db6199803b621f0a8f2 Zhiyuan Hou zyhou@fudan.edu.cn
3 ba1c4c0de19352d0b9ed7db6199803b621f0a8f2 Yuxia Liang scudxliangyuxia@163.com
2 ba1c4c0de19352d0b9ed7db6199803b621f0a8f2 Benjamin Cowling bcowling@hku.hk
1 ba3efcd6b74e55327fd7db470d824f

2 c10661c9b35e068691b879661bcebeba3bd6aad9 Xuhui Chen chenxh_2016@126.com
2 c10661c9b35e068691b879661bcebeba3bd6aad9 Liwen Chen chenxh_2016@126.com
2 c10661c9b35e068691b879661bcebeba3bd6aad9 Bo Chen chenxh_2016@126.com
2 c12ad8b411c24fe215be91d1f990645743e55ab5 Lili Wang yiwangz@umich.edu
1 c12ad8b411c24fe215be91d1f990645743e55ab5 Yiwang Zhou yiwangz@umich.edu
3 c12ad8b411c24fe215be91d1f990645743e55ab5 Jie He jiehe@umich.edu
2 c12ad8b411c24fe215be91d1f990645743e55ab5 Fei Wang yiwangz@umich.edu
3 c12ad8b411c24fe215be91d1f990645743e55ab5 Lu Tang lutang@pitt.edu
1 c12ad8b411c24fe215be91d1f990645743e55ab5 Marisa Eisenberg marisae@umich.edu
2 c12ad8b411c24fe215be91d1f990645743e55ab5 Peter Song pxsong@umich.edu
1 c14d50924f959d38ff7857e6db3a60c929448956 Helena nan helena.maier@pirbright.ac.uk
2 c14d50924f959d38ff7857e6db3a60c929448956 nan Maier helena.maier@pirbright.ac.uk
2 c1a08ce8f2c5d5311b19822c3df5aa9d25c4d93c Zhiguo Dai dai_zg2015@sina.com
1 c1a08ce8f2c5d5311b19822c3df5aa9d25c4d93c Dai

2 ce1d3a3103bbd8b5d7164259be03f1d6d3d5d251 Qinzhen Cai cai200504046@126.com
1 ce1d3a3103bbd8b5d7164259be03f1d6d3d5d251 Hong Sun shong19@163.com
2 d010cc0209bc5a0eee6259f72c42590c448da1cf nan Engel philipp.engel@unil.ch
3 d01e580a45c457c7c200cd884e746b4d836e2ea9 Max Crispin max.crispin@soton.ac.uk
3 d069dfb7f0aefcdc2c890a1bbe773ebd26b01a55 Ke Wu doctorwuke@sjtu.edu.cn
2 d069dfb7f0aefcdc2c890a1bbe773ebd26b01a55 Junhua Zheng zhengjh0471@sina.com
1 d070dc3ef2125fd891f3e023598831e23a145880 Zeng Li zengjy321@tsinghua.edu.cn
2 d070dc3ef2125fd891f3e023598831e23a145880 Hainian Zeng zengjy321@tsinghua.edu.cn
   Uh oh... two double matches!
   they are zhaodan2018@tsinghua.edu.cn, zhaodan2018@tsinghua.edu.cn
   Uh oh... two double matches!
   they are zhaodan2018@tsinghua.edu.cn, zhaodan2018@tsinghua.edu.cn
3 d070dc3ef2125fd891f3e023598831e23a145880 Dan Zhao zhaodan2018@tsinghua.edu.cn
2 d070dc3ef2125fd891f3e023598831e23a145880 Xiaokun Shen steve.shen@convalife.com
2 d070dc3ef2125fd891f3e02359883

1 d82fe700418fb6494ffb9a2c0cf4f6b8012b3824 Mark nan mark.denison@vumc.org
2 d82fe700418fb6494ffb9a2c0cf4f6b8012b3824 nan Denison mark.denison@vumc.org
3 d83e3c028de1950c4f8dedae21eb90f90c4ed6c3 Maritza Jaramillo maritza.jaramillo@iaf.inrs.ca
3 d83f238ec5aeaf6f08d20c4416c1ca9f631f698e Nathan Lo nathan.lo@ucsf.edu
2 d83f238ec5aeaf6f08d20c4416c1ca9f631f698e Trevor Hastie hastie@stanford.edu
3 d83f238ec5aeaf6f08d20c4416c1ca9f631f698e Sanjay Basu sanjay_basu@hms.harvard.edu
2 d8c2466863d8ff87bc1aaf5218ce6f3f7fb29c3b Luis Barreiro lbarreiro@uchicago.edu
2 d9eeeb81d17be6a722b11081f13576de197d249f Ming Shi shikidney@qq.com
2 d9eeeb81d17be6a722b11081f13576de197d249f Guohua Ding ghxding@gmail.com
   Uh oh... two double matches!
   they are sebastian.eustermann@embl.de, sebastian.eustermann@embl.de
   Uh oh... two double matches!
   they are sebastian.eustermann@embl.de, sebastian.eustermann@embl.de
3 da3aa20131ac2805c0d9e1b29f094683479ab5b7 Sebastian Eustermann sebastian.eustermann@embl.de
2 da3

2 e1632ff25e6c30d4d89828154be1389a90109db8 Mohammad Arif arif@hawaii.edu
3 e172b0d49bd083dca5cd1021900477dd97774b84 Joel Miller joel.c.miller.research@gmail.com
3 e172b0d49bd083dca5cd1021900477dd97774b84 Yang Ge yang.ge@uga.edu
2 e172b0d49bd083dca5cd1021900477dd97774b84 Chun-Hai Fung cfung@georgiasouthern.edu
1 e226beab8af4202a5182f7603fe468dedd5dee6a Sandra nan sandra.loesgen@oregonstate.edu
1 e226beab8af4202a5182f7603fe468dedd5dee6a Loesgen nan sandra.loesgen@oregonstate.edu
3 e367637e063ec728b13f3685e8aee5f775c553cd Biqing Chen chenbiqing333@163.com
2 e3f19f88e7e3d1279b4d93e4db041e44a8d35dbb Julie Spencer jaspencer@lanl.gov
2 e3f19f88e7e3d1279b4d93e4db041e44a8d35dbb Deborah Shutt dshutt@lanl.gov
3 e3f19f88e7e3d1279b4d93e4db041e44a8d35dbb Hannah Clegg hannah_clegg@lanl.gov
2 e3f19f88e7e3d1279b4d93e4db041e44a8d35dbb Helen Wearing hwearing@unm.edu
1 e3f19f88e7e3d1279b4d93e4db041e44a8d35dbb Harshini Mukundan harshini@unm.edu
2 e3f19f88e7e3d1279b4d93e4db041e44a8d35dbb Carrie Manore cmano

3 eaf0b485f290fa884dd18de71be79d86de20eb31 Lu Liang lianglu1@foxmail.com
3 eaf0b485f290fa884dd18de71be79d86de20eb31 Mengcen Qian qianmengcen@fudan.edu.cn
3 eaf0b485f290fa884dd18de71be79d86de20eb31 Yuxia Liang scudxliangyuxia@163.com
2 eaf0b485f290fa884dd18de71be79d86de20eb31 Juanjuan Zhang zhangjuan4486@163.com
3 eb8ac60527db35b10881cb4fd86b8a6e21983d02 Hongzhou Lu luhongzhou@fudan.edu.cn
3 eb8ac60527db35b10881cb4fd86b8a6e21983d02 Jingwen Ai jingwenai1990@126.com
3 eb8ac60527db35b10881cb4fd86b8a6e21983d02 Yinzhong Shen shenyinzhong@shphc.org.cn
3 eb8ac60527db35b10881cb4fd86b8a6e21983d02 Yang Li lixiangyang820@hotmail.com
3 eb8ac60527db35b10881cb4fd86b8a6e21983d02 Tao Li litaokc@126.com
3 eb8ac60527db35b10881cb4fd86b8a6e21983d02 Xian Zhou zhouxian-13@163.com
1 eb8ac60527db35b10881cb4fd86b8a6e21983d02 Zhang nan wenhongzhang_hs@126.com
2 eb8ac60527db35b10881cb4fd86b8a6e21983d02 ，qiran Zhang wenhongzhang_hs@126.com
2 eb8ac60527db35b10881cb4fd86b8a6e21983d02 ，yun Ling lingyun@shaphc.org
3 e

2 f339a22cebc1bc0099beb7aea41be09cc916e5dc François Ferron francois.ferron@afmb.univ-mrs.fr
3 f33c6d94b0efaa198f8f3f20e644625fa3fe10d2 Corey Watson corey.watson@mssm.edu
3 f40eaec892778166e4f8f8eeb576893e82a591be Yuzhen Zhang zhangyuzhen@suda.edu.cn
1 f40eaec892778166e4f8f8eeb576893e82a591be Bin Jiang jbin@suda.edu.cn
3 f47af9fe364fd9bac5e061142208d801667c5aca Zhaowei Zhu zhuzhaowei@csu.edu.cn
2 f47af9fe364fd9bac5e061142208d801667c5aca Jianjun Tang tangliang1226@126.com
3 f47af9fe364fd9bac5e061142208d801667c5aca Zhenfei Fang zhenfeifang@sina.com
3 f47af9fe364fd9bac5e061142208d801667c5aca Qiming Liu qimingliu@126.com
3 f47af9fe364fd9bac5e061142208d801667c5aca Xinqun Hu huxinqungs@163.com
3 f47af9fe364fd9bac5e061142208d801667c5aca Danyan Xu xudanyan02@sina.com
3 f47af9fe364fd9bac5e061142208d801667c5aca Jia He hejia2007@hotmail.com
3 f47af9fe364fd9bac5e061142208d801667c5aca Liang Tang tangliang1226@126.com
2 f47af9fe364fd9bac5e061142208d801667c5aca Shi Tai samtai2012@163.com
3 f49a9b36227

3 f9c6a656d0ce352cc1fa0df8d83e6d4a1f53d7c2 Nick Wilson nick.wilson@otago.ac.nz
3 f9c6a656d0ce352cc1fa0df8d83e6d4a1f53d7c2 Amanda Kvalsvig amanda.kvalsvig@otago.ac.nz
3 f9c6a656d0ce352cc1fa0df8d83e6d4a1f53d7c2 Lucy Barnard lucy.telfar-barnard@otago.ac.nz
3 f9c6a656d0ce352cc1fa0df8d83e6d4a1f53d7c2 Michael Baker michael.baker@otago.ac.nz
3 fbc41a8e025cd6eb2d7c156aad3b6923af2349c4 Chao Wu wuchao1984@zju.edu.cn
2 fbc41a8e025cd6eb2d7c156aad3b6923af2349c4 Shufa Zheng minzheng@zju.edu.cn
3 fbc41a8e025cd6eb2d7c156aad3b6923af2349c4 Yu Chen chenyuzy@zju.edu.cn
3 fbc41a8e025cd6eb2d7c156aad3b6923af2349c4 Min Zheng minzheng@zju.edu.cn
2 fbda9162de4c4a881b118dd140f1e976902df15e Qi-Chuan Zhang zhang_yonghai@126.com
2 fbebe4b66073c44cface2e842754bce26e3e2913 Xumao Zhao zhaoxm@lzu.edu.cn
2 fbebe4b66073c44cface2e842754bce26e3e2913 Yuehua Sun sunyh@ioz.ac.cn
2 fc15f433048e4d9464964930c36a1e7057ba1088 Jason Mclellan jmclellan@austin.utexas.edu
3 fc7a6b5d1852c5ecce2d20fd0d73d5f957ed7055 Benjamin Maier benja

2 0ed539fa6dbf35a97bad42eea7182ca5c36c47af Brenda Ang kwang@tll.org.sg
2 0ed539fa6dbf35a97bad42eea7182ca5c36c47af Jimmy Kwang kwang@tll.org.sg
2 0ed539fa6dbf35a97bad42eea7182ca5c36c47af J Kwang kwang@tll.org.sg
2 0f26fcb654f47c611134190653ed92fd38dd5d67 Hongmei Jiao jiao@yzu.edu.cn
2 0f26fcb654f47c611134190653ed92fd38dd5d67 Xinan Jiao jiao@yzu.edu.cn
2 0fc42c4b3227cbd5f17a24e441c9526435bc504a Fang Yuan fyuan@arcbs.redcross.org.au
2 0fcae4067d557b85dd27c6982344b763263e6ab2 Tim Mackey tmackey@ucsd.edu
2 1027702151e441039ec492e11d4fd20eb7da65dc William Lindsley wlindsley@cdc.gov
2 106bb31b6de9ffdd9f66285134e29df38c6957ab Barry Fields bfields@cdc.gov
3 11632bbc30436471453dedcc618a2efce7622e7e Diego Lara diego.lara@ochsner.org
2 139e6ba9edaf8b1518f33f6d1e2f134c4e2cd74e Eduardo Davila edavila@som.umaryland.edu
1 14ab6eeafd7fdbca309142856b179fe0438fd61a Yijing Li prof.lyijing@yahoo.com.cn
2 15725f58fc5a9f23c2d12dd7695557771f2dd857 Hui Tey jeannie_tey@moh.gov.sg
3 1a6a8f6d19437c3bcfcdce9557131

1 a866478edd938372f5321371609432b4eca3ebc9 Erica Saphire erica@scripps.edu
2 acfb5f151553011b81df2078523c2322b646cc58 Jeffrey Lee jlee@scripps.edu
2 ada90e72a1506a09225f70eeb53ef82c10bf2f65 Thomas Lane tom.lane@utah.path.edu
4 ae267a98af103856a22932cac54e9208b34da846 J Gavora helene.hayes@inra.fr
2 aea77148a52e6ebafa7162a57b19805da03a447c Ziad Memish zmemish@yahoo.com
2 af4d56adf262ed89e8005d2e78e3760370fa346e Susan Baker sbaker1@luc.edu
2 b1555b131850b0806f304b2005ad9e537a4ed7e8 Shan Sun dssun@mail.tcu.edu.tw
2 b1555b131850b0806f304b2005ad9e537a4ed7e8 Der-Shan Sun dssun@mail.tcu.edu.tw
3 b2a29d12403e49b5b2b560ce350632a886ed8a6e Gergely Tekes gergely.tekes@vetmed.uni-giessen.de
3 b75374ebf027be698e5ce4e721a4840dc4384240 Guido Antonelli guido.antonelli@uniroma1.it
2 bbe2e49fac5f007f3313b35f9b264bb4219aa67d Kentaro Kato kkato(at)obihiro.ac.jp
2 bbff324ce85ca876d0c96244b3c9fa479c7f300f Chris Bunce c.m.bunce@bham.ac.uk
3 bcad0a2902802ee1d5a8389d8bbbbcc61bb277ee John Ziebuhr john.ziebuhr@vi

In [168]:
# Remove conflicts (resolve to highest strength match)
for paper in authors.sha.unique():
    counts=(authors[authors.sha==paper].email.value_counts())
    
    cx=counts.index
    for c in range(0,len(counts)):
        #print(c.email)
        if(counts[c]>1):
            therecords=authors[(authors.sha==paper) & (authors.email==cx[c])]
            maxstrength=therecords.strengths.max()

            ids=therecords[therecords.strengths!=maxstrength].index
            authors.email[ids]=""

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


In [169]:
# Apply cleaning to the emails field

# Find the most common domain endings in the set.
# Since majority are entries, we can use thse to help us 
# split concatenated strings. Good domains have >1 characrter.
Emails=authors.email[authors.notna().email]
CommonEndings={}
for em in Emails:
    Ending=em.split('.')[-1]
    if(len(Ending)>1):
        if(not Ending in CommonEndings.keys()):
            CommonEndings[Ending]=1
        else:
            CommonEndings[Ending]+=1

# An ending is popular (and so probably real)
# if it has >5 use cases in the set.           
PopularEndings=[]
PopularCounts=[]
for key in CommonEndings.keys():
    if(CommonEndings[key]>5):
        PopularEndings.append("."+key)
        PopularCounts.append(CommonEndings[key])
endings=np.array(PopularEndings)

# I noticed from the dataset that we need to protect these two, 
# since they contain within them other good (more popular) domain
# strins
endings=endings[np.argsort(PopularCounts)][::-1]
endings=np.concatenate([[".int",".gov.uk"],endings])

print("Popular endings:")
print(endings)

# This function fixes two pathologies:
#  1) Bad characters or strings jammed into the email address - common ones
#       picked out by observation of the list;
#
#  2) Valid email concatenated at end with more text, identified via
#       noting occurence of popular domain name in the string.
#
#   [We cannot fix concatenated at beginning, since string jammed on front is
#   still a valid formatted address, in most cases.]

def FixIt(em):
    # Forbidden word /symbol cleaning
    if(pd.isna(em)):
        return np.NaN
    forbidden=['author','emailaddress','correspondingauthor','telephone','<','>','*-','*',':','†']
    Changed=False
    for forb in forbidden:
        if(forb in em):
            splits=em.split(forb)
            for s in splits:
                #after split, the part contianing 
                #  an @ is an email address
                if "@" in s:   
                    em=s
                    
    # popular domain name based cleaning        
    for ending in endings:
        found=(em.find(ending))
        wheresat=em.find("@")

        if((found>0) and ((found+len(ending))<=len(em)) and (found > wheresat)):
            em=em[0:found+len(ending)]
            break
    
    return em



Popular endings:
['.int' '.gov.uk' '.com' '.edu' '.cn' '.uk' '.de' '.fr' '.org' '.ca' '.au'
 '.gov' '.hk' '.tw' '.jp' '.kr' '.nl' '.it' '.ch' '.sg' '.br' '.es' '.se'
 '.be' '.net' '.il' '.sa' '.nz' '.fi' '.pl' '.za' '.in' '.at' '.no' '.eu'
 '.mil' '.ir' '.pt' '.ie' '.ru' '.dk' '.gr' '.cat' '.int' '.my' '.th'
 '.cz' '.ar' '.tr' '.mx' '.qa' '.cl' '.co' '.to' '.flzhang&#x00040' '.hu'
 '.si' '.wang' '.com†']


In [170]:

# Sample 100 for smell test
for em in Emails[0:100]:
    em2=FixIt(em)
    if(em2!=em):
        print(em + " -> " + em2)


NewEmails=[]        
for em in authors.email:
    NewEmails.append(FixIt(em))
        
#Now go ahead and apply it to the dataset
#def FixEmail(row):
#    newem=FixIt(row.email)
#    if(newem!=row.email):
#        row.email=newem
#    return row

authors.email=NewEmails

milne@csse.uwa.edu.au -> milne@csse.uwa.edu
e.:ehondo@agr.nagoya-u.ac.jp -> ehondo@agr.nagoya-u.ac.jp
marin@fcien.edu.uym.marín. -> marin@fcien.edu
mmiceli@med.umich.edu. -> mmiceli@med.umich.edu
craig.mccormick@dal.cac.m. -> craig.mccormick@dal.ca
d.khaperskyy@dal.cad.a.k. -> d.khaperskyy@dal.ca
marta.gaglia@tufts.edum.m.g. -> marta.gaglia@tufts.edu
niuyuming@yeah.net. -> niuyuming@yeah.net
claudio.canal@ufrgs.brc.w.canal. -> claudio.canal@ufrgs.br
yxiao@mail.xjtu.edu.cny.xiao -> yxiao@mail.xjtu.edu
rbaric@email.unc.edu. -> rbaric@email.unc.edu
chuakb@tll.org.sg -> chuakb@tll.org
jidoyaga@stanford.edu. -> jidoyaga@stanford.edu
nbellei@uol.com.br -> nbellei@uol.com
s.puechmaille@gmail.com. -> s.puechmaille@gmail.com
rbaric@email.unc.edu. -> rbaric@email.unc.edu
es:peter.durr@csiro.aup.a.durr -> peter.durr@csiro.au
linfa.wang@duke-nus.edu.sg -> linfa.wang@duke-nus.edu
evelyn_koay@nuhs.edu.sg -> evelyn_koay@nuhs.edu
evelyn_koay@nuhs.edu.sg -> evelyn_koay@nuhs.edu
pfeffer@ibmc-cnrs.unistr

In [171]:
# Save the output file
authors.drop('strengths',axis=1)
authors.to_csv("AuthorsWithEmails.csv")

In [174]:
sum(authors.notna().email)

15678