In [None]:
#@title Parameters — edit these fields
# NOTE: the `#@title`, `#@markdown` and `#@param` lines in this cell are Colab
# form directives. Their text is what the form UI displays, so it is kept
# verbatim; the plain `#` comments below are the English notes for maintainers.

#@markdown ### 出力設定
#@markdown **出力ファイル名** — ダウンロードされる.docxファイルの名前
# Output .docx file name; the setup cell prepends `outputdirectory` to it.
file_name_download = 'papers_talks_books.docx' #@param {type:"string"}

#@markdown ---
#@markdown ### 期間設定
#@markdown **開始日** — この日付より後に出版された業績を収集する (YYYY-MM-DD)
# Lower bound of the reporting window (YYYY-MM-DD). The docx cell compares it
# as a string with a strict `>`, so items dated exactly on this day are excluded.
globalmindate='2025-04-01' #@param {type:"string"}
#@markdown **終了日** — この日付より前に出版された業績を収集する (YYYY-MM-DD)
# Upper bound of the reporting window (YYYY-MM-DD), compared with a strict `<`.
globalmaxdate='2030-04-01' #@param {type:"string"}

#@markdown ---
#@markdown ### 書式設定
#@markdown **謝辞マーク** — 課題番号が謝辞に含まれる論文に付けるマーク（不要なら空欄）
# Prefix added to papers whose 'ryoiki' flag is set (paper linked to one of the
# listed grants on researchmap); leave empty for no mark.
smark='' #@param {type:"string"}
#@markdown **すべて英語名** — Trueで全セクション英語名表記、Falseで講演・書籍・その他は日本語名表記
# True: English member names everywhere. False: the talks / books / misc
# dictionaries store the Japanese name as 'printname'.
allenglish = True #@param {type:"boolean"}
#@markdown **論文ナンバリング** — Trueで論文リストに連番 (1. 2. 3. ...) を付ける
# True: prefix each paper with a running number ("1. ", "2. ", ...).
numberingPapers = True #@param {type:"boolean"}
#@markdown **査読ありのみ** — Trueでresearchmapで査読ありとされた論文のみ抽出
# True: keep only papers whose researchmap record has a truthy 'referee' field.
peer_reviewed = False #@param {type:"boolean"}

#@markdown ---
#@markdown ### データソース
#@markdown **スプレッドシートURL** — メンバー情報（researchmap ID、氏名、代表/分担、課題番号）を含むGoogle SheetsのURL
# Google Sheets URL of the member table; it is converted to a CSV export URL
# and read with pandas in the "Download spreadsheet" cell.
sheeturl='https://docs.google.com/spreadsheets/d/1T5QtMv4M_peHHM-Zj4oFmS1jHG4voDBJipbEEY0xFQs/edit?usp=sharing' #@param {type:"string"}

In [None]:
#@title Imports and setup
import requests,json,sys,os, gspread, time, re
import numpy as np
# Environment switch: on Colab, install python-docx and write next to the
# notebook; otherwise write into a local output directory (created if missing).
if 'google.colab' in str(get_ipython()):
    %pip install python-docx
    from google.colab import files,auth
    from oauth2client.client import GoogleCredentials
    outputdirectory = ''
else:
    outputdirectory = '../docx-researchmap-outputs/' # when running locally, set this to the directory where the output file should be saved
    os.makedirs(outputdirectory,exist_ok=True)
# python-docx is imported only after the conditional install above.
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_UNDERLINE
import pandas as pd
# Full output path: directory prefix (empty on Colab) + user-chosen file name.
file_name=outputdirectory+file_name_download

In [None]:
#@title Download spreadsheet
# Derive the CSV export URL from the spreadsheet ID embedded in `sheeturl`.
# The ID is everything after "/d/" up to the next "/", "?" or "#", so the URL
# may or may not carry a trailing "/edit?...": both forms are accepted.
_sheet_match = re.match(r"https://docs\.google\.com/spreadsheets/d/([^/?#]+)", sheeturl.strip())
if _sheet_match is None:
    # Fail with an actionable message instead of an AttributeError on None.
    raise ValueError(
        "sheeturl must look like https://docs.google.com/spreadsheets/d/<ID>/... ; got: "
        + repr(sheeturl))
sheeturl_csv="https://docs.google.com/spreadsheets/d/"+_sheet_match.group(1)+"/export?format=csv"
# dtype=str keeps IDs, grant numbers and dates as text (no numeric coercion).
name_data=pd.read_csv(sheeturl_csv,dtype=str)
name_data

In [None]:
#@title Parse member data
# Number of rows (members) in the sheet.
membernum = name_data.shape[0]

# Member display names: "First Surname" built from the English columns, and the
# family-name-first form built from the Japanese name columns.
_given_col, _family_col = name_data["First name"], name_data["Surname"]
allnames = (_given_col + ' ' + _family_col).to_list()
allSurname = _family_col.to_list()
allnamesJP = (name_data["苗字"] + " " + name_data["名"]).to_list()

# researchmap permalink of each member (used to build the API request path).
allmembers = name_data["researchmapID"].to_list()

# Role label per member; kept both as an array (for boolean masking) and a list.
allDB = name_data["代表分担協力"].values
allDaihyoBuntan = list(allDB)

# Grant numbers and per-member date window, all as strings.
grant_numbers = name_data["grantID"].to_list()
allmindate = name_data["Start date"].to_list()
allmaxdate = name_data["End date"].to_list()

# Build extended name list including alternate name spellings
def _collect_alt_names(col, db_array):
    """Return (names, db_labels) for non-NaN entries in a column."""
    mask = ~pd.isna(col).values
    return list(col.values[mask]), list(db_array[mask])

# Second and third alternate spellings, each paired with the member's role label.
alt_names2, alt_db2 = _collect_alt_names(name_data['著者名（2個目）'], allDB)
alt_names3, alt_db3 = _collect_alt_names(name_data['著者名（3個目）'], allDB)

# nameList / daihyobuntanList are parallel: index k of one matches index k of
# the other. Primary names come first, then the alternate spellings.
_all_spellings = allnames + alt_names2 + alt_names3
nameList = [spelling.strip() for spelling in _all_spellings]
daihyobuntanList = allDaihyoBuntan + alt_db2 + alt_db3

In [None]:
#@title Helper functions
def SurnameLast(namesDic,sn):
    """Return the author names in "given-names surname" (Western) order.

    Commas and periods are removed from every name first. The first name that
    contains ``sn`` as a space-separated token decides the layout of the whole
    list: if ``sn`` is its first token, every name has its first token moved to
    the end; otherwise (or if no name contains ``sn``) names are returned as-is.

    Args:
        namesDic: iterable of dicts, each with a 'name' string.
        sn: surname of the researcher whose record this author list belongs to.

    Returns:
        List of cleaned (and possibly rotated) name strings.
    """
    cleaned = [entry['name'].replace(',','').replace('.','') for entry in namesDic]

    # Detect the ordering from the first name that mentions the surname.
    surname_leads = False
    for full in cleaned:
        tokens = full.split(' ')
        if sn in tokens:
            surname_leads = tokens.index(sn) == 0
            break

    if not surname_leads:
        return cleaned

    rotated = []
    for full in cleaned:
        head, *rest = full.split(' ')
        rotated.append(' '.join(rest + [head]))
    return rotated

def ReturnDictWOerror(dictdata,key,nodata):
    """Look up ``key`` in ``dictdata``; return ``nodata`` when the key is absent."""
    return dictdata.get(key, nodata)

def ReturnDictContent(dictdata,key,key1,nodata=''):
    """Return ``dictdata[key]``, falling back to ``dictdata[key1]``.

    The fallback is used when ``key`` is missing or its value equals ``nodata``;
    if ``key1`` is missing as well, ``nodata`` is returned.
    """
    preferred = dictdata[key] if key in dictdata.keys() else nodata
    if preferred != nodata:
        return preferred
    return dictdata[key1] if key1 in dictdata.keys() else nodata

def commaR(vol,spage):
    """Separator to place between the volume and starting-page strings.

    Returns '' when both are empty, ' ' when exactly one is empty, and ', '
    when both are present.
    """
    vol_blank = vol == ''
    page_blank = spage == ''
    if vol_blank and page_blank:
        return ''
    if vol_blank or page_blank:
        return ' '
    return ', '

def strip_html_tags(text):
    """Delete every ``<...>`` markup tag (e.g. <i>, </i>, <scp>) from ``text``."""
    tag_pattern = re.compile(r'<[^>]+>')
    return tag_pattern.sub('', text)

# DOI prefixes that the papers cell treats as bioRxiv preprints. Kept as a tuple
# because it is passed directly to str.startswith().
BIORXIV_DOI_PREFIXES = ('10.1101/', '10.64898/')

# Per-session memo of journal name -> abbreviation (or -> the name itself when
# the lookup produced nothing), so each title hits the abbreviso API only once.
_jname_cache = {}

def abbreviate_journal(name):
    """Return the ISO 4 abbreviation of a journal title via the abbreviso API.

    Falsy input is returned unchanged without any lookup. On a non-200 reply,
    an empty reply, or any exception during the request, the original name is
    returned. Whatever is returned for a non-empty name is also cached.
    """
    if not name:
        return name
    try:
        return _jname_cache[name]
    except KeyError:
        pass

    resolved = name  # fallback when the service gives nothing usable
    try:
        response = requests.get(
            'https://abbreviso.toolforge.org/abbreviso/a/' + requests.utils.quote(name),
            timeout=5)
        if response.status_code == 200:
            candidate = response.text.strip()
            if candidate:
                resolved = candidate
    except Exception:
        # Best effort: an unreachable service must not stop the notebook.
        pass
    _jname_cache[name] = resolved
    return resolved

def add_underlined_run(paragraph, name, nameList, daihyobuntanList):
    """Append ``name`` to ``paragraph`` as a run, underlined according to role.

    The role is ``daihyobuntanList[k]`` where ``k`` is the first index of
    ``name`` in ``nameList``: 'D' gives a double underline, 'B' a single
    underline, anything else (or a name not in ``nameList``) a plain run.
    """
    role = daihyobuntanList[nameList.index(name)] if name in nameList else None

    if role == 'D':
        underline_style = WD_UNDERLINE.DOUBLE
    elif role == 'B':
        underline_style = True
    else:
        underline_style = None

    run = paragraph.add_run(name)
    if underline_style is not None:
        run.underline = underline_style

def sort_by_date_desc(items_dict):
    """Compute a newest-first ordering of ``items_dict`` by its 'date' fields.

    Returns:
        ``(keys, order)``: ``keys`` is the list of dict keys in their original
        order and ``order`` is an array of positions into ``keys`` such that
        ``keys[order[0]]`` has the largest 'date' value.
    """
    keys = [k for k in items_dict.keys()]
    dates = [items_dict[k]['date'] for k in keys]
    ascending = np.argsort(dates)
    return keys, ascending[::-1]

In [None]:
#@title Download from researchmap API
# Fetch every achievement category for every member from the researchmap API.
# Result layout: jsonfiles[<researchmapID>][<category>] -> decoded JSON dict.
# NOTE(review): the request carries no paging parameters; if the API pages its
# results only the first page is used here — TODO confirm against the API docs.
url = "https://api.researchmap.jp/"
itemslist = ["published_papers","research_projects","misc","presentations","books_etc"]
jsonfiles={}
for name in allmembers:
    print('downloading: '+name)
    jsonfiles[name]={}
    for it in itemslist:
        try:
            # A timeout keeps one stalled connection from hanging the notebook.
            r1 = requests.get(url+name+'/'+it, timeout=30)
            payload = json.loads(r1.text)
            if not isinstance(payload, dict):
                payload = {'error': 'unexpected response type: '+type(payload).__name__}
        except (requests.RequestException, ValueError) as exc:
            # Network failure or non-JSON body: record it in the same
            # {'error': ...} shape the API uses, so later cells (which only look
            # for an 'items' key) skip this category instead of crashing.
            payload = {'error': str(exc)}
        jsonfiles[name][it]=payload
        if 'error' in jsonfiles[name][it].keys():
            print(jsonfiles[name][it]['error'])
            print("  error in:"+it)

In [None]:
#@title Build papers dictionary
# Collect every member's papers into PapersDict (keyed 0, 1, 2, ...), together
# with doiDict / titleDict, which record which members listed the same paper.
i=0
PapersDict={}

doilist=[]
doiDict={}
titlelist=[]
titleDict={}
for ids,fullname,surname,dh,mindate,maxdate in zip(allmembers,allnames,allSurname,allDaihyoBuntan,allmindate,allmaxdate):
    dfP = jsonfiles[ids]["published_papers"]
    dfG = jsonfiles[ids]["research_projects"]

    # researchmap id of this member's project whose (first) grant number is in
    # the sheet. Reset for every member so that a member without project data
    # neither inherits the previous member's id nor hits an undefined name.
    grantID="0"
    if 'items' in dfG.keys():
        for dfs in dfG['items']:
            project_grants = dfs.get('identifiers', {}).get('grant_number', [])
            if project_grants and project_grants[0] in grant_numbers:
                grantID=dfs['rm:id']
                break

    if 'items' not in dfP.keys():
        continue
    for dfs in dfP['items']:
        if "authors" not in dfs.keys():
            continue
        # Records without identifiers or without a publication date are skipped.
        if ('identifiers' not in dfs.keys()) or ('publication_date' not in dfs.keys()):
            continue
        # Per-member window, inclusive on both ends (string comparison).
        if not ((dfs["publication_date"]>=mindate) and (dfs["publication_date"]<=maxdate)):
            continue

        # doinum is the DOI list from researchmap, or [0] when there is none.
        doinum=[0]
        if dfs['identifiers'].get('doi'):
            doinum=dfs['identifiers']['doi']

        PapersDict[i]={}
        PapersDict[i]['issues']=False
        PapersDict[i]['preprint']=False
        correspo=False
        Ryoiki=False
        # Ryoiki: the paper is linked on researchmap to the matched project.
        if 'research_project_id' in dfs['identifiers'].keys():
            if grantID in dfs['identifiers']['research_project_id']:
                Ryoiki=True
        if "published_paper_owner_roles" in dfs.keys():
            if "corresponding" in dfs["published_paper_owner_roles"]:
                correspo=True

        jname=''
        if "publication_name" in dfs.keys():
            jname=strip_html_tags(ReturnDictContent(dfs["publication_name"],'en','ja',''))

        if jname.upper() =='ARXIV':
            PapersDict[i]['preprint']=True
            if "arxiv_id" in dfs['identifiers'].keys():
                jname=dfs['identifiers']['arxiv_id'][0] + ' (preprint)'
            else:
                jname='arXiv'

        # No journal name at all: fall back to arXiv id, then DOI, then a
        # placeholder that is flagged as an issue in the output.
        if not("publication_name" in dfs.keys()):
            if "arxiv_id" in dfs['identifiers'].keys():
                jname=dfs['identifiers']['arxiv_id'][0] + ' (preprint)'
                PapersDict[i]['preprint']=True
            elif doinum[0]!=0:
                jname='DOI: '+doinum[0]
                PapersDict[i]['preprint']=True
            else:
                jname='journal unspecified'
                PapersDict[i]['issues']=True

        # bioRxiv detection by DOI prefix
        if doinum[0] != 0 and str(doinum[0]).startswith(BIORXIV_DOI_PREFIXES):
            if jname == '' or jname == 'DOI: ' + doinum[0]:
                jname = 'bioRxiv'
                PapersDict[i]['preprint'] = True

        # Abbreviate real journal names via ISO 4 lookup
        if not PapersDict[i]['preprint'] and not PapersDict[i]['issues']:
            jname = abbreviate_journal(jname)

        Sname=SurnameLast(ReturnDictContent(dfs["authors"],'en','ja',''),surname)

        spage=''
        if "starting_page" in dfs.keys():
            if dfs["starting_page"]!='':
                spage=dfs["starting_page"]

        vol=''
        if "volume" in dfs.keys():
            if dfs["volume"]!='':
                vol=' '+dfs["volume"]

        # Track duplicates by DOI
        # NOTE(review): `doinum` is a list while `doilist` holds scalars, so this
        # membership test never matches and the else-branch always runs; every
        # doiDict entry therefore holds a single name. Left unchanged on purpose:
        # any code that reads doiDict sees single-name entries, so review those
        # readers before changing this test.
        if doinum in doilist:
            doiDict[doinum[0]]['name']=doiDict[doinum[0]]['name']+[fullname]
            doiDict[doinum[0]]['Corresp']=doiDict[doinum[0]]['Corresp']+[correspo]
        else:
            doiDict[doinum[0]]={'name':[fullname],'Corresp':[correspo],'count':0}
            doilist=doilist+[doinum[0]]

        papertitle=strip_html_tags(ReturnDictContent(dfs['paper_title'],'en','ja',''))
        # Title key used for duplicate detection: upper-cased, trailing '.' removed.
        papid=papertitle.upper().rstrip('.')

        # Track duplicates by title
        if papid in titlelist:
            titleDict[papid]['name'] = titleDict[papid]['name']+[fullname]
            titleDict[papid]['Corresp'] = titleDict[papid]['Corresp']+[correspo]
        else:
            titlelist = titlelist + [papid]
            titleDict[papid] = {'name':[fullname],'Corresp':[correspo],'count':0}

        # text1 precedes the author list in the output, text2 follows it.
        text1="\""+papertitle+"\"" +', '
        text2=jname+','+vol+commaR(vol,spage)+spage+ ' ('+dfs["publication_date"][:4] +').'
        PapersDict[i]['text1']=text1
        PapersDict[i]['text2']=text2
        PapersDict[i]['papid']=papid
        PapersDict[i]['researcher']=fullname
        PapersDict[i]['authors']=Sname
        PapersDict[i]['date']=dfs["publication_date"]
        PapersDict[i]['referee']=ReturnDictContent(dfs,'referee','referee',False)
        PapersDict[i]['doi']=doinum[0]
        PapersDict[i]['ryoiki']=Ryoiki
        PapersDict[i]['Daihyo']=dh
        PapersDict[i]['Corresp']=correspo
        i=i+1

In [None]:
#@title Build talks dictionary
# Collect invited presentations of every member into TalksDict (keyed 0, 1, ...).
TalksDict={}
i=0
for ids,fullname,fullnameJP,dh,mindate,maxdate in zip(allmembers,allnames,allnamesJP,allDaihyoBuntan,allmindate,allmaxdate):
    dfPr = jsonfiles[ids]["presentations"]
    if 'items' not in dfPr.keys():
        continue
    for dfs in dfPr['items']:
        # Only records carrying all of these fields are considered.
        if not all([a in dfs.keys() for a in ['invited',"presentation_title","event",'publication_date',"presenters"]]):
            continue
        # Per-member window, inclusive on both ends (string comparison).
        if not ((dfs["publication_date"]>=mindate) and (dfs["publication_date"]<=maxdate)):
            continue
        if not dfs['invited']:
            continue
        # The presenter shown in the output is the member who owns the record;
        # the record's own presenter list is not read (the former unused lookup
        # of its first entry could raise on an empty or unexpected list).
        ename=strip_html_tags(ReturnDictContent(dfs["event"],'en','ja',''))
        ptitle=strip_html_tags(ReturnDictContent(dfs["presentation_title"],'en','ja',''))
        pdate=dfs["publication_date"]
        TalksDict[i]={}
        TalksDict[i]["presenter"]=fullname
        if allenglish:
            TalksDict[i]['printname']=fullname
        else:
            TalksDict[i]["printname"]=fullnameJP
        TalksDict[i]["event"]=ename
        TalksDict[i]["presentation_title"]=ptitle
        TalksDict[i]["date"]=pdate
        i=i+1

In [None]:
#@title Build books dictionary
# Collect books of every member into booksDict (keyed 0, 1, ...). The text
# fields are stored pre-formatted (leading space, quotes, trailing comma) so the
# docx cell can append them as consecutive runs.
booksDict={}
i=0
for ids,fullname,fullnameJP,dh,mindate,maxdate in zip(allmembers,allnames,allnamesJP,allDaihyoBuntan,allmindate,allmaxdate):
    dfM = jsonfiles[ids]["books_etc"]
    if 'items' not in dfM.keys():
        continue
    for dfs in dfM['items']:
        # Only records carrying all of these fields are considered.
        if not all([a in dfs.keys() for a in ['authors',"book_title","publication_date"]]):
            continue
        # Per-member window, inclusive on both ends (string comparison).
        if not ((dfs["publication_date"]>=mindate) and (dfs["publication_date"]<=maxdate)):
            continue
        # The author shown in the output is the member who owns the record; the
        # record's own author list is not read (the former unused lookup of its
        # first entry could raise on an empty or unexpected list).
        ename=strip_html_tags(ReturnDictContent(dfs["book_title"],'ja','en',''))
        if "book_owner_range" in dfs.keys():
            eoname=" \'"+strip_html_tags(ReturnDictContent(dfs["book_owner_range"],'ja','en',''))+"\',"
        else:
            eoname=''
        if "book_owner_role" in dfs.keys():
            brole=" ("+dfs["book_owner_role"]+"),"
        else:
            brole=','
        if "publisher" in dfs.keys():
            pub=" "+strip_html_tags(ReturnDictContent(dfs["publisher"],'ja','en',''))+","
        else:
            pub=''
        pdate=dfs["publication_date"]
        booksDict[i]={}
        booksDict[i]['authors']=fullname
        if allenglish:
            booksDict[i]['printname']=fullname
        else:
            booksDict[i]['printname']=fullnameJP
        booksDict[i]["book_title"]=' \"'+ename+'\",'
        booksDict[i]["book_owner_role"]=brole
        booksDict[i]["book_owner_range"]=eoname
        booksDict[i]["publisher"]=pub
        booksDict[i]["date"]=pdate
        i=i+1

In [None]:
#@title Build misc dictionary
# Collect "misc" achievements of every member into miscDict (keyed 0, 1, ...).
# Title and publication name are stored pre-formatted (leading space, quotes,
# trailing comma) so the docx cell can append them as consecutive runs.
miscDict={}
i=0
for ids,fullname,fullnameJP,dh,mindate,maxdate in zip(allmembers,allnames,allnamesJP,allDaihyoBuntan,allmindate,allmaxdate):
    dfM = jsonfiles[ids]["misc"]
    if 'items' not in dfM.keys():
        continue
    for dfs in dfM['items']:
        # Only records carrying all of these fields are considered.
        if not all([a in dfs.keys() for a in ['authors',"paper_title","publication_date","publication_name"]]):
            continue
        # Per-member window, inclusive on both ends (string comparison).
        if not ((dfs["publication_date"]>=mindate) and (dfs["publication_date"]<=maxdate)):
            continue
        # The author shown in the output is the member who owns the record; the
        # record's own author list is not read (the former unused lookup of its
        # first entry could raise on an empty or unexpected list).
        ename=strip_html_tags(ReturnDictContent(dfs["paper_title"],'ja','en',''))
        ptitle=strip_html_tags(ReturnDictContent(dfs["publication_name"],'ja','en',''))
        pdate=dfs["publication_date"]
        miscDict[i]={}
        miscDict[i]['authors']=fullname
        if allenglish:
            miscDict[i]['printname']=fullname
        else:
            miscDict[i]['printname']=fullnameJP

        miscDict[i]["paper_title"]=' \''+ename+'\','
        miscDict[i]["publication_name"]=' \"'+ptitle+'\",'
        miscDict[i]["date"]=pdate
        i=i+1

In [None]:
#@title Generate docx

# Keep only papers strictly inside the global reporting window and, when
# peer_reviewed is set, only those whose 'referee' field is truthy.
PapersDictSelected={
    k:PapersDict[k] for k in range(len(PapersDict))
    if (PapersDict[k]['date']>globalmindate)
    and (PapersDict[k]['date']<globalmaxdate)
    and (PapersDict[k]['referee'] or not peer_reviewed)
}

keys, arg = sort_by_date_desc(PapersDictSelected)

def _doi_key(doi):
    """Comparison key for a DOI (case-insensitive); None when there is no DOI."""
    if not doi:  # papers without a DOI store 0
        return None
    return str(doi).strip().lower() or None

# Several members may list the same paper. Group every record's
# (researcher, is-corresponding) pair by DOI and by title key, so the
# corresponding-author mark can use what any co-listing member declared.
_records_by_doi={}
_records_by_title={}
for _rec in PapersDict.values():
    _owner_flag=(_rec['researcher'],_rec['Corresp'])
    _dk=_doi_key(_rec['doi'])
    if _dk is not None:
        _records_by_doi.setdefault(_dk,[]).append(_owner_flag)
    _records_by_title.setdefault(_rec['papid'],[]).append(_owner_flag)

document = Document()
document.add_paragraph('原著論文')
inds=0
# DOIs / title keys already seen while walking the list, used to print each
# paper once even when it appears in several members' records.
_seen_dois=set()
_seen_titles=set()
for r in arg:
    pap=PapersDictSelected[keys[r]]
    _dk=_doi_key(pap['doi'])
    is_duplicate=((_dk is not None) and (_dk in _seen_dois)) or (pap['papid'] in _seen_titles)
    if _dk is not None:
        _seen_dois.add(_dk)
    _seen_titles.add(pap['papid'])
    if is_duplicate:
        continue
    # Count only papers that are actually printed, so numbering has no gaps.
    inds=inds+1

    # Papers with no journal information get a '***' marker line above them.
    if pap['issues']:
        p = document.add_paragraph('***')

    prefix = smark if pap['ryoiki'] else ''
    if numberingPapers:
        prefix = prefix+str(inds)+'. '
    p = document.add_paragraph(prefix+pap['text1'])

    co_listed = _records_by_doi.get(_dk,[]) + _records_by_title.get(pap['papid'],[])
    for nm in pap['authors']:
        if nm in nameList:
            # '*' marks a member author when this record's owner is corresponding
            # or when that member declared themselves corresponding on the paper.
            listedCorrespo = any(flag for who,flag in co_listed if who==nm)
            if pap['Corresp'] or listedCorrespo:
                p.add_run('*')
            add_underlined_run(p, nm, nameList, daihyobuntanList)
        else:
            p.add_run(nm)
        p.add_run(', ')
    p.add_run(pap['text2'])

# --- Talks ---
# Invited talks strictly inside the global window, newest first.
TalksDictSelected={k:TalksDict[k] for k in range(len(TalksDict)) if (TalksDict[k]['date']>globalmindate) and (TalksDict[k]['date']<globalmaxdate) }
keys, arg = sort_by_date_desc(TalksDictSelected)
document.add_paragraph('')
document.add_paragraph('学会発表・講演（招待あり）')
inds=0
for r in arg:
    inds=inds+1
    pap=TalksDictSelected[keys[r]]
    p = document.add_paragraph(str(inds)+'. ')
    # The underline role is looked up by the English name (the one nameList
    # holds), while the text printed is 'printname', which is the Japanese name
    # when allenglish is False. Fall back to the English name if 'printname' is
    # not a string (e.g. the Japanese name columns are empty in the sheet).
    presenter=pap["presenter"]
    role=daihyobuntanList[nameList.index(presenter)] if presenter in nameList else None
    shown=pap["printname"] if isinstance(pap["printname"],str) else presenter
    add_underlined_run(p, shown, [shown], [role])
    p.add_run(', \"'+pap["presentation_title"]+"\"")
    p.add_run(', '+pap["event"])
    p.add_run(', '+pap["date"]+'.')

# --- Books ---
# Books strictly inside the global window, newest first.
booksDictSelected={}
for k in range(len(booksDict)):
    book_date=booksDict[k]['date']
    if globalmindate < book_date < globalmaxdate:
        booksDictSelected[k]=booksDict[k]
keys, arg = sort_by_date_desc(booksDictSelected)
document.add_paragraph('')
document.add_paragraph('書籍')
inds=0
for r in arg:
    inds+=1
    pap=booksDictSelected[keys[r]]
    p = document.add_paragraph(str(inds)+'. ')
    # Pre-formatted pieces, appended as separate runs in this fixed order.
    for field in ("printname","book_owner_role","book_owner_range","book_title","publisher"):
        p.add_run(pap[field])
    # Only year and month of the date are printed.
    p.add_run(' '+pap["date"][:7]+'.')

# --- Misc ---
# Other achievements strictly inside the global window, newest first.
miscDictSelected={k:miscDict[k] for k in range(len(miscDict)) if (miscDict[k]['date']>globalmindate) and (miscDict[k]['date']<globalmaxdate) }
keys, arg = sort_by_date_desc(miscDictSelected)
document.add_paragraph('')
document.add_paragraph('その他')
inds=0
for r in arg:
    inds=inds+1
    pap=miscDictSelected[keys[r]]
    p = document.add_paragraph(str(inds)+'. ')
    # The underline role is looked up by the English name (the one nameList
    # holds), while the text printed is 'printname', which is the Japanese name
    # when allenglish is False. Fall back to the English name if 'printname' is
    # not a string (e.g. the Japanese name columns are empty in the sheet).
    author=pap['authors']
    role=daihyobuntanList[nameList.index(author)] if author in nameList else None
    shown=pap['printname'] if isinstance(pap['printname'],str) else author
    add_underlined_run(p, shown, [shown], [role])
    p.add_run(','+pap["paper_title"])
    p.add_run(pap["publication_name"])
    p.add_run(' '+pap["date"]+'.')

# Write the document. If the target cannot be written because of a permission
# error, save under a timestamped name instead of failing.
try:
    document.save(file_name)
except PermissionError:
    from datetime import datetime
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    stem, ext = os.path.splitext(file_name)
    file_name = '_'.join([stem, timestamp]) + ext
    document.save(file_name)
    print('Original file was locked (open in another app?). Saved as: ' + file_name)

In [None]:
#@title Download file
# On Colab, hand the generated file to the browser; elsewhere it simply stays
# in the output directory.
_running_in_colab = 'google.colab' in str(get_ipython())
if _running_in_colab:
    files.download(file_name)