# The Unconquerables of Open Access

## Merging MEDLINE journals with DOAJ data

Project for the EAHIL conference 2023 : https://eahil2023.org/  
Authors : **Floriane Muller & Pablo Iriarte**, University of Geneva  
Last update : 26.04.2023  

This notebook is used to add DOAJ information for MEDLINE journals.

## Sources

* MEDLINE journals enriched with Sherpa/Romeo data
* The DOAJ metadata was downloaded on 12.04.2023 from https://doaj.org/docs/public-data-dump/
* The list of journals added to and removed from DOAJ was downloaded from https://doaj.org/docs/faq/#the-journal-i-am-looking-for-isnt-in-doaj-why

    DOAJ CSV files:
      * journalcsv__doaj_20230412_1535_utf8.csv
      * DOAJ journals added and removed - Removed.tsv

In [1]:
import pandas as pd
# display the full content of cells (non truncated); None means "no limit"
# (the legacy -1 sentinel has been deprecated since pandas 1.0 and is rejected by newer releases)
pd.set_option('display.max_colwidth', None)
# display all the columns
pd.set_option('display.max_columns', None)

In [2]:
# Read the DOAJ journal dump, keeping only the columns used in this notebook:
# title, print/online ISSN, added / last-updated dates, seal, license,
# copyright retention and APC
doaj_columns = [
    'Journal title',
    'Journal ISSN (print version)',
    'Journal EISSN (online version)',
    'Added on Date',
    'DOAJ Seal',
    'Journal license',
    'Author holds copyright without restrictions',
    'APC',
    'Last updated Date',
]
doaj = pd.read_csv(
    'data/sources/doaj/journalcsv__doaj_20230412_1535_utf8.csv',
    sep=',',
    header=0,
    usecols=doaj_columns,
)
doaj

Unnamed: 0,Journal title,Journal ISSN (print version),Journal EISSN (online version),Journal license,Author holds copyright without restrictions,APC,DOAJ Seal,Added on Date,Last updated Date
0,Anais da Academia Brasileira de Ciências,0001-3765,1678-2690,CC BY,No,No,No,2004-04-23T21:31:00Z,2017-01-04T14:19:54Z
1,ACME,0001-494X,2282-0035,CC BY-NC-ND,Yes,No,No,2014-12-22T19:55:58Z,2022-08-10T19:13:04Z
2,Acta Biochimica Polonica,0001-527X,1734-154X,CC BY-SA,Yes,Yes,No,2022-05-23T10:09:09Z,2022-05-23T10:09:09Z
3,Acta Dermato-Venereologica,0001-5555,1651-2057,"CC BY, CC BY-NC",Yes,Yes,No,2011-11-10T12:31:05Z,2023-01-19T09:14:05Z
4,Acta Médica Costarricense,0001-6012,2215-5856,CC BY-NC-SA,No,No,No,2020-12-22T11:08:24Z,2022-07-29T11:54:21Z
...,...,...,...,...,...,...,...,...,...
19189,Football(s),2967-0837,2968-0115,CC BY,Yes,No,No,2023-01-14T13:39:37Z,2023-01-14T13:39:37Z
19190,"International Journal of Home Economics, Hospitality and Allied Research",2971-5121,,CC BY,Yes,Yes,No,2022-11-13T20:06:16Z,2022-11-19T14:23:23Z
19191,Papireto,,2974-668X,CC BY-ND,Yes,No,No,2023-03-23T17:17:20Z,2023-03-23T17:17:20Z
19192,Advances in Civil and Architectural Engineering,,2975-3848,CC BY,Yes,No,No,2022-11-21T11:18:02Z,2022-11-21T11:18:02Z


In [3]:
# Give every DOAJ column a short, DOAJ_-prefixed name in a single rename call
doaj_short_names = {
    'Journal title': 'DOAJ_title',
    'Added on Date': 'DOAJ_date',
    'DOAJ Seal': 'DOAJ_Seal',
    'Journal ISSN (print version)': 'DOAJ_pissn',
    'Journal EISSN (online version)': 'DOAJ_eissn',
    'Journal license': 'DOAJ_license',
    'Author holds copyright without restrictions': 'DOAJ_author_holds_copyright_without_restrictions',
    'APC': 'DOAJ_APC',
    'Last updated Date': 'DOAJ_last_updated',
}
doaj = doaj.rename(columns=doaj_short_names)
doaj

Unnamed: 0,DOAJ_title,DOAJ_pissn,DOAJ_eissn,DOAJ_license,DOAJ_author_holds_copyright_without_restrictions,DOAJ_APC,DOAJ_Seal,DOAJ_date,DOAJ_last_updated
0,Anais da Academia Brasileira de Ciências,0001-3765,1678-2690,CC BY,No,No,No,2004-04-23T21:31:00Z,2017-01-04T14:19:54Z
1,ACME,0001-494X,2282-0035,CC BY-NC-ND,Yes,No,No,2014-12-22T19:55:58Z,2022-08-10T19:13:04Z
2,Acta Biochimica Polonica,0001-527X,1734-154X,CC BY-SA,Yes,Yes,No,2022-05-23T10:09:09Z,2022-05-23T10:09:09Z
3,Acta Dermato-Venereologica,0001-5555,1651-2057,"CC BY, CC BY-NC",Yes,Yes,No,2011-11-10T12:31:05Z,2023-01-19T09:14:05Z
4,Acta Médica Costarricense,0001-6012,2215-5856,CC BY-NC-SA,No,No,No,2020-12-22T11:08:24Z,2022-07-29T11:54:21Z
...,...,...,...,...,...,...,...,...,...
19189,Football(s),2967-0837,2968-0115,CC BY,Yes,No,No,2023-01-14T13:39:37Z,2023-01-14T13:39:37Z
19190,"International Journal of Home Economics, Hospitality and Allied Research",2971-5121,,CC BY,Yes,Yes,No,2022-11-13T20:06:16Z,2022-11-19T14:23:23Z
19191,Papireto,,2974-668X,CC BY-ND,Yes,No,No,2023-03-23T17:17:20Z,2023-03-23T17:17:20Z
19192,Advances in Civil and Architectural Engineering,,2975-3848,CC BY,Yes,No,No,2022-11-21T11:18:02Z,2022-11-21T11:18:02Z


In [4]:
# The year is the first four characters of the "added on" timestamp string
doaj['DOAJ_year'] = doaj['DOAJ_date'].str.slice(stop=4)
doaj

Unnamed: 0,DOAJ_title,DOAJ_pissn,DOAJ_eissn,DOAJ_license,DOAJ_author_holds_copyright_without_restrictions,DOAJ_APC,DOAJ_Seal,DOAJ_date,DOAJ_last_updated,DOAJ_year
0,Anais da Academia Brasileira de Ciências,0001-3765,1678-2690,CC BY,No,No,No,2004-04-23T21:31:00Z,2017-01-04T14:19:54Z,2004
1,ACME,0001-494X,2282-0035,CC BY-NC-ND,Yes,No,No,2014-12-22T19:55:58Z,2022-08-10T19:13:04Z,2014
2,Acta Biochimica Polonica,0001-527X,1734-154X,CC BY-SA,Yes,Yes,No,2022-05-23T10:09:09Z,2022-05-23T10:09:09Z,2022
3,Acta Dermato-Venereologica,0001-5555,1651-2057,"CC BY, CC BY-NC",Yes,Yes,No,2011-11-10T12:31:05Z,2023-01-19T09:14:05Z,2011
4,Acta Médica Costarricense,0001-6012,2215-5856,CC BY-NC-SA,No,No,No,2020-12-22T11:08:24Z,2022-07-29T11:54:21Z,2020
...,...,...,...,...,...,...,...,...,...,...
19189,Football(s),2967-0837,2968-0115,CC BY,Yes,No,No,2023-01-14T13:39:37Z,2023-01-14T13:39:37Z,2023
19190,"International Journal of Home Economics, Hospitality and Allied Research",2971-5121,,CC BY,Yes,Yes,No,2022-11-13T20:06:16Z,2022-11-19T14:23:23Z,2022
19191,Papireto,,2974-668X,CC BY-ND,Yes,No,No,2023-03-23T17:17:20Z,2023-03-23T17:17:20Z,2023
19192,Advances in Civil and Architectural Engineering,,2975-3848,CC BY,Yes,No,No,2022-11-21T11:18:02Z,2022-11-21T11:18:02Z,2022


In [5]:
# check how the extracted year is stored (it is sliced from a string column)
doaj.dtypes['DOAJ_year']

dtype('O')

In [6]:
# Build the join key: prefer the eISSN and fall back to the pISSN when the eISSN is missing
doaj['ISSN'] = doaj['DOAJ_eissn'].fillna(doaj['DOAJ_pissn'])
doaj

Unnamed: 0,DOAJ_title,DOAJ_pissn,DOAJ_eissn,DOAJ_license,DOAJ_author_holds_copyright_without_restrictions,DOAJ_APC,DOAJ_Seal,DOAJ_date,DOAJ_last_updated,DOAJ_year,ISSN
0,Anais da Academia Brasileira de Ciências,0001-3765,1678-2690,CC BY,No,No,No,2004-04-23T21:31:00Z,2017-01-04T14:19:54Z,2004,1678-2690
1,ACME,0001-494X,2282-0035,CC BY-NC-ND,Yes,No,No,2014-12-22T19:55:58Z,2022-08-10T19:13:04Z,2014,2282-0035
2,Acta Biochimica Polonica,0001-527X,1734-154X,CC BY-SA,Yes,Yes,No,2022-05-23T10:09:09Z,2022-05-23T10:09:09Z,2022,1734-154X
3,Acta Dermato-Venereologica,0001-5555,1651-2057,"CC BY, CC BY-NC",Yes,Yes,No,2011-11-10T12:31:05Z,2023-01-19T09:14:05Z,2011,1651-2057
4,Acta Médica Costarricense,0001-6012,2215-5856,CC BY-NC-SA,No,No,No,2020-12-22T11:08:24Z,2022-07-29T11:54:21Z,2020,2215-5856
...,...,...,...,...,...,...,...,...,...,...,...
19189,Football(s),2967-0837,2968-0115,CC BY,Yes,No,No,2023-01-14T13:39:37Z,2023-01-14T13:39:37Z,2023,2968-0115
19190,"International Journal of Home Economics, Hospitality and Allied Research",2971-5121,,CC BY,Yes,Yes,No,2022-11-13T20:06:16Z,2022-11-19T14:23:23Z,2022,2971-5121
19191,Papireto,,2974-668X,CC BY-ND,Yes,No,No,2023-03-23T17:17:20Z,2023-03-23T17:17:20Z,2023,2974-668X
19192,Advances in Civil and Architectural Engineering,,2975-3848,CC BY,Yes,No,No,2022-11-21T11:18:02Z,2022-11-21T11:18:02Z,2022,2975-3848


In [7]:
# sanity check: DOAJ rows left without any ISSN key
doaj[doaj['ISSN'].isna()]

Unnamed: 0,DOAJ_title,DOAJ_pissn,DOAJ_eissn,DOAJ_license,DOAJ_author_holds_copyright_without_restrictions,DOAJ_APC,DOAJ_Seal,DOAJ_date,DOAJ_last_updated,DOAJ_year,ISSN


In [8]:
# sanity check: DOAJ rows without a year
doaj[doaj['DOAJ_year'].isnull()]

Unnamed: 0,DOAJ_title,DOAJ_pissn,DOAJ_eissn,DOAJ_license,DOAJ_author_holds_copyright_without_restrictions,DOAJ_APC,DOAJ_Seal,DOAJ_date,DOAJ_last_updated,DOAJ_year,ISSN


In [9]:
# sanity check: DOAJ rows without a title
doaj[doaj['DOAJ_title'].isnull()]

Unnamed: 0,DOAJ_title,DOAJ_pissn,DOAJ_eissn,DOAJ_license,DOAJ_author_holds_copyright_without_restrictions,DOAJ_APC,DOAJ_Seal,DOAJ_date,DOAJ_last_updated,DOAJ_year,ISSN


In [10]:
# counterpart check: DOAJ rows that do have a title
doaj[doaj['DOAJ_title'].notnull()]

Unnamed: 0,DOAJ_title,DOAJ_pissn,DOAJ_eissn,DOAJ_license,DOAJ_author_holds_copyright_without_restrictions,DOAJ_APC,DOAJ_Seal,DOAJ_date,DOAJ_last_updated,DOAJ_year,ISSN
0,Anais da Academia Brasileira de Ciências,0001-3765,1678-2690,CC BY,No,No,No,2004-04-23T21:31:00Z,2017-01-04T14:19:54Z,2004,1678-2690
1,ACME,0001-494X,2282-0035,CC BY-NC-ND,Yes,No,No,2014-12-22T19:55:58Z,2022-08-10T19:13:04Z,2014,2282-0035
2,Acta Biochimica Polonica,0001-527X,1734-154X,CC BY-SA,Yes,Yes,No,2022-05-23T10:09:09Z,2022-05-23T10:09:09Z,2022,1734-154X
3,Acta Dermato-Venereologica,0001-5555,1651-2057,"CC BY, CC BY-NC",Yes,Yes,No,2011-11-10T12:31:05Z,2023-01-19T09:14:05Z,2011,1651-2057
4,Acta Médica Costarricense,0001-6012,2215-5856,CC BY-NC-SA,No,No,No,2020-12-22T11:08:24Z,2022-07-29T11:54:21Z,2020,2215-5856
...,...,...,...,...,...,...,...,...,...,...,...
19189,Football(s),2967-0837,2968-0115,CC BY,Yes,No,No,2023-01-14T13:39:37Z,2023-01-14T13:39:37Z,2023,2968-0115
19190,"International Journal of Home Economics, Hospitality and Allied Research",2971-5121,,CC BY,Yes,Yes,No,2022-11-13T20:06:16Z,2022-11-19T14:23:23Z,2022,2971-5121
19191,Papireto,,2974-668X,CC BY-ND,Yes,No,No,2023-03-23T17:17:20Z,2023-03-23T17:17:20Z,2023,2974-668X
19192,Advances in Civil and Architectural Engineering,,2975-3848,CC BY,Yes,No,No,2022-11-21T11:18:02Z,2022-11-21T11:18:02Z,2022,2975-3848


## Merge with PubMed data

In [11]:
# load the MEDLINE journal list previously merged with Sherpa/Romeo data (tab-separated)
pubmed_sherpa_path = 'data/temp/2023/merge_pubmed_sherpa.tsv'
pubmed = pd.read_csv(pubmed_sherpa_path, sep='\t', header=0, encoding='utf-8')
pubmed

Unnamed: 0,NlmUniqueID,Title,MedlineTA,Country,Place,Publisher,PublicationFirstYear,PublicationEndYear,Frequency,ISSN-Electronic,ISSN-Print,ISSN-Linking,Language,TitleContinuationYN,IndexingStartDate,CurrentlyIndexedYN,IndexOnlineYN,IndexingSubset,IndexingSelectedURL,ReportedMedlineYN,ISSN,sherpa_has_oa_path,embargo,embargo_published_version,embargo_accepted_version,sherpa_oa_green,additional_oa_fee,title_sherpa,issne_sherpa,issnp_sherpa,url,publisher_id,publisher_country,publisher_type,publisher_url,publisher_name,sherpa_uri,sherpa_created,sherpa_last_modified,sherpa_id
0,9015384,20 century British history,20 Century Br Hist,England,"Eynsham, Oxford",Oxford University Press,1990,,"4 no. a year,",1477-4674,0955-2359,0955-2359,eng,N,1990.0,Y,N,QIS,,Y,1477-4674,yes,24.0,,24.0,yes,yes,Twentieth Century British History,1477-4674,0955-2359,https://academic.oup.com/tcbh,55.0,gb,university_publisher,https://academic.oup.com/journals/,Oxford University Press,https://v2.sherpa.ac.uk/id/publisher_policy/1112,2010-07-15 16:04:39,2022-07-26 10:25:23,1406.0
1,101714112,A&A practice,A A Pract,United States,"[Philadelphia, PA]","Wolters Kluwer Health, Inc.",2018,,Biweekly,2575-3126,,2575-3126,eng,Y,2018.0,Y,Y,IM,https://ovidsp.ovid.com/ovidweb.cgi?T=JS&MODE=ovid&PAGE=toc&D=ovft&AN=02054229-000000000-00000,Y,2575-3126,,,,,,,,,,,,,,,,,,,
2,101269322,AACN advanced critical care,AACN Adv Crit Care,United States,"Aliso Viejo, CA",American Association of Critical-Care Nurses (AACN),2006,,Quarterly,1559-7776,1559-7768,1559-7768,eng,Y,2006.0,Y,Y,N,https://aacnjournals.org/aacnacconline,Y,1559-7776,yes,,,,,,AACN Advanced Critical Care,1559-7776,1559-7768,http://acc.aacnjournals.org/,663.0,us,society_publisher,https://www.aacn.org/,American Association of Critical Care Nurses,https://v2.sherpa.ac.uk/id/publisher_policy/663,2010-08-24 15:05:09,2022-07-08 08:42:33,10921.0
3,0431420,AANA journal,AANA J,United States,"Park Ridge, Ill.",American Association of Nurse Anesthetists,1974,,Bimonthly,2162-5239,0094-6354,0094-6354,eng,N,1974.0,Y,Y,N,https://www.aana.com/publications/aana-journal,Y,2162-5239,,,,,,,,,,,,,,,,,,,
4,101223209,The AAPS journal,AAPS J,United States,"Arlington, Va., USA",American Association of Pharmaceutical Scientists,2004,,Four no. a year,1550-7416,,1550-7416,eng,Y,2004.0,Y,Y,IM,https://link.springer.com/journal/12248,Y,1550-7416,yes,12.0,,12.0,yes,yes,AAPS Journal,,1550-7416,http://link.springer.com/journal/12248,313.0,us,client_organisation,https://www.aaps.org/home,American Association of Pharmaceutical Scientists,https://v2.sherpa.ac.uk/id/publisher_policy/3291,2010-09-15 13:16:19,2023-01-05 14:55:40,16180.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5274,8702287,Zoological science,Zoolog Sci,Japan,"Tokyo, Japan",Zoological Society of Japan,1984,,"Monthly,",,0289-0003,0289-0003,eng,N,2002.0,Y,Y,IM,http://www.bioone.org/loi/jzoo,Y,0289-0003,,,,,,,,,,,,,,,,,,,
5275,9435608,"Zoology (Jena, Germany)",Zoology (Jena),Germany,"Jena, Germany",Urban & Fischer,1994,,"Six no. a year,",1873-2720,0944-2006,0944-2006,eng,N,2005.0,Y,Y,IM,https://www.sciencedirect.com/journal/zoology,Y,1873-2720,yes,12.0,,12.0,yes,yes,,,,,,,,,,,,,15919.0
5276,101300786,Zoonoses and public health,Zoonoses Public Health,Germany,"Berlin, Germany",Blackwell Verlag,2007,,Ten no. a year,1863-2378,1863-1959,1863-1959,eng,Y,2007.0,Y,Y,IM,http://onlinelibrary.wiley.com/journal/10.1111/(ISSN)1863-2378,Y,1863-2378,yes,12.0,,12.0,yes,yes,Zoonoses and Public Health,1863-2378,1863-1959,https://onlinelibrary.wiley.com/journal/18632378,580.0,us,commercial_publisher,https://www.wiley.com/en-gb,Wiley,https://v2.sherpa.ac.uk/id/publisher_policy/2050,2010-07-20 14:58:33,2022-07-27 12:37:58,2555.0
5277,101179386,Zootaxa,Zootaxa,New Zealand,"Auckland, N.Z.",Magnolia Press,2001,,Irregular,1175-5334,1175-5326,1175-5326,eng,N,2013.0,Y,Y,IM,http://www.mapress.com/j/zt/,Y,1175-5334,yes,,,,,yes,Zootaxa,1175-5334,1175-5326,https://www.mapress.com/zt/,284.0,nz,commercial_publisher,https://www.mapress.com/,Magnolia Press,https://v2.sherpa.ac.uk/id/publisher_policy/284,2010-06-30 17:47:50,2022-07-15 08:53:17,6.0


In [12]:
# first pass: left-join the DOAJ data onto the MEDLINE journals using the combined ISSN key
pubmed = pubmed.merge(doaj, how='left', on='ISSN')
pubmed

Unnamed: 0,NlmUniqueID,Title,MedlineTA,Country,Place,Publisher,PublicationFirstYear,PublicationEndYear,Frequency,ISSN-Electronic,ISSN-Print,ISSN-Linking,Language,TitleContinuationYN,IndexingStartDate,CurrentlyIndexedYN,IndexOnlineYN,IndexingSubset,IndexingSelectedURL,ReportedMedlineYN,ISSN,sherpa_has_oa_path,embargo,embargo_published_version,embargo_accepted_version,sherpa_oa_green,additional_oa_fee,title_sherpa,issne_sherpa,issnp_sherpa,url,publisher_id,publisher_country,publisher_type,publisher_url,publisher_name,sherpa_uri,sherpa_created,sherpa_last_modified,sherpa_id,DOAJ_title,DOAJ_pissn,DOAJ_eissn,DOAJ_license,DOAJ_author_holds_copyright_without_restrictions,DOAJ_APC,DOAJ_Seal,DOAJ_date,DOAJ_last_updated,DOAJ_year
0,9015384,20 century British history,20 Century Br Hist,England,"Eynsham, Oxford",Oxford University Press,1990,,"4 no. a year,",1477-4674,0955-2359,0955-2359,eng,N,1990.0,Y,N,QIS,,Y,1477-4674,yes,24.0,,24.0,yes,yes,Twentieth Century British History,1477-4674,0955-2359,https://academic.oup.com/tcbh,55.0,gb,university_publisher,https://academic.oup.com/journals/,Oxford University Press,https://v2.sherpa.ac.uk/id/publisher_policy/1112,2010-07-15 16:04:39,2022-07-26 10:25:23,1406.0,,,,,,,,,,
1,101714112,A&A practice,A A Pract,United States,"[Philadelphia, PA]","Wolters Kluwer Health, Inc.",2018,,Biweekly,2575-3126,,2575-3126,eng,Y,2018.0,Y,Y,IM,https://ovidsp.ovid.com/ovidweb.cgi?T=JS&MODE=ovid&PAGE=toc&D=ovft&AN=02054229-000000000-00000,Y,2575-3126,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,101269322,AACN advanced critical care,AACN Adv Crit Care,United States,"Aliso Viejo, CA",American Association of Critical-Care Nurses (AACN),2006,,Quarterly,1559-7776,1559-7768,1559-7768,eng,Y,2006.0,Y,Y,N,https://aacnjournals.org/aacnacconline,Y,1559-7776,yes,,,,,,AACN Advanced Critical Care,1559-7776,1559-7768,http://acc.aacnjournals.org/,663.0,us,society_publisher,https://www.aacn.org/,American Association of Critical Care Nurses,https://v2.sherpa.ac.uk/id/publisher_policy/663,2010-08-24 15:05:09,2022-07-08 08:42:33,10921.0,,,,,,,,,,
3,0431420,AANA journal,AANA J,United States,"Park Ridge, Ill.",American Association of Nurse Anesthetists,1974,,Bimonthly,2162-5239,0094-6354,0094-6354,eng,N,1974.0,Y,Y,N,https://www.aana.com/publications/aana-journal,Y,2162-5239,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,101223209,The AAPS journal,AAPS J,United States,"Arlington, Va., USA",American Association of Pharmaceutical Scientists,2004,,Four no. a year,1550-7416,,1550-7416,eng,Y,2004.0,Y,Y,IM,https://link.springer.com/journal/12248,Y,1550-7416,yes,12.0,,12.0,yes,yes,AAPS Journal,,1550-7416,http://link.springer.com/journal/12248,313.0,us,client_organisation,https://www.aaps.org/home,American Association of Pharmaceutical Scientists,https://v2.sherpa.ac.uk/id/publisher_policy/3291,2010-09-15 13:16:19,2023-01-05 14:55:40,16180.0,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5274,8702287,Zoological science,Zoolog Sci,Japan,"Tokyo, Japan",Zoological Society of Japan,1984,,"Monthly,",,0289-0003,0289-0003,eng,N,2002.0,Y,Y,IM,http://www.bioone.org/loi/jzoo,Y,0289-0003,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5275,9435608,"Zoology (Jena, Germany)",Zoology (Jena),Germany,"Jena, Germany",Urban & Fischer,1994,,"Six no. a year,",1873-2720,0944-2006,0944-2006,eng,N,2005.0,Y,Y,IM,https://www.sciencedirect.com/journal/zoology,Y,1873-2720,yes,12.0,,12.0,yes,yes,,,,,,,,,,,,,15919.0,,,,,,,,,,
5276,101300786,Zoonoses and public health,Zoonoses Public Health,Germany,"Berlin, Germany",Blackwell Verlag,2007,,Ten no. a year,1863-2378,1863-1959,1863-1959,eng,Y,2007.0,Y,Y,IM,http://onlinelibrary.wiley.com/journal/10.1111/(ISSN)1863-2378,Y,1863-2378,yes,12.0,,12.0,yes,yes,Zoonoses and Public Health,1863-2378,1863-1959,https://onlinelibrary.wiley.com/journal/18632378,580.0,us,commercial_publisher,https://www.wiley.com/en-gb,Wiley,https://v2.sherpa.ac.uk/id/publisher_policy/2050,2010-07-20 14:58:33,2022-07-27 12:37:58,2555.0,,,,,,,,,,
5277,101179386,Zootaxa,Zootaxa,New Zealand,"Auckland, N.Z.",Magnolia Press,2001,,Irregular,1175-5334,1175-5326,1175-5326,eng,N,2013.0,Y,Y,IM,http://www.mapress.com/j/zt/,Y,1175-5334,yes,,,,,yes,Zootaxa,1175-5334,1175-5326,https://www.mapress.com/zt/,284.0,nz,commercial_publisher,https://www.mapress.com/,Magnolia Press,https://v2.sherpa.ac.uk/id/publisher_policy/284,2010-06-30 17:47:50,2022-07-15 08:53:17,6.0,,,,,,,,,,


In [13]:
# spot check on one known journal: Rev Med Suisse
pubmed[pubmed['MedlineTA'].eq('Rev Med Suisse')]

Unnamed: 0,NlmUniqueID,Title,MedlineTA,Country,Place,Publisher,PublicationFirstYear,PublicationEndYear,Frequency,ISSN-Electronic,ISSN-Print,ISSN-Linking,Language,TitleContinuationYN,IndexingStartDate,CurrentlyIndexedYN,IndexOnlineYN,IndexingSubset,IndexingSelectedURL,ReportedMedlineYN,ISSN,sherpa_has_oa_path,embargo,embargo_published_version,embargo_accepted_version,sherpa_oa_green,additional_oa_fee,title_sherpa,issne_sherpa,issnp_sherpa,url,publisher_id,publisher_country,publisher_type,publisher_url,publisher_name,sherpa_uri,sherpa_created,sherpa_last_modified,sherpa_id,DOAJ_title,DOAJ_pissn,DOAJ_eissn,DOAJ_license,DOAJ_author_holds_copyright_without_restrictions,DOAJ_APC,DOAJ_Seal,DOAJ_date,DOAJ_last_updated,DOAJ_year
4678,101219148,Revue médicale suisse,Rev Med Suisse,Switzerland,Genève,Médecine et Hygiène,2005,,Weekly (46 issues per year),,1660-9379,1660-9379,fre,Y,2005.0,Y,N,IM,,Y,1660-9379,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [14]:
# prepare the merge by eISSN: park the combined key as ISSN_old and
# expose the DOAJ eISSN under the join column name ISSN
eissn_key_names = {'ISSN': 'ISSN_old', 'DOAJ_eissn': 'ISSN'}
doaj = doaj.rename(columns=eissn_key_names)

In [15]:
# merge by eISSN
# The DOAJ eISSN is empty for some journals and pandas matches null join keys
# against each other, so DOAJ rows without an eISSN are left out of the join.
# They could never match a real ISSN anyway, but a MEDLINE row with a missing
# ISSN would otherwise be joined to every one of them.
pubmed = pd.merge(pubmed, doaj.dropna(subset=['ISSN']), on='ISSN', how='left')
pubmed

Unnamed: 0,NlmUniqueID,Title,MedlineTA,Country,Place,Publisher,PublicationFirstYear,PublicationEndYear,Frequency,ISSN-Electronic,ISSN-Print,ISSN-Linking,Language,TitleContinuationYN,IndexingStartDate,CurrentlyIndexedYN,IndexOnlineYN,IndexingSubset,IndexingSelectedURL,ReportedMedlineYN,ISSN,sherpa_has_oa_path,embargo,embargo_published_version,embargo_accepted_version,sherpa_oa_green,additional_oa_fee,title_sherpa,issne_sherpa,issnp_sherpa,url,publisher_id,publisher_country,publisher_type,publisher_url,publisher_name,sherpa_uri,sherpa_created,sherpa_last_modified,sherpa_id,DOAJ_title_x,DOAJ_pissn_x,DOAJ_eissn,DOAJ_license_x,DOAJ_author_holds_copyright_without_restrictions_x,DOAJ_APC_x,DOAJ_Seal_x,DOAJ_date_x,DOAJ_last_updated_x,DOAJ_year_x,DOAJ_title_y,DOAJ_pissn_y,DOAJ_license_y,DOAJ_author_holds_copyright_without_restrictions_y,DOAJ_APC_y,DOAJ_Seal_y,DOAJ_date_y,DOAJ_last_updated_y,DOAJ_year_y,ISSN_old
0,9015384,20 century British history,20 Century Br Hist,England,"Eynsham, Oxford",Oxford University Press,1990,,"4 no. a year,",1477-4674,0955-2359,0955-2359,eng,N,1990.0,Y,N,QIS,,Y,1477-4674,yes,24.0,,24.0,yes,yes,Twentieth Century British History,1477-4674,0955-2359,https://academic.oup.com/tcbh,55.0,gb,university_publisher,https://academic.oup.com/journals/,Oxford University Press,https://v2.sherpa.ac.uk/id/publisher_policy/1112,2010-07-15 16:04:39,2022-07-26 10:25:23,1406.0,,,,,,,,,,,,,,,,,,,,
1,101714112,A&A practice,A A Pract,United States,"[Philadelphia, PA]","Wolters Kluwer Health, Inc.",2018,,Biweekly,2575-3126,,2575-3126,eng,Y,2018.0,Y,Y,IM,https://ovidsp.ovid.com/ovidweb.cgi?T=JS&MODE=ovid&PAGE=toc&D=ovft&AN=02054229-000000000-00000,Y,2575-3126,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,101269322,AACN advanced critical care,AACN Adv Crit Care,United States,"Aliso Viejo, CA",American Association of Critical-Care Nurses (AACN),2006,,Quarterly,1559-7776,1559-7768,1559-7768,eng,Y,2006.0,Y,Y,N,https://aacnjournals.org/aacnacconline,Y,1559-7776,yes,,,,,,AACN Advanced Critical Care,1559-7776,1559-7768,http://acc.aacnjournals.org/,663.0,us,society_publisher,https://www.aacn.org/,American Association of Critical Care Nurses,https://v2.sherpa.ac.uk/id/publisher_policy/663,2010-08-24 15:05:09,2022-07-08 08:42:33,10921.0,,,,,,,,,,,,,,,,,,,,
3,0431420,AANA journal,AANA J,United States,"Park Ridge, Ill.",American Association of Nurse Anesthetists,1974,,Bimonthly,2162-5239,0094-6354,0094-6354,eng,N,1974.0,Y,Y,N,https://www.aana.com/publications/aana-journal,Y,2162-5239,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,101223209,The AAPS journal,AAPS J,United States,"Arlington, Va., USA",American Association of Pharmaceutical Scientists,2004,,Four no. a year,1550-7416,,1550-7416,eng,Y,2004.0,Y,Y,IM,https://link.springer.com/journal/12248,Y,1550-7416,yes,12.0,,12.0,yes,yes,AAPS Journal,,1550-7416,http://link.springer.com/journal/12248,313.0,us,client_organisation,https://www.aaps.org/home,American Association of Pharmaceutical Scientists,https://v2.sherpa.ac.uk/id/publisher_policy/3291,2010-09-15 13:16:19,2023-01-05 14:55:40,16180.0,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5274,8702287,Zoological science,Zoolog Sci,Japan,"Tokyo, Japan",Zoological Society of Japan,1984,,"Monthly,",,0289-0003,0289-0003,eng,N,2002.0,Y,Y,IM,http://www.bioone.org/loi/jzoo,Y,0289-0003,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5275,9435608,"Zoology (Jena, Germany)",Zoology (Jena),Germany,"Jena, Germany",Urban & Fischer,1994,,"Six no. a year,",1873-2720,0944-2006,0944-2006,eng,N,2005.0,Y,Y,IM,https://www.sciencedirect.com/journal/zoology,Y,1873-2720,yes,12.0,,12.0,yes,yes,,,,,,,,,,,,,15919.0,,,,,,,,,,,,,,,,,,,,
5276,101300786,Zoonoses and public health,Zoonoses Public Health,Germany,"Berlin, Germany",Blackwell Verlag,2007,,Ten no. a year,1863-2378,1863-1959,1863-1959,eng,Y,2007.0,Y,Y,IM,http://onlinelibrary.wiley.com/journal/10.1111/(ISSN)1863-2378,Y,1863-2378,yes,12.0,,12.0,yes,yes,Zoonoses and Public Health,1863-2378,1863-1959,https://onlinelibrary.wiley.com/journal/18632378,580.0,us,commercial_publisher,https://www.wiley.com/en-gb,Wiley,https://v2.sherpa.ac.uk/id/publisher_policy/2050,2010-07-20 14:58:33,2022-07-27 12:37:58,2555.0,,,,,,,,,,,,,,,,,,,,
5277,101179386,Zootaxa,Zootaxa,New Zealand,"Auckland, N.Z.",Magnolia Press,2001,,Irregular,1175-5334,1175-5326,1175-5326,eng,N,2013.0,Y,Y,IM,http://www.mapress.com/j/zt/,Y,1175-5334,yes,,,,,yes,Zootaxa,1175-5334,1175-5326,https://www.mapress.com/zt/,284.0,nz,commercial_publisher,https://www.mapress.com/,Magnolia Press,https://v2.sherpa.ac.uk/id/publisher_policy/284,2010-06-30 17:47:50,2022-07-15 08:53:17,6.0,,,,,,,,,,,,,,,,,,,,


In [16]:
# rows matched only by the latest merge (_y filled) and not by the earlier one (_x empty)
pubmed[pubmed['DOAJ_title_x'].isna() & pubmed['DOAJ_title_y'].notna()]

Unnamed: 0,NlmUniqueID,Title,MedlineTA,Country,Place,Publisher,PublicationFirstYear,PublicationEndYear,Frequency,ISSN-Electronic,ISSN-Print,ISSN-Linking,Language,TitleContinuationYN,IndexingStartDate,CurrentlyIndexedYN,IndexOnlineYN,IndexingSubset,IndexingSelectedURL,ReportedMedlineYN,ISSN,sherpa_has_oa_path,embargo,embargo_published_version,embargo_accepted_version,sherpa_oa_green,additional_oa_fee,title_sherpa,issne_sherpa,issnp_sherpa,url,publisher_id,publisher_country,publisher_type,publisher_url,publisher_name,sherpa_uri,sherpa_created,sherpa_last_modified,sherpa_id,DOAJ_title_x,DOAJ_pissn_x,DOAJ_eissn,DOAJ_license_x,DOAJ_author_holds_copyright_without_restrictions_x,DOAJ_APC_x,DOAJ_Seal_x,DOAJ_date_x,DOAJ_last_updated_x,DOAJ_year_x,DOAJ_title_y,DOAJ_pissn_y,DOAJ_license_y,DOAJ_author_holds_copyright_without_restrictions_y,DOAJ_APC_y,DOAJ_Seal_y,DOAJ_date_y,DOAJ_last_updated_y,DOAJ_year_y,ISSN_old


In [17]:
# add new merged data
# After the merge by eISSN every DOAJ field exists twice: '<field>_x' holds the
# value found by the first merge (combined ISSN key) and '<field>_y' the value
# found by the merge by eISSN. Keep '_x' where the first merge matched and fall
# back to '_y' only for rows that the first merge missed.
doaj_fields = [
    'DOAJ_title',
    'DOAJ_pissn',
    'DOAJ_Seal',
    'DOAJ_date',
    'DOAJ_year',
    'DOAJ_license',
    'DOAJ_author_holds_copyright_without_restrictions',
    'DOAJ_last_updated',
    'DOAJ_APC',
]
matched_first = pubmed['DOAJ_title_x'].notna()
matched_eissn_only = pubmed['DOAJ_title_x'].isna() & pubmed['DOAJ_title_y'].notna()

for field in doaj_fields:
    pubmed.loc[matched_first, field] = pubmed[field + '_x']
    pubmed.loc[matched_eissn_only, field] = pubmed[field + '_y']

# DOAJ_eissn has no _x/_y pair: the right-hand frame exposed its eISSN as the
# join key, so the column still holds the value from the first merge.
# Rows matched by the first merge therefore already carry the real DOAJ eISSN
# and must not be overwritten with the MEDLINE ISSN: when the first match went
# through the pISSN fallback, that would store a print ISSN as eISSN.
# Only rows matched by eISSN alone take the join key as their eISSN.
pubmed.loc[matched_eissn_only, 'DOAJ_eissn'] = pubmed['ISSN']

# drop the temporary suffixed columns and the parked combined key
temporary_columns = (
    [field + '_x' for field in doaj_fields]
    + [field + '_y' for field in doaj_fields]
    + ['ISSN_old']
)
pubmed = pubmed.drop(columns=temporary_columns)

In [18]:
# merge by pISSN
# swap the key again: give the eISSN its own name back and expose the DOAJ pISSN as ISSN
doaj = doaj.rename(columns={'ISSN' : 'DOAJ_eissn', 'DOAJ_pissn' : 'ISSN'})
# The DOAJ pISSN is empty for online-only journals and pandas matches null join
# keys against each other, so DOAJ rows without a pISSN are left out of the join
# (they could never match a real ISSN anyway).
pubmed = pd.merge(pubmed, doaj.dropna(subset=['ISSN']), on='ISSN', how='left')
pubmed

Unnamed: 0,NlmUniqueID,Title,MedlineTA,Country,Place,Publisher,PublicationFirstYear,PublicationEndYear,Frequency,ISSN-Electronic,ISSN-Print,ISSN-Linking,Language,TitleContinuationYN,IndexingStartDate,CurrentlyIndexedYN,IndexOnlineYN,IndexingSubset,IndexingSelectedURL,ReportedMedlineYN,ISSN,sherpa_has_oa_path,embargo,embargo_published_version,embargo_accepted_version,sherpa_oa_green,additional_oa_fee,title_sherpa,issne_sherpa,issnp_sherpa,url,publisher_id,publisher_country,publisher_type,publisher_url,publisher_name,sherpa_uri,sherpa_created,sherpa_last_modified,sherpa_id,DOAJ_eissn_x,DOAJ_title_x,DOAJ_pissn,DOAJ_Seal_x,DOAJ_date_x,DOAJ_year_x,DOAJ_license_x,DOAJ_author_holds_copyright_without_restrictions_x,DOAJ_last_updated_x,DOAJ_APC_x,DOAJ_title_y,DOAJ_eissn_y,DOAJ_license_y,DOAJ_author_holds_copyright_without_restrictions_y,DOAJ_APC_y,DOAJ_Seal_y,DOAJ_date_y,DOAJ_last_updated_y,DOAJ_year_y,ISSN_old
0,9015384,20 century British history,20 Century Br Hist,England,"Eynsham, Oxford",Oxford University Press,1990,,"4 no. a year,",1477-4674,0955-2359,0955-2359,eng,N,1990.0,Y,N,QIS,,Y,1477-4674,yes,24.0,,24.0,yes,yes,Twentieth Century British History,1477-4674,0955-2359,https://academic.oup.com/tcbh,55.0,gb,university_publisher,https://academic.oup.com/journals/,Oxford University Press,https://v2.sherpa.ac.uk/id/publisher_policy/1112,2010-07-15 16:04:39,2022-07-26 10:25:23,1406.0,,,,,,,,,,,,,,,,,,,,
1,101714112,A&A practice,A A Pract,United States,"[Philadelphia, PA]","Wolters Kluwer Health, Inc.",2018,,Biweekly,2575-3126,,2575-3126,eng,Y,2018.0,Y,Y,IM,https://ovidsp.ovid.com/ovidweb.cgi?T=JS&MODE=ovid&PAGE=toc&D=ovft&AN=02054229-000000000-00000,Y,2575-3126,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,101269322,AACN advanced critical care,AACN Adv Crit Care,United States,"Aliso Viejo, CA",American Association of Critical-Care Nurses (AACN),2006,,Quarterly,1559-7776,1559-7768,1559-7768,eng,Y,2006.0,Y,Y,N,https://aacnjournals.org/aacnacconline,Y,1559-7776,yes,,,,,,AACN Advanced Critical Care,1559-7776,1559-7768,http://acc.aacnjournals.org/,663.0,us,society_publisher,https://www.aacn.org/,American Association of Critical Care Nurses,https://v2.sherpa.ac.uk/id/publisher_policy/663,2010-08-24 15:05:09,2022-07-08 08:42:33,10921.0,,,,,,,,,,,,,,,,,,,,
3,0431420,AANA journal,AANA J,United States,"Park Ridge, Ill.",American Association of Nurse Anesthetists,1974,,Bimonthly,2162-5239,0094-6354,0094-6354,eng,N,1974.0,Y,Y,N,https://www.aana.com/publications/aana-journal,Y,2162-5239,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,101223209,The AAPS journal,AAPS J,United States,"Arlington, Va., USA",American Association of Pharmaceutical Scientists,2004,,Four no. a year,1550-7416,,1550-7416,eng,Y,2004.0,Y,Y,IM,https://link.springer.com/journal/12248,Y,1550-7416,yes,12.0,,12.0,yes,yes,AAPS Journal,,1550-7416,http://link.springer.com/journal/12248,313.0,us,client_organisation,https://www.aaps.org/home,American Association of Pharmaceutical Scientists,https://v2.sherpa.ac.uk/id/publisher_policy/3291,2010-09-15 13:16:19,2023-01-05 14:55:40,16180.0,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5274,8702287,Zoological science,Zoolog Sci,Japan,"Tokyo, Japan",Zoological Society of Japan,1984,,"Monthly,",,0289-0003,0289-0003,eng,N,2002.0,Y,Y,IM,http://www.bioone.org/loi/jzoo,Y,0289-0003,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5275,9435608,"Zoology (Jena, Germany)",Zoology (Jena),Germany,"Jena, Germany",Urban & Fischer,1994,,"Six no. a year,",1873-2720,0944-2006,0944-2006,eng,N,2005.0,Y,Y,IM,https://www.sciencedirect.com/journal/zoology,Y,1873-2720,yes,12.0,,12.0,yes,yes,,,,,,,,,,,,,15919.0,,,,,,,,,,,,,,,,,,,,
5276,101300786,Zoonoses and public health,Zoonoses Public Health,Germany,"Berlin, Germany",Blackwell Verlag,2007,,Ten no. a year,1863-2378,1863-1959,1863-1959,eng,Y,2007.0,Y,Y,IM,http://onlinelibrary.wiley.com/journal/10.1111/(ISSN)1863-2378,Y,1863-2378,yes,12.0,,12.0,yes,yes,Zoonoses and Public Health,1863-2378,1863-1959,https://onlinelibrary.wiley.com/journal/18632378,580.0,us,commercial_publisher,https://www.wiley.com/en-gb,Wiley,https://v2.sherpa.ac.uk/id/publisher_policy/2050,2010-07-20 14:58:33,2022-07-27 12:37:58,2555.0,,,,,,,,,,,,,,,,,,,,
5277,101179386,Zootaxa,Zootaxa,New Zealand,"Auckland, N.Z.",Magnolia Press,2001,,Irregular,1175-5334,1175-5326,1175-5326,eng,N,2013.0,Y,Y,IM,http://www.mapress.com/j/zt/,Y,1175-5334,yes,,,,,yes,Zootaxa,1175-5334,1175-5326,https://www.mapress.com/zt/,284.0,nz,commercial_publisher,https://www.mapress.com/,Magnolia Press,https://v2.sherpa.ac.uk/id/publisher_policy/284,2010-06-30 17:47:50,2022-07-15 08:53:17,6.0,,,,,,,,,,,,,,,,,,,,


In [19]:
# Sanity check: list the rows that have no `DOAJ_title_x` but do have a
# `DOAJ_title_y`, i.e. the rows whose DOAJ data comes only from the `_y` columns.
pubmed[pubmed['DOAJ_title_y'].notna() & pubmed['DOAJ_title_x'].isna()]

Unnamed: 0,NlmUniqueID,Title,MedlineTA,Country,Place,Publisher,PublicationFirstYear,PublicationEndYear,Frequency,ISSN-Electronic,ISSN-Print,ISSN-Linking,Language,TitleContinuationYN,IndexingStartDate,CurrentlyIndexedYN,IndexOnlineYN,IndexingSubset,IndexingSelectedURL,ReportedMedlineYN,ISSN,sherpa_has_oa_path,embargo,embargo_published_version,embargo_accepted_version,sherpa_oa_green,additional_oa_fee,title_sherpa,issne_sherpa,issnp_sherpa,url,publisher_id,publisher_country,publisher_type,publisher_url,publisher_name,sherpa_uri,sherpa_created,sherpa_last_modified,sherpa_id,DOAJ_eissn_x,DOAJ_title_x,DOAJ_pissn,DOAJ_Seal_x,DOAJ_date_x,DOAJ_year_x,DOAJ_license_x,DOAJ_author_holds_copyright_without_restrictions_x,DOAJ_last_updated_x,DOAJ_APC_x,DOAJ_title_y,DOAJ_eissn_y,DOAJ_license_y,DOAJ_author_holds_copyright_without_restrictions_y,DOAJ_APC_y,DOAJ_Seal_y,DOAJ_date_y,DOAJ_last_updated_y,DOAJ_year_y,ISSN_old
55,7901042,Acta medica Indonesiana,Acta Med Indones,Indonesia,"Jakarta, Indonesia",Indonesian Society of Internal Medicine,1968,,"Quarterly,",,0125-9326,0125-9326,eng,N,2004.0,Y,Y,IM,http://www.actamedindones.org/index.php/ijim,Y,0125-9326,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Acta Medica Indonesiana,2338-2732,CC BY,Yes,Yes,No,2013-10-03T13:46:32Z,2021-06-15T09:21:27Z,2013,2338-2732
121,101138582,Advances in clinical and experimental medicine : official organ Wroclaw Medical University,Adv Clin Exp Med,Poland,"Wroclaw, Poland",The University,1998,,"Bimonthly,",,1899-5276,1899-5276,eng,N,2012.0,Y,N,IM,,Y,1899-5276,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Advances in Clinical and Experimental Medicine,2451-2680,CC BY,Yes,Yes,No,2022-07-13T12:40:40Z,2022-07-13T12:40:40Z,2022,2451-2680
379,101155885,Annals of hepatology,Ann Hepatol,Mexico,México,Fundación Clínica Médica Sur,2002,,"Bimonthly,",,1665-2681,1665-2681,eng,N,2002.0,Y,Y,IM,http://new.medigraphic.com/cgi-bin/publicaciones.cgi?IDREVISTA=13&NOMBRE=Annals%20of%20Hepatology,Y,1665-2681,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Annals of Hepatology,2659-5982,"CC BY, CC BY-NC-ND",No,Yes,No,2021-05-28T12:51:37Z,2021-05-28T12:51:37Z,2021,2659-5982
1098,373152,Chimia,Chimia (Aarau),Switzerland,Bern,Swiss Chemical Society,1947,,Monthly,,0009-4293,0009-4293,ger,N,2010.0,Y,N,IM,,Y,0009-4293,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,CHIMIA,2673-2424,"CC BY, CC BY-NC",Yes,No,No,2020-07-13T08:29:22Z,2022-11-17T10:24:37Z,2020,2673-2424
1974,10333,Gaceta médica de México,Gac Med Mex,Mexico,México,Unidad de Congresos del Centro Médico Nacional.,1864,,Bimonthly,,0016-3813,0016-3813,spa,N,1964.0,Y,Y,IM,https://www.gacetamedicademexico.com/,Y,0016-3813,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Gaceta Médica de México,2696-1288,CC BY-NC-ND,No,No,No,2021-03-26T14:11:14Z,2022-08-05T14:45:22Z,2021,2696-1288
2178,9512509,Hong Kong medical journal = Xianggang yi xue za zhi,Hong Kong Med J,China,Hong Kong,Hong Kong Academy of Medicine Press,1995,,Quarterly,,1024-2708,1024-2708,eng,N,2000.0,Y,Y,IM,http://www.hkmj.org/,Y,1024-2708,yes,0.0,0.0,,yes,,Hong Kong Medical Journal,2226-8707,1024-2708,https://www.hkmj.org/,63421.0,hk,commercial_publisher,https://hkampress.org/,Hong Kong Academy of Medicine Press,https://v2.sherpa.ac.uk/id/publisher_policy/5181,2021-08-20 08:36:23,2022-12-08 14:55:34,39590.0,,,,,,,,,,,Hong Kong Medical Journal,2226-8707,CC BY-NC-ND,No,No,No,2004-04-16T11:00:17Z,2017-07-19T08:52:51Z,2004,2226-8707
2274,400673,Indian journal of public health,Indian J Public Health,India,Mumbai,Medknow,1957,,Quarterly,,0019-557X,0019-557X,eng,N,1965.0,Y,Y,IM,http://www.ijph.in/,Y,0019-557X,yes,0.0,0.0,0.0,yes,,Indian Journal of Public Health,,0019-557X,https://www.ijph.in/,89.0,in,commercial_publisher,https://www.medknow.com/,Medknow Publications,https://v2.sherpa.ac.uk/id/publisher_policy/89,2010-11-08 13:51:01,2022-04-20 14:37:16,17830.0,,,,,,,,,,,Indian Journal of Public Health,2229-7693,CC BY-NC-SA,No,Yes,No,2018-09-07T09:35:16Z,2022-08-05T15:12:01Z,2018,2229-7693
3030,101156357,Journal of integrative neuroscience,J Integr Neurosci,Singapore,Singapore,IMR Press,2002,,Quarterly,,0219-6352,0219-6352,eng,N,2002.0,Y,Y,IM,https://jin.imrpress.com/EN/1757-448X/home.shtml,Y,0219-6352,yes,0.0,0.0,0.0,yes,,Journal of Integrative Neuroscience,1757-448X,0219-6352,https://www.imrpress.com/journal/JIN,62620.0,sg,commercial_publisher,https://www.imrpress.com/,IMR Press,https://v2.sherpa.ac.uk/id/publisher_policy/3619,2010-08-03 10:09:10,2022-02-15 13:08:56,6467.0,,,,,,,,,,,Journal of Integrative Neuroscience,1757-448X,CC BY,Yes,Yes,No,2020-01-24T12:09:26Z,2022-08-29T11:14:15Z,2020,1757-448X
3833,9107902,Molecular plant-microbe interactions : MPMI,Mol Plant Microbe Interact,United States,"St. Paul, MN",APS Press,1988,,"12 no. a year,",,0894-0282,0894-0282,eng,N,1988.0,Y,N,IM,,Y,0894-0282,yes,0.0,0.0,,yes,,Molecular Plant-Microbe Interactions,1943-7706,0894-0282,http://apsjournals.apsnet.org/loi/mpmi,326.0,us,society_publisher,https://www.apsnet.org/Pages/default.aspx,American Phytopathological Society,https://v2.sherpa.ac.uk/id/publisher_policy/4194,2010-08-18 15:26:45,2023-01-23 15:55:17,10349.0,,,,,,,,,,,Molecular Plant-Microbe Interactions,1943-7706,"CC BY, CC BY-NC-ND, CC0",Yes,Yes,No,2021-05-06T15:20:32Z,2022-09-16T13:35:13Z,2021,1943-7706
3841,9307314,Monaldi archives for chest disease = Archivio Monaldi per le malattie del torace,Monaldi Arch Chest Dis,Italy,Pavia,Fondazione clinica del lavoro edizioni : Distributed by PI-ME tipographia,1993,,Bimonthly,,1122-0643,1122-0643,eng,Y,1993.0,Y,Y,IM,http://www.monaldi-archives.org/index.php/macd/about,Y,1122-0643,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Monaldi Archives for Chest Disease,2532-5264,CC BY-NC,Yes,No,No,2019-02-15T15:38:02Z,2022-09-28T21:45:05Z,2019,2532-5264


In [20]:
# Consolidate the suffixed `*_x` / `*_y` DOAJ columns into one un-suffixed set.
#   * A row that has a `DOAJ_title_x` takes every DOAJ value from the `_x` columns.
#   * A row that has no `DOAJ_title_x` but has a `DOAJ_title_y` takes them from
#     the `_y` columns.
#   * In both cases `DOAJ_pissn` is filled from `ISSN`.
# The suffixed columns are deleted at the end, so this cell cannot be re-run
# unless those columns are recreated first.

# Target columns, in the order in which they are written (this fixes the order
# of the newly created columns in `pubmed`).
doaj_targets = [
    'DOAJ_title',
    'DOAJ_pissn',
    'DOAJ_eissn',
    'DOAJ_Seal',
    'DOAJ_date',
    'DOAJ_year',
    'DOAJ_license',
    'DOAJ_author_holds_copyright_without_restrictions',
    'DOAJ_last_updated',
    'DOAJ_APC',
]

# Row selectors are evaluated once: none of the columns they read is modified
# by the assignments below.
rows_from_x = pubmed['DOAJ_title_x'].notna()
rows_from_y = pubmed['DOAJ_title_x'].isna() & pubmed['DOAJ_title_y'].notna()

# `_x` rows are written first, then the `_y`-only rows (the two selections are disjoint)
for suffix, rows in (('_x', rows_from_x), ('_y', rows_from_y)):
    for target in doaj_targets:
        # DOAJ_pissn has no suffixed source here: it is always copied from ISSN
        source = 'ISSN' if target == 'DOAJ_pissn' else target + suffix
        pubmed.loc[rows, target] = pubmed[source]

# remove the suffixed columns now that their values have been copied
for suffix in ('_x', '_y'):
    for target in doaj_targets:
        if target != 'DOAJ_pissn':
            del pubmed[target + suffix]

In [21]:
# Mark every row that has a DOAJ title with DOAJ = 'yes' (other rows are left
# untouched), then display the table.
has_doaj_title = ~pubmed['DOAJ_title'].isna()
pubmed.loc[has_doaj_title, 'DOAJ'] = 'yes'
pubmed

Unnamed: 0,NlmUniqueID,Title,MedlineTA,Country,Place,Publisher,PublicationFirstYear,PublicationEndYear,Frequency,ISSN-Electronic,ISSN-Print,ISSN-Linking,Language,TitleContinuationYN,IndexingStartDate,CurrentlyIndexedYN,IndexOnlineYN,IndexingSubset,IndexingSelectedURL,ReportedMedlineYN,ISSN,sherpa_has_oa_path,embargo,embargo_published_version,embargo_accepted_version,sherpa_oa_green,additional_oa_fee,title_sherpa,issne_sherpa,issnp_sherpa,url,publisher_id,publisher_country,publisher_type,publisher_url,publisher_name,sherpa_uri,sherpa_created,sherpa_last_modified,sherpa_id,DOAJ_pissn,ISSN_old,DOAJ_title,DOAJ_eissn,DOAJ_Seal,DOAJ_date,DOAJ_year,DOAJ_license,DOAJ_author_holds_copyright_without_restrictions,DOAJ_last_updated,DOAJ_APC,DOAJ
0,9015384,20 century British history,20 Century Br Hist,England,"Eynsham, Oxford",Oxford University Press,1990,,"4 no. a year,",1477-4674,0955-2359,0955-2359,eng,N,1990.0,Y,N,QIS,,Y,1477-4674,yes,24.0,,24.0,yes,yes,Twentieth Century British History,1477-4674,0955-2359,https://academic.oup.com/tcbh,55.0,gb,university_publisher,https://academic.oup.com/journals/,Oxford University Press,https://v2.sherpa.ac.uk/id/publisher_policy/1112,2010-07-15 16:04:39,2022-07-26 10:25:23,1406.0,,,,,,,,,,,,
1,101714112,A&A practice,A A Pract,United States,"[Philadelphia, PA]","Wolters Kluwer Health, Inc.",2018,,Biweekly,2575-3126,,2575-3126,eng,Y,2018.0,Y,Y,IM,https://ovidsp.ovid.com/ovidweb.cgi?T=JS&MODE=ovid&PAGE=toc&D=ovft&AN=02054229-000000000-00000,Y,2575-3126,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,101269322,AACN advanced critical care,AACN Adv Crit Care,United States,"Aliso Viejo, CA",American Association of Critical-Care Nurses (AACN),2006,,Quarterly,1559-7776,1559-7768,1559-7768,eng,Y,2006.0,Y,Y,N,https://aacnjournals.org/aacnacconline,Y,1559-7776,yes,,,,,,AACN Advanced Critical Care,1559-7776,1559-7768,http://acc.aacnjournals.org/,663.0,us,society_publisher,https://www.aacn.org/,American Association of Critical Care Nurses,https://v2.sherpa.ac.uk/id/publisher_policy/663,2010-08-24 15:05:09,2022-07-08 08:42:33,10921.0,,,,,,,,,,,,
3,0431420,AANA journal,AANA J,United States,"Park Ridge, Ill.",American Association of Nurse Anesthetists,1974,,Bimonthly,2162-5239,0094-6354,0094-6354,eng,N,1974.0,Y,Y,N,https://www.aana.com/publications/aana-journal,Y,2162-5239,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,101223209,The AAPS journal,AAPS J,United States,"Arlington, Va., USA",American Association of Pharmaceutical Scientists,2004,,Four no. a year,1550-7416,,1550-7416,eng,Y,2004.0,Y,Y,IM,https://link.springer.com/journal/12248,Y,1550-7416,yes,12.0,,12.0,yes,yes,AAPS Journal,,1550-7416,http://link.springer.com/journal/12248,313.0,us,client_organisation,https://www.aaps.org/home,American Association of Pharmaceutical Scientists,https://v2.sherpa.ac.uk/id/publisher_policy/3291,2010-09-15 13:16:19,2023-01-05 14:55:40,16180.0,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5274,8702287,Zoological science,Zoolog Sci,Japan,"Tokyo, Japan",Zoological Society of Japan,1984,,"Monthly,",,0289-0003,0289-0003,eng,N,2002.0,Y,Y,IM,http://www.bioone.org/loi/jzoo,Y,0289-0003,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5275,9435608,"Zoology (Jena, Germany)",Zoology (Jena),Germany,"Jena, Germany",Urban & Fischer,1994,,"Six no. a year,",1873-2720,0944-2006,0944-2006,eng,N,2005.0,Y,Y,IM,https://www.sciencedirect.com/journal/zoology,Y,1873-2720,yes,12.0,,12.0,yes,yes,,,,,,,,,,,,,15919.0,,,,,,,,,,,,
5276,101300786,Zoonoses and public health,Zoonoses Public Health,Germany,"Berlin, Germany",Blackwell Verlag,2007,,Ten no. a year,1863-2378,1863-1959,1863-1959,eng,Y,2007.0,Y,Y,IM,http://onlinelibrary.wiley.com/journal/10.1111/(ISSN)1863-2378,Y,1863-2378,yes,12.0,,12.0,yes,yes,Zoonoses and Public Health,1863-2378,1863-1959,https://onlinelibrary.wiley.com/journal/18632378,580.0,us,commercial_publisher,https://www.wiley.com/en-gb,Wiley,https://v2.sherpa.ac.uk/id/publisher_policy/2050,2010-07-20 14:58:33,2022-07-27 12:37:58,2555.0,,,,,,,,,,,,
5277,101179386,Zootaxa,Zootaxa,New Zealand,"Auckland, N.Z.",Magnolia Press,2001,,Irregular,1175-5334,1175-5326,1175-5326,eng,N,2013.0,Y,Y,IM,http://www.mapress.com/j/zt/,Y,1175-5334,yes,,,,,yes,Zootaxa,1175-5334,1175-5326,https://www.mapress.com/zt/,284.0,nz,commercial_publisher,https://www.mapress.com/,Magnolia Press,https://v2.sherpa.ac.uk/id/publisher_policy/284,2010-06-30 17:47:50,2022-07-15 08:53:17,6.0,,,,,,,,,,,,


In [22]:
# Spot check: show the row(s) whose MedlineTA is 'Rev Med Suisse'
pubmed[pubmed['MedlineTA'] == 'Rev Med Suisse']

Unnamed: 0,NlmUniqueID,Title,MedlineTA,Country,Place,Publisher,PublicationFirstYear,PublicationEndYear,Frequency,ISSN-Electronic,ISSN-Print,ISSN-Linking,Language,TitleContinuationYN,IndexingStartDate,CurrentlyIndexedYN,IndexOnlineYN,IndexingSubset,IndexingSelectedURL,ReportedMedlineYN,ISSN,sherpa_has_oa_path,embargo,embargo_published_version,embargo_accepted_version,sherpa_oa_green,additional_oa_fee,title_sherpa,issne_sherpa,issnp_sherpa,url,publisher_id,publisher_country,publisher_type,publisher_url,publisher_name,sherpa_uri,sherpa_created,sherpa_last_modified,sherpa_id,DOAJ_pissn,ISSN_old,DOAJ_title,DOAJ_eissn,DOAJ_Seal,DOAJ_date,DOAJ_year,DOAJ_license,DOAJ_author_holds_copyright_without_restrictions,DOAJ_last_updated,DOAJ_APC,DOAJ
4678,101219148,Revue médicale suisse,Rev Med Suisse,Switzerland,Genève,Médecine et Hygiène,2005,,Weekly (46 issues per year),,1660-9379,1660-9379,fre,Y,2005.0,Y,N,IM,,Y,1660-9379,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


## Counts

In [23]:
# total number of rows in pubmed
len(pubmed)

5279

In [24]:
# number of journals flagged DOAJ == 'yes'
len(pubmed[pubmed['DOAJ'] == 'yes'])

806

In [25]:
# share of all rows that are flagged DOAJ == 'yes' (fraction between 0 and 1)
len(pubmed[pubmed['DOAJ'] == 'yes']) / len(pubmed)

0.15268043189998107

In [26]:
# number of journals not flagged DOAJ == 'yes' (rows with a missing flag are counted too)
len(pubmed[pubmed['DOAJ'] != 'yes'])

4473

In [27]:
# share of all rows that are not flagged DOAJ == 'yes' (fraction between 0 and 1)
len(pubmed[pubmed['DOAJ'] != 'yes']) / len(pubmed)

0.847319568100019

In [28]:
# frequency of each DOAJ_Seal value (missing values are not counted)
pubmed.loc[:, 'DOAJ_Seal'].value_counts()

No     582
Yes    224
Name: DOAJ_Seal, dtype: int64

In [29]:
# number of journals whose DOAJ_Seal is 'Yes'
len(pubmed[pubmed['DOAJ_Seal'] == 'Yes'])

224

In [30]:
# journals with DOAJ_Seal == 'Yes' as a fraction of the journals flagged DOAJ == 'yes'
len(pubmed[pubmed['DOAJ_Seal'] == 'Yes']) / len(pubmed[pubmed['DOAJ'] == 'yes'])

0.27791563275434245

In [31]:
# number of journals whose DOAJ_APC is 'Yes'
len(pubmed[pubmed['DOAJ_APC'] == 'Yes'])

651

In [32]:
# journals with DOAJ_APC == 'Yes' as a fraction of the journals flagged DOAJ == 'yes'
len(pubmed[pubmed['DOAJ_APC'] == 'Yes']) / len(pubmed[pubmed['DOAJ'] == 'yes'])

0.8076923076923077

In [33]:
# number of journals whose DOAJ_APC is 'No'
len(pubmed[pubmed['DOAJ_APC'] == 'No'])

155

In [34]:
# journals with DOAJ_APC == 'No' as a fraction of the journals flagged DOAJ == 'yes'
len(pubmed[pubmed['DOAJ_APC'] == 'No']) / len(pubmed[pubmed['DOAJ'] == 'yes'])

0.19230769230769232

In [35]:
# full DOAJ table: number of journals whose DOAJ_APC is 'Yes'
len(doaj[doaj['DOAJ_APC'] == 'Yes'])

6096

In [36]:
# full DOAJ table: fraction of journals whose DOAJ_APC is 'Yes'
len(doaj[doaj['DOAJ_APC'] == 'Yes']) / len(doaj)

0.31759924976555176

In [37]:
# full DOAJ table: number of journals whose DOAJ_APC is 'No'
len(doaj[doaj['DOAJ_APC'] == 'No'])

13098

In [38]:
# full DOAJ table: fraction of journals whose DOAJ_APC is 'No'
len(doaj[doaj['DOAJ_APC'] == 'No']) / len(doaj)

0.6824007502344482

In [None]:
# MEDLINE journals in DOAJ: 806
# 81% with APC (651)
# 19% non-APC (155)

# Compared to the complete DOAJ data, the proportion of diamond journals in MEDLINE is much smaller than in DOAJ as a whole
# journals in DOAJ (April 2023): 19194
# 32% with APC
# 68% Non-APC
# 
# Study: OA Diamond Journals Study. Part 1: Findings, 2021 - https://doi.org/10.5281/zenodo.4558704
# Sept 2020 DOAJ data:
# 27% with APC
# 73% Non-APC

In [40]:
# number of journals that have a sherpa_id and are flagged DOAJ == 'yes'
len(pubmed[pubmed['sherpa_id'].notna() & (pubmed['DOAJ'] == 'yes')])

636

In [41]:
# number of journals flagged DOAJ == 'yes' that have no sherpa_id
len(pubmed[pubmed['sherpa_id'].isna() & (pubmed['DOAJ'] == 'yes')])

170

In [42]:
# inspect the journals flagged DOAJ == 'yes' that have no sherpa_id
pubmed[pubmed['sherpa_id'].isna() & (pubmed['DOAJ'] == 'yes')]

Unnamed: 0,NlmUniqueID,Title,MedlineTA,Country,Place,Publisher,PublicationFirstYear,PublicationEndYear,Frequency,ISSN-Electronic,ISSN-Print,ISSN-Linking,Language,TitleContinuationYN,IndexingStartDate,CurrentlyIndexedYN,IndexOnlineYN,IndexingSubset,IndexingSelectedURL,ReportedMedlineYN,ISSN,sherpa_has_oa_path,embargo,embargo_published_version,embargo_accepted_version,sherpa_oa_green,additional_oa_fee,title_sherpa,issne_sherpa,issnp_sherpa,url,publisher_id,publisher_country,publisher_type,publisher_url,publisher_name,sherpa_uri,sherpa_created,sherpa_last_modified,sherpa_id,DOAJ_pissn,ISSN_old,DOAJ_title,DOAJ_eissn,DOAJ_Seal,DOAJ_date,DOAJ_year,DOAJ_license,DOAJ_author_holds_copyright_without_restrictions,DOAJ_last_updated,DOAJ_APC,DOAJ
37,9103983,Acta cirúrgica brasileira,Acta Cir Bras,Brazil,Sao Paulo,Sociedade Brasileira Para O Desenvolvimento Da Pesquisa Em Cirurgia Curso De Pos-Graduacao Em Tecnica Operatoria E Cirurgia Experimental Escola Paulista De Medicina,1986,,Quarterly,1678-2674,0102-8650,0102-8650,eng,N,2005.0,Y,Y,IM,http://www.scielo.br/scielo.php?script=sci_issues&pid=0102-8650&lng=en&nrm=iso,Y,1678-2674,,,,,,,,,,,,,,,,,,,,1678-2674,,Acta Cirúrgica Brasileira,1678-2674,No,2004-04-23T21:31:00Z,2004,CC BY,No,2022-04-28T12:16:59Z,Yes,yes
53,101587903,Acta medica academica,Acta Med Acad,Bosnia and Herzegovina,Sarajevo,Akademija nauka i umjetnosti Bosne i Hercegovine,2006,,"Three issues a year,",1840-2879,1840-1848,1840-1848,eng,N,2012.0,Y,Y,IM,http://www.ama.ba/index.php/ama/issue/archive,Y,1840-2879,,,,,,,,,,,,,,,,,,,,1840-2879,,Acta Medica Academica,1840-2879,No,2013-01-08T11:33:20Z,2013,CC BY-NC,No,2017-06-24T15:19:56Z,No,yes
54,101241032,Acta medico-historica adriatica : AMHA,Acta Med Hist Adriat,Croatia,Rijeka [Croatia],Hrvatsko znanstveno društvo za povijest zdravstvene kulture,2003,,Two no. a year,1334-6253,1334-4366,1334-4366,eng,N,2009.0,Y,Y,IM,http://hrcak.srce.hr/amha?lang=en,Y,1334-6253,,,,,,,,,,,,,,,,,,,,1334-6253,,Acta Medico-Historica Adriatica,1334-6253,No,2018-09-07T10:29:11Z,2018,CC BY,Yes,2022-10-22T21:49:44Z,No,yes
55,7901042,Acta medica Indonesiana,Acta Med Indones,Indonesia,"Jakarta, Indonesia",Indonesian Society of Internal Medicine,1968,,"Quarterly,",,0125-9326,0125-9326,eng,N,2004.0,Y,Y,IM,http://www.actamedindones.org/index.php/ijim,Y,0125-9326,,,,,,,,,,,,,,,,,,,,0125-9326,2338-2732,Acta Medica Indonesiana,2338-2732,No,2013-10-03T13:46:32Z,2013,CC BY,Yes,2021-06-15T09:21:27Z,Yes,yes
80,8213019,Acta otorhinolaryngologica Italica : organo ufficiale della Società italiana di otorinolaringologia e chirurgia cervico-facciale,Acta Otorhinolaryngol Ital,Italy,[Pisa,Pacini editore,1981,,Six no. a year,1827-675X,0392-100X,0392-100X,eng,N,1981.0,Y,Y,IM,http://preview.ncbi.nlm.nih.gov/pmc/journals/559/,Y,1827-675X,,,,,,,,,,,,,,,,,,,,1827-675X,,Acta Otorhinolaryngologica Italica,1827-675X,No,2023-01-12T15:42:55Z,2023,CC BY-NC-ND,No,2023-03-26T14:52:52Z,Yes,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5102,101286676,Urology journal,Urol J,Iran,Tehran,Urology and Nephrology Research Center,2004,,"Bimonthly,",1735-546X,1735-1308,1735-1308,eng,N,2007.0,Y,Y,IM,http://www.urologyjournal.org/index.php/uj/index,Y,1735-546X,,,,,,,,,,,,,,,,,,,,1735-546X,,Urology Journal,1735-546X,No,2019-11-17T08:59:26Z,2019,CC BY,Yes,2020-10-08T13:34:27Z,No,yes
5132,0201543,Veterinaria italiana,Vet Ital,Italy,Teramo,Istituto sperimentale zooprofilattico.,1954,,Four no. a year,1828-1427,0505-401X,0505-401X,eng,N,2010.0,Y,Y,IM,https://www.veterinariaitaliana.izs.it/index.php/VetIt/issue/archive,Y,1828-1427,,,,,,,,,,,,,,,,,,,,1828-1427,,Veterinaria Italiana,1828-1427,No,2010-07-16T14:51:01Z,2010,CC BY-NC,No,2020-08-13T13:02:23Z,No,yes
5167,0417337,Voprosy virusologii,Vopr Virusol,Russia (Federation),Moskva,FBUN T︠S︡NII Ėpidemiologii Rospotrebnadzora,1956,,Bimonthly,2411-2097,0507-4088,0507-4088,rus,N,1965.0,Y,Y,IM,https://virusjour.crie.ru/jour/index,Y,2411-2097,,,,,,,,,,,,,,,,,,,,2411-2097,,Вопросы вирусологии,2411-2097,Yes,2022-12-18T17:41:44Z,2022,CC BY,Yes,2022-12-18T17:41:44Z,No,yes
5220,0414003,Yonsei medical journal,Yonsei Med J,Korea (South),Seoul,Yonsei University,1960,,Six no. a year,1976-2437,0513-5796,0513-5796,eng,N,1963.0,Y,Y,IM,http://www.eymj.org/index.php?body=archive,Y,1976-2437,,,,,,,,,,,,,,,,,,,,1976-2437,,Yonsei Medical Journal,1976-2437,No,2019-03-28T02:29:07Z,2019,CC BY-NC,No,2023-02-17T19:44:09Z,Yes,yes


In [43]:
# number of journals with no sherpa_id that are also not flagged DOAJ == 'yes'
len(pubmed[pubmed['sherpa_id'].isna() & (pubmed['DOAJ'] != 'yes')])

738

In [44]:
# fraction of all rows with no sherpa_id that are also not flagged DOAJ == 'yes'
len(pubmed[pubmed['sherpa_id'].isna() & (pubmed['DOAJ'] != 'yes')]) / len(pubmed)

0.13979920439477173

In [45]:
# inspect the journals with no sherpa_id that are also not flagged DOAJ == 'yes'
pubmed[pubmed['sherpa_id'].isna() & (pubmed['DOAJ'] != 'yes')]

Unnamed: 0,NlmUniqueID,Title,MedlineTA,Country,Place,Publisher,PublicationFirstYear,PublicationEndYear,Frequency,ISSN-Electronic,ISSN-Print,ISSN-Linking,Language,TitleContinuationYN,IndexingStartDate,CurrentlyIndexedYN,IndexOnlineYN,IndexingSubset,IndexingSelectedURL,ReportedMedlineYN,ISSN,sherpa_has_oa_path,embargo,embargo_published_version,embargo_accepted_version,sherpa_oa_green,additional_oa_fee,title_sherpa,issne_sherpa,issnp_sherpa,url,publisher_id,publisher_country,publisher_type,publisher_url,publisher_name,sherpa_uri,sherpa_created,sherpa_last_modified,sherpa_id,DOAJ_pissn,ISSN_old,DOAJ_title,DOAJ_eissn,DOAJ_Seal,DOAJ_date,DOAJ_year,DOAJ_license,DOAJ_author_holds_copyright_without_restrictions,DOAJ_last_updated,DOAJ_APC,DOAJ
1,101714112,A&A practice,A A Pract,United States,"[Philadelphia, PA]","Wolters Kluwer Health, Inc.",2018,,Biweekly,2575-3126,,2575-3126,eng,Y,2018.0,Y,Y,IM,https://ovidsp.ovid.com/ovidweb.cgi?T=JS&MODE=ovid&PAGE=toc&D=ovft&AN=02054229-000000000-00000,Y,2575-3126,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0431420,AANA journal,AANA J,United States,"Park Ridge, Ill.",American Association of Nurse Anesthetists,1974,,Bimonthly,2162-5239,0094-6354,0094-6354,eng,N,1974.0,Y,Y,N,https://www.aana.com/publications/aana-journal,Y,2162-5239,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
26,101206716,Acta biochimica et biophysica Sinica,Acta Biochim Biophys Sin (Shanghai),China,Shanghai,China Science Publishing & Media Ltd.,2004,,Monthly,1745-7270,1672-9145,1672-9145,eng,Y,2004.0,Y,Y,IM,https://academic.oup.com/abbs,Y,1745-7270,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
28,101194794,Acta of bioengineering and biomechanics,Acta Bioeng Biomech,Poland,Wrocław,Oficyna Wydawnicza Politechniki Wrocławskiej,1999,,Irregular,,1509-409X,1509-409X,eng,N,2007.0,Y,Y,IM,http://www.actabio.pwr.wroc.pl/archive.php,Y,1509-409X,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
30,101295064,Acta bio-medica : Atenei Parmensis,Acta Biomed,Italy,"Fidenza, Parma",Mattioli 1885,2002,,Three no. a year,2531-6745,0392-4203,0392-4203,eng,Y,2002.0,Y,Y,IM,http://www.mattioli1885journals.com/index.php/actabiomedica/issue/archive,Y,2531-6745,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5268,9425197,Zhonghua yi xue yi chuan xue za zhi = Zhonghua yixue yichuanxue zazhi = Chinese journal of medical genetics,Zhonghua Yi Xue Yi Chuan Xue Za Zhi,China,"Chengdu, Sichuan, P.R. China",Sichuan University,1992,,Bimonthly,,1003-9406,1003-9406,chi,N,1998.0,Y,N,IM,,Y,1003-9406,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5269,7511141,Zhonghua yi xue za zhi,Zhonghua Yi Xue Za Zhi,China,Beijing,Zhonghua yi xue hui,1960,,"Semimonthly,",,0376-2491,0376-2491,chi,N,1973.0,Y,N,IM,,Y,0376-2491,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5270,7904962,Zhonghua yu fang yi xue za zhi [Chinese journal of preventive medicine],Zhonghua Yu Fang Yi Xue Za Zhi,China,Beijing,Zhonghua yi xue hui,1967,,Bimonthly,,0253-9624,0253-9624,chi,N,1979.0,Y,N,IM,,Y,0253-9624,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5271,7910681,Zhonghua zhong liu za zhi [Chinese journal of oncology],Zhonghua Zhong Liu Za Zhi,China,Peking,Chinese Medical Association,1979,,Bimonthly,,0253-3766,0253-3766,chi,N,1979.0,Y,N,IM,,Y,0253-3766,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


## Add DOAJ data to MeSH

In [46]:
# Load the tab-separated MeSH headings table (first line is the header row)
MeshHeadings = pd.read_csv(
    'data/temp/2023/merge_MeshHeadings_sherpa.tsv',
    delimiter='\t',
    header=0,
    encoding='utf-8',
)
MeshHeadings

Unnamed: 0,NlmUniqueID,MeshHeading,sherpa_id_x,sherpa_has_oa_path,embargo,sherpa_id_y,embargo_published_version,embargo_accepted_version,sherpa_oa_green,sherpa_id,additional_oa_fee,sherpa_oa_hybrid
0,9015384,History,1406.0,yes,24.0,1406.0,,24.0,yes,1406.0,yes,hybrid or gold
1,9015384,United Kingdom,1406.0,yes,24.0,1406.0,,24.0,yes,1406.0,yes,hybrid or gold
2,101637720,Anesthesiology,,,,,,,,,,
3,101714112,Anesthesiology,,,,,,,,,,
4,101269322,Critical Care,10921.0,yes,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
26785,21830020R,History of Medicine,,,,,,,,,,
26786,0233767,Dentistry,,,,,,,,,,
26787,9309124,Embryonic Development,2242.0,yes,6.0,2242.0,,6.0,yes,2242.0,yes,hybrid or gold
26788,9309124,Fetal Development,2242.0,yes,6.0,2242.0,,6.0,yes,2242.0,yes,hybrid or gold


In [47]:
# Load the tab-separated broad journal headings table (first line is the header row)
BroadJournalHeadings = pd.read_csv(
    'data/temp/2023/merge_BroadJournalHeadings_sherpa.tsv',
    delimiter='\t',
    header=0,
    encoding='utf-8',
)
BroadJournalHeadings

Unnamed: 0,NlmUniqueID,BroadJournalHeading,sherpa_id_x,sherpa_has_oa_path,embargo,sherpa_id_y,embargo_published_version,embargo_accepted_version,sherpa_oa_green,sherpa_id,additional_oa_fee,sherpa_oa_hybrid
0,9015384,History of Medicine,1406.0,yes,24.0,1406.0,,24.0,yes,1406.0,yes,hybrid or gold
1,101637720,Anesthesiology,,,,,,,,,,
2,101714112,Anesthesiology,,,,,,,,,,
3,101269322,Critical Care,10921.0,yes,,,,,,,,
4,101269322,Nursing,10921.0,yes,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
16386,0056272,Reproductive Medicine,,,,,,,,,,
16387,0056272,Veterinary Medicine,,,,,,,,,,
16388,21830020R,History of Medicine,,,,,,,,,,
16389,0233767,Dentistry,,,,,,,,,,


In [48]:
# Keep only the join key and the two DOAJ columns that are added to the heading tables
pubmed_doaj = pubmed.loc[:, ['NlmUniqueID', 'DOAJ_Seal', 'DOAJ']]
pubmed_doaj

Unnamed: 0,NlmUniqueID,DOAJ_Seal,DOAJ
0,9015384,,
1,101714112,,
2,101269322,,
3,0431420,,
4,101223209,,
...,...,...,...
5274,8702287,,
5275,9435608,,
5276,101300786,,
5277,101179386,,


In [49]:
# Left join on NlmUniqueID: every MeSH heading row is kept and gains DOAJ_Seal / DOAJ
MeshHeadings = MeshHeadings.merge(pubmed_doaj, how='left', on='NlmUniqueID')
MeshHeadings

Unnamed: 0,NlmUniqueID,MeshHeading,sherpa_id_x,sherpa_has_oa_path,embargo,sherpa_id_y,embargo_published_version,embargo_accepted_version,sherpa_oa_green,sherpa_id,additional_oa_fee,sherpa_oa_hybrid,DOAJ_Seal,DOAJ
0,9015384,History,1406.0,yes,24.0,1406.0,,24.0,yes,1406.0,yes,hybrid or gold,,
1,9015384,United Kingdom,1406.0,yes,24.0,1406.0,,24.0,yes,1406.0,yes,hybrid or gold,,
2,101637720,Anesthesiology,,,,,,,,,,,,
3,101714112,Anesthesiology,,,,,,,,,,,,
4,101269322,Critical Care,10921.0,yes,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26785,21830020R,History of Medicine,,,,,,,,,,,,
26786,0233767,Dentistry,,,,,,,,,,,,
26787,9309124,Embryonic Development,2242.0,yes,6.0,2242.0,,6.0,yes,2242.0,yes,hybrid or gold,,
26788,9309124,Fetal Development,2242.0,yes,6.0,2242.0,,6.0,yes,2242.0,yes,hybrid or gold,,


In [50]:
# Left join on NlmUniqueID: every broad heading row is kept and gains DOAJ_Seal / DOAJ
BroadJournalHeadings = BroadJournalHeadings.merge(pubmed_doaj, how='left', on='NlmUniqueID')
BroadJournalHeadings

Unnamed: 0,NlmUniqueID,BroadJournalHeading,sherpa_id_x,sherpa_has_oa_path,embargo,sherpa_id_y,embargo_published_version,embargo_accepted_version,sherpa_oa_green,sherpa_id,additional_oa_fee,sherpa_oa_hybrid,DOAJ_Seal,DOAJ
0,9015384,History of Medicine,1406.0,yes,24.0,1406.0,,24.0,yes,1406.0,yes,hybrid or gold,,
1,101637720,Anesthesiology,,,,,,,,,,,,
2,101714112,Anesthesiology,,,,,,,,,,,,
3,101269322,Critical Care,10921.0,yes,,,,,,,,,,
4,101269322,Nursing,10921.0,yes,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16386,0056272,Reproductive Medicine,,,,,,,,,,,,
16387,0056272,Veterinary Medicine,,,,,,,,,,,,
16388,21830020R,History of Medicine,,,,,,,,,,,,
16389,0233767,Dentistry,,,,,,,,,,,,


## Exports

In [51]:
# Export as tab-separated UTF-8 text without the index: the full journal table,
# the two "no sherpa_id" subsets, and both heading tables.
no_sherpa = pubmed['sherpa_id'].isna()
tsv_exports = {
    'data/temp/2023/merge_pubmed_sherpa_doaj.tsv': pubmed,
    'data/temp/2023/pubmed_doaj_not_in_sherpa.tsv': pubmed[no_sherpa & (pubmed['DOAJ'] == 'yes')],
    'data/temp/2023/pubmed_not_in_doaj_and_not_in_sherpa.tsv': pubmed[no_sherpa & (pubmed['DOAJ'] != 'yes')],
    'data/temp/2023/merge_MeshHeadings_sherpa_doaj.tsv': MeshHeadings,
    'data/temp/2023/merge_BroadJournalHeadings_sherpa_doaj.tsv': BroadJournalHeadings,
}
for tsv_path, frame in tsv_exports.items():
    frame.to_csv(tsv_path, sep='\t', encoding='utf-8', index=False)

In [52]:
# Export the same five tables as Excel workbooks, without the index
no_sherpa = pubmed['sherpa_id'].isna()
excel_exports = {
    'data/temp/2023/merge_pubmed_sherpa_doaj.xlsx': pubmed,
    'data/temp/2023/pubmed_doaj_not_in_sherpa.xlsx': pubmed[no_sherpa & (pubmed['DOAJ'] == 'yes')],
    'data/temp/2023/pubmed_not_in_doaj_and_not_in_sherpa.xlsx': pubmed[no_sherpa & (pubmed['DOAJ'] != 'yes')],
    'data/temp/2023/merge_MeshHeadings_sherpa_doaj.xlsx': MeshHeadings,
    'data/temp/2023/merge_BroadJournalHeadings_sherpa_doaj.xlsx': BroadJournalHeadings,
}
for xlsx_path, frame in excel_exports.items():
    frame.to_excel(xlsx_path, index=False)