In [1]:
import json
import pandas as pd
import requests
from itertools import compress

Change the cell below to true if the data/CDR and data/OHW files ever need refreshing.

In [2]:
file_refresh = False

In [10]:
if file_refresh == True:

    # Refreshing CDR files

    # running this cell will output to the src folder. The files should be reviewed and moved to the data/CDR folder when done.
    search_dict = {
        'CDR_01.json':'carbon%20removal',
        'CDR_02.json':'carbon%20dioxide%20removal',
        'CDR_03.json':'carbon%20sequestration',
        'CDR_04.json':'carbon%20dioxide%20sequestration',
        'CDR_05.json':'carbon%20capture',
        'CDR_06.json':'carbon%20capture%20and%20sequestration',
        'CDR_07.json':'carbon%20capture%20and%20storage',
        'CDR_08.json':'carbon%20capture%20utilization%20and%20storage',
        'CDR_09.json':'CCS',
        'CDR_10.json':'CCUS',
        'CDR_11.json':'CDR',
        'CDR_12.json':'carbon%20mineralization',
        'CDR_13.json':'carbon%dioxide%20mineralization',
        'CDR_14.json':'carbon%20storage',
        'CDR_15.json':'direct%20air%20capture',
        'CDR_16.json':'direct%20air%20carbon%20capture',
    }

    filenames = []
    searches = []
    for item in search_dict.items():
        #print(item)
        key, value = item
        filename = key
        prefix = 'https://api.openalex.org/works?select=id,display_name,authorships,referenced_works,open_access&filter=from_publication_date:2020-01-01,to_publication_date:2023-08-01,fulltext.search:'
        insert = value
        suffix = '&page={}'
        url = prefix + insert + suffix
        filenames.append(filename)
        searches.append(url)

        #used_url = 'https://api.openalex.org/works?select=id,display_name,authorships,referenced_works,open_access&filter=from_publication_date:2020-01-01,to_publication_date:2023-08-01,fulltext.search:carbon%20removal&page={}'
        #print(url)

    for i in range(len(filenames)):

        filename = filenames[i]
        use_url = searches[i]

        print("on filename", filename)

        page = 1
        has_more_pages = True
        fewer_than_10k_results = True

        all_results = []

        # loop through pages
        while has_more_pages and fewer_than_10k_results:

            print("on page", str(page))
            
            # set page value and request page from OpenAlex
            url = use_url.format(page)
            #print('\n' + url)
            page_with_results = requests.get(url).json()
            
            # loop through partial list of results
            results = page_with_results['results']
            for i,work in enumerate(results):
                all_results.append(work)

            # next page
            page += 1
            
            # end loop when either there are no more results on the requested page 
            # or the next request would exceed 10,000 results
            per_page = page_with_results['meta']['per_page']
            has_more_pages = len(results) == per_page
            fewer_than_10k_results = per_page * page <= 10000

        with(open(filename, 'w')) as outf:
            json.dump(all_results, outf)



    # Refreshing OHW files

    # running this cell will output to the src folder. The files should be reviewed and moved to the data/CDR folder when done.
    OHW_OH_json = json.load(open('/workspaces/OHW_in_CDR/data/OHW/OHW_OH.json'))

    search_dict = {
        'OHW_OH.json':'open%20hardware',
        'OHW_OSH.json':'open%20source%20hardware'
    }

    filenames = []
    searches = []
    for item in search_dict.items():
        key, value = item
        filename = key
        prefix = 'https://api.openalex.org/works?select=id,display_name,authorships,referenced_works,open_access&filter=from_publication_date:2020-01-01,to_publication_date:2023-08-01,fulltext.search:'
        insert = value
        suffix = '&page={}'
        url = prefix + insert + suffix
        filenames.append(filename)
        searches.append(url)

    for i in range(len(filenames)):

        filename = filenames[i]
        use_url = searches[i]
        print("on filename", filename)

        page = 1
        has_more_pages = True
        fewer_than_10k_results = True

        all_results = []

        # loop through pages
        while has_more_pages and fewer_than_10k_results:

            print("on page", str(page))
            
            # set page value and request page from OpenAlex
            url = use_url.format(page)
            #print('\n' + url)
            page_with_results = requests.get(url).json()
            
            # loop through partial list of results
            results = page_with_results['results']
            for i,work in enumerate(results):
                all_results.append(work)

            # next page
            page += 1
            
            # end loop when either there are no more results on the requested page 
            # or the next request would exceed 10,000 results
            per_page = page_with_results['meta']['per_page']
            has_more_pages = len(results) == per_page
            fewer_than_10k_results = per_page * page <= 10000

        with(open(filename, 'w')) as outf:
            json.dump(all_results, outf)

## Combine files to make dataframes

In [3]:
file_nums = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16']
all_works = []

for i in range(len(file_nums)):

    file_num = file_nums[i]
    pathstr = '/workspaces/OHW_in_CDR/data/CDR/CDR_' + file_num + '.json'
    lst = json.load(open(pathstr))

    for r in lst:
        works = [r['id'], r['open_access']['is_oa'], r['referenced_works'], r['authorships']]
        all_works.append(works)

print(len(all_works))

df_works_CDR = pd.DataFrame(all_works)
df_works_CDR.head()

# 11798

11798


Unnamed: 0,0,1,2,3
0,https://openalex.org/W2965283917,False,"[https://openalex.org/W167104987, https://open...","[{'author_position': 'first', 'author': {'id':..."
1,https://openalex.org/W2965654741,False,"[https://openalex.org/W1149082768, https://ope...","[{'author_position': 'first', 'author': {'id':..."
2,https://openalex.org/W2963817167,False,"[https://openalex.org/W1530222901, https://ope...","[{'author_position': 'first', 'author': {'id':..."
3,https://openalex.org/W2960882361,False,"[https://openalex.org/W207497656, https://open...","[{'author_position': 'first', 'author': {'id':..."
4,https://openalex.org/W2972174901,False,"[https://openalex.org/W432734680, https://open...","[{'author_position': 'first', 'author': {'id':..."


In [4]:
file_nums = ['OH', 'OSH']
all_ohw_works = []

for i in range(len(file_nums)):

    file_num = file_nums[i]
    pathstr = '/workspaces/OHW_in_CDR/data/OHW/OHW_' + file_num + '.json'
    lst = json.load(open(pathstr))

    for r in lst:
        works = [r['id'], r['open_access']['is_oa'], r['referenced_works'], r['authorships']]
        all_ohw_works.append(works)

print(len(all_ohw_works))

df_works_OHW = pd.DataFrame(all_ohw_works)
df_works_OHW.head()

# 2351

2351


Unnamed: 0,0,1,2,3
0,https://openalex.org/W2962730651,True,"[https://openalex.org/W23953656, https://opena...","[{'author_position': 'first', 'author': {'id':..."
1,https://openalex.org/W3007172120,True,"[https://openalex.org/W625729589, https://open...","[{'author_position': 'first', 'author': {'id':..."
2,https://openalex.org/W2969697338,True,"[https://openalex.org/W1593082705, https://ope...","[{'author_position': 'first', 'author': {'id':..."
3,https://openalex.org/W2892013400,True,[],"[{'author_position': 'first', 'author': {'id':..."
4,https://openalex.org/W2922347011,False,"[https://openalex.org/W424832857, https://open...","[{'author_position': 'first', 'author': {'id':..."


### Separate into OA (open access) and NA (non-open access) objects

In [5]:
df_CDR_OA = df_works_CDR.loc[df_works_CDR[1] == True]
df_CDR_NA = df_works_CDR.loc[df_works_CDR[1] == False]
print(len(df_CDR_OA), len(df_CDR_NA))

# 5693 6105

df_OHW_OA = df_works_OHW.loc[df_works_OHW[1] == True]
df_OHW_NA = df_works_OHW.loc[df_works_OHW[1] == False]
print(len(df_OHW_OA), len(df_OHW_NA))

# 1215 1136

5693 6105
1215 1136


## Open hardware cited in the full text of co-authorsâ€™ works (2nd connection to OHW)

This is to answer the question of whether open hardware works added value to the work being published by being a work published by one or more co-authors, indicating likely awareness among the authors of open hardware because one of the co-authors has published OHW-related research.

### Functions

In [6]:
def get_author_ids(df):
    ids = []
    col = df[3]
    for r in col:
        work_authors = r
        for work_author in work_authors:
            ids.append(work_author['author']['id'])
    return ids

def get_author_objects(author_ids):
    searches = []
    author_objs = []

    for id in author_ids:
        prefix = 'https://api.openalex.org/authors?filter=ids.openalex:'
        insert = id
        suffix = '&page={}'
        url = prefix + insert + suffix
        searches.append(url)

    for i in range(len(searches)):
        use_url = searches[i]
        print("on search", str(i))

        page = 1
        has_more_pages = True
        fewer_than_10k_results = True

        # loop through pages
        while has_more_pages and fewer_than_10k_results:

            #print("on page", str(page))
            
            # set page value and request page from OpenAlex
            url = use_url.format(page)
            #print('\n' + url)
            page_with_results = requests.get(url).json()
            
            # loop through partial list of results
            results = page_with_results['results']
            for i,author in enumerate(results):
                author_objs.append(author)

            # next page
            page += 1
            
            # end loop when either there are no more results on the requested page 
            # or the next request would exceed 10,000 results
            per_page = page_with_results['meta']['per_page']
            has_more_pages = len(results) == per_page
            fewer_than_10k_results = per_page * page <= 10000

    return author_objs
    
def get_citation_metrics(author_ids, author_objs):
    saved_summary_stats = []
    #print("in citation metrics")
    for r in author_objs:
        id = r['id']
        ss_obj = r['summary_stats']
        #print(ss_obj)
        summary_stats = [id, ss_obj['2yr_mean_citedness'], ss_obj['h_index'], ss_obj['i10_index']]
        #print(summary_stats)
        if summary_stats[0] in author_ids:
            saved_summary_stats.append(summary_stats)
    df_summary_stats = pd.DataFrame(saved_summary_stats)
    print(df_summary_stats.head())

    return df_summary_stats

def get_ref_ids(df):
    refs = []
    col = df[2]
    for r in col:
        work_refs = r
        for work_ref in work_refs:
            refs.append(work_ref)
    return refs

def get_ids(df):
    ids = []
    col = df[0]
    for r in col:
        id = r
        ids.append(id)
    return ids


In [7]:
# for assessing author matches
cdr_oa_author_ids = get_author_ids(df_CDR_OA)
cdr_na_author_ids = get_author_ids(df_CDR_NA)
ohw_oa_author_ids = get_author_ids(df_OHW_OA)
ohw_na_author_ids = get_author_ids(df_OHW_NA)

# for assessing referenced work matches
cdr_oa_ref_ids = get_ref_ids(df_CDR_OA)
cdr_na_ref_ids = get_ref_ids(df_CDR_NA)
ohw_oa_ids = get_ids(df_OHW_OA)
ohw_na_ids = get_ids(df_OHW_NA)

print(len(all_works), len(all_ohw_works))
print(len(cdr_oa_author_ids), len(cdr_na_author_ids))
print(len(ohw_oa_author_ids), len(ohw_na_author_ids))
print(len(ohw_oa_ids), len(ohw_na_ids))

# 11798 2351
# 33151 28766
# 6695 4741
# 1215 1136

11798 2351
33151 28766
6695 4741
1215 1136


### Look for matches between the CDR author IDs and the OHW author IDs

In [8]:
ohw_author_ids = []
for id in ohw_oa_author_ids:
    ohw_author_ids.append(id)
for id in ohw_na_author_ids:
    ohw_author_ids.append(id)

cdr_ohw_oa_author_matches = (el in cdr_oa_author_ids for el in ohw_author_ids)
cdr_ohw_na_author_matches = (el in cdr_na_author_ids for el in ohw_author_ids)

cdr_ohw_oa_author_match_count = sum(cdr_ohw_oa_author_matches)
cdr_ohw_na_author_match_count = sum(cdr_ohw_na_author_matches)
print(cdr_ohw_oa_author_match_count, cdr_ohw_na_author_match_count)

# 1027 720, 16.7 s

1027 720


### For those matches, get the author ID to see later how their metrics performed

In [9]:
cdr_ohw_oa_author_matches = [el in cdr_oa_author_ids for el in ohw_author_ids]
match_idxs = list(compress(range(len(cdr_ohw_oa_author_matches)), cdr_ohw_oa_author_matches))
cdr_ohw_oa_match_author_ids = [cdr_oa_author_ids[i] for i in match_idxs]
print(cdr_ohw_oa_match_author_ids)

cdr_ohw_na_author_matches = [el in cdr_na_author_ids for el in ohw_author_ids]
match_idxs = list(compress(range(len(cdr_ohw_na_author_matches)), cdr_ohw_na_author_matches))
cdr_ohw_na_match_author_ids = [cdr_na_author_ids[i] for i in match_idxs]
print(cdr_ohw_na_match_author_ids)

['https://openalex.org/A5031596546', 'https://openalex.org/A5033222429', 'https://openalex.org/A5090551891', 'https://openalex.org/A5028991114', 'https://openalex.org/A5033930341', 'https://openalex.org/A5043428176', 'https://openalex.org/A5066130422', 'https://openalex.org/A5025866626', 'https://openalex.org/A5087485119', 'https://openalex.org/A5037685160', 'https://openalex.org/A5087875642', 'https://openalex.org/A5050505502', 'https://openalex.org/A5085358854', 'https://openalex.org/A5031004968', 'https://openalex.org/A5058754904', 'https://openalex.org/A5054962335', 'https://openalex.org/A5016723729', 'https://openalex.org/A5075781598', 'https://openalex.org/A5052757018', 'https://openalex.org/A5014859221', 'https://openalex.org/A5086701362', 'https://openalex.org/A5088606870', 'https://openalex.org/A5088539703', 'https://openalex.org/A5062151808', 'https://openalex.org/A5019723384', 'https://openalex.org/A5029154750', 'https://openalex.org/A5068222384', 'https://openalex.org/A5043

In [10]:
cdr_ohw_oa_author_nonmatches = [not el in cdr_oa_author_ids for el in ohw_author_ids]
match_idxs = list(compress(range(len(cdr_ohw_oa_author_nonmatches)), cdr_ohw_oa_author_nonmatches))
cdr_ohw_oa_nonmatch_author_ids = [cdr_oa_author_ids[i] for i in match_idxs]
print(cdr_ohw_oa_nonmatch_author_ids)

cdr_ohw_na_author_nonmatches = [not el in cdr_na_author_ids for el in ohw_author_ids]
match_idxs = list(compress(range(len(cdr_ohw_na_author_nonmatches)), cdr_ohw_na_author_nonmatches))
cdr_ohw_na_nonmatch_author_ids = [cdr_na_author_ids[i] for i in match_idxs]
print(cdr_ohw_na_nonmatch_author_ids)

['https://openalex.org/A5040852913', 'https://openalex.org/A5004669294', 'https://openalex.org/A5032238680', 'https://openalex.org/A5069020928', 'https://openalex.org/A5014712981', 'https://openalex.org/A5012354875', 'https://openalex.org/A5078153512', 'https://openalex.org/A5045728989', 'https://openalex.org/A5018117985', 'https://openalex.org/A5090456927', 'https://openalex.org/A5067098626', 'https://openalex.org/A5080392802', 'https://openalex.org/A5075629927', 'https://openalex.org/A5054458219', 'https://openalex.org/A5007073406', 'https://openalex.org/A5031988136', 'https://openalex.org/A5083343579', 'https://openalex.org/A5008847384', 'https://openalex.org/A5019231447', 'https://openalex.org/A5081263016', 'https://openalex.org/A5051958769', 'https://openalex.org/A5089722586', 'https://openalex.org/A5048736411', 'https://openalex.org/A5030882809', 'https://openalex.org/A5026647542', 'https://openalex.org/A5044102443', 'https://openalex.org/A5069563384', 'https://openalex.org/A5021

In [11]:
# for assessing whether OHW is helpful for citation metrics
# each call returns a tuple of (ids, impact_factors, h_indices, i10_indices)
cdr_ohw_oa_nonmatch_author_ids_trunc = cdr_ohw_oa_nonmatch_author_ids[0:len(cdr_ohw_oa_match_author_ids)]
cdr_ohw_na_nonmatch_author_ids_trunc = cdr_ohw_na_nonmatch_author_ids[0:len(cdr_ohw_na_match_author_ids)]

In [12]:
author_objs_filenames_list = ['cdr_ohw_oa_author_objs.json', 'cdr_ohw_na_author_objs.json', 'cdr_nohw_oa_author_objs.json', 'cdr_nohw_na_author_objs.json']
author_objs_list = [cdr_ohw_oa_author_objs, cdr_ohw_na_author_objs, cdr_nohw_oa_author_objs, cdr_nohw_na_author_objs]

if file_refresh == True:
    # takes just over 14 minutes for all 4 of these to complete.
    cdr_ohw_oa_author_objs = get_author_objects(cdr_ohw_oa_match_author_ids)
    cdr_ohw_na_author_objs = get_author_objects(cdr_ohw_na_match_author_ids)
    cdr_nohw_oa_author_objs = get_author_objects(cdr_ohw_oa_nonmatch_author_ids_trunc)
    cdr_nohw_na_author_objs = get_author_objects(cdr_ohw_na_nonmatch_author_ids_trunc)

    for i in range(len(author_objs_list)):
        filename = author_objs_filenames_list[i]
        obj = author_objs_list[i]
        with(open(filename, 'w')) as outf:
            json.dump(obj, outf)
else:
    #reload files from disk
    for i in range(len(author_objs_list)):
        filename = author_objs_filenames_list[i]
        obj = author_objs_list[i]
        with(open(filename, 'r')) as inputf:
            json.load(inputf)
    

on search 0
on search 1


on search 2
on search 3
on search 4
on search 5
on search 6
on search 7
on search 8
on search 9
on search 10
on search 11
on search 12
on search 13
on search 14
on search 15
on search 16
on search 17
on search 18
on search 19
on search 20
on search 21
on search 22
on search 23
on search 24
on search 25
on search 26
on search 27
on search 28
on search 29
on search 30
on search 31
on search 32
on search 33
on search 34
on search 35
on search 36
on search 37
on search 38
on search 39
on search 40
on search 41
on search 42
on search 43
on search 44
on search 45
on search 46
on search 47
on search 48
on search 49
on search 50
on search 51
on search 52
on search 53
on search 54
on search 55
on search 56
on search 57
on search 58
on search 59
on search 60
on search 61
on search 62
on search 63
on search 64
on search 65
on search 66
on search 67
on search 68
on search 69
on search 70
on search 71
on search 72
on search 73
on search 74
on search 75
on search 76
on search 77
on search 78
on sear

In [14]:
cdr_ohw_oa_metrics = get_citation_metrics(cdr_ohw_oa_match_author_ids, cdr_ohw_oa_author_objs)
cdr_ohw_na_metrics = get_citation_metrics(cdr_ohw_na_match_author_ids, cdr_ohw_na_author_objs)
cdr_nohw_oa_metrics = get_citation_metrics(cdr_ohw_oa_nonmatch_author_ids, cdr_nohw_oa_author_objs)
cdr_nohw_na_metrics = get_citation_metrics(cdr_ohw_na_nonmatch_author_ids, cdr_nohw_na_author_objs)

                                  0          1   2    3
0  https://openalex.org/A5031596546   4.696970  19   30
1  https://openalex.org/A5033222429  33.000000   3    3
2  https://openalex.org/A5090551891   5.812500  29   47
3  https://openalex.org/A5028991114  25.666667  24   35
4  https://openalex.org/A5033930341   6.853659  58  139
                                  0          1   2    3
0  https://openalex.org/A5027005719  51.000000   4    4
1  https://openalex.org/A5020955635  10.882353  70  274
2  https://openalex.org/A5083222972   8.750000  26   49
3  https://openalex.org/A5067841427   8.312500  24   42
4  https://openalex.org/A5083284087   7.428571  55  194
                                  0          1   2   3
0  https://openalex.org/A5040852913  13.333333   9   9
1  https://openalex.org/A5004669294  23.571429   5   3
2  https://openalex.org/A5032238680   9.482759  24  61
3  https://openalex.org/A5069020928   5.857143  21  35
4  https://openalex.org/A5014712981  33.000000   1   

In [15]:
#ohw_oa_metrics = get_citation_metrics2(ohw_oa_author_ids, get_author_objects(ohw_oa_author_ids))
#ohw_na_metrics = get_citation_metrics2(ohw_na_author_ids, get_author_objects(ohw_na_author_ids))

### Get averages for citation metrics for each group

In [16]:
# Impact Factors
avg_if_ohw_oa = cdr_ohw_oa_metrics[1].mean()
avg_if_ohw_na = cdr_ohw_na_metrics[1].mean()
avg_if_nohw_oa = cdr_nohw_oa_metrics[1].mean()
avg_if_nohw_na = cdr_nohw_na_metrics[1].mean()

# H-Indices
avg_h_ohw_oa = cdr_ohw_oa_metrics[2].mean()
avg_h_ohw_na = cdr_ohw_na_metrics[2].mean()
avg_h_nohw_oa = cdr_nohw_oa_metrics[2].mean()
avg_h_nohw_na = cdr_nohw_na_metrics[2].mean()

# I10-Indices
avg_i10_ohw_oa = cdr_ohw_oa_metrics[3].mean()
avg_i10_ohw_na = cdr_ohw_na_metrics[3].mean()
avg_i10_nohw_oa = cdr_nohw_oa_metrics[3].mean()
avg_i10_nohw_na = cdr_nohw_na_metrics[3].mean()

# Print results
print("IF", avg_if_ohw_oa, avg_if_ohw_na, avg_if_nohw_oa, avg_if_nohw_na)
print("H", avg_h_ohw_oa, avg_h_ohw_na, avg_h_nohw_oa, avg_h_nohw_na)
print("I10", avg_i10_ohw_oa, avg_i10_ohw_na, avg_i10_nohw_oa, avg_i10_nohw_na)

'''
IF 5.358506783107094 6.5121776818502894 8.309970363960561 12.45912182456398
H 23.527750730282374 29.658333333333335 27.33592989289192 36.71666666666667
I10 81.93184031158715 184.59444444444443 91.95423563777995 247.88611111111112
'''

IF 5.358506783107094 6.5121776818502894 8.309970363960561 12.45912182456398
H 23.527750730282374 29.658333333333335 27.33592989289192 36.71666666666667
I10 81.93184031158715 184.59444444444443 91.95423563777995 247.88611111111112


'\nIF 5.358506783107094 6.5121776818502894 8.309970363960561 12.45912182456398\nH 23.527750730282374 29.658333333333335 27.33592989289192 36.71666666666667\nI10 81.93184031158715 184.59444444444443 91.95423563777995 247.88611111111112\n'

### Get minima for citation metrics in each group

In [17]:
# Impact Factors
min_if_ohw_oa = cdr_ohw_oa_metrics[1].min()
min_if_ohw_na = cdr_ohw_na_metrics[1].min()
min_if_nohw_oa = cdr_nohw_oa_metrics[1].min()
min_if_nohw_na = cdr_nohw_na_metrics[1].min()

# H-Indices
min_h_ohw_oa = cdr_ohw_oa_metrics[2].min()
min_h_ohw_na = cdr_ohw_na_metrics[2].min()
min_h_nohw_oa = cdr_nohw_oa_metrics[2].min()
min_h_nohw_na = cdr_nohw_na_metrics[2].min()

# I10-Indices
min_i10_ohw_oa = cdr_ohw_oa_metrics[3].min()
min_i10_ohw_na = cdr_ohw_na_metrics[3].min()
min_i10_nohw_oa = cdr_nohw_oa_metrics[3].min()
min_i10_nohw_na = cdr_nohw_na_metrics[3].min()

# Print results
print("IF", min_if_ohw_oa, min_if_ohw_na, min_if_nohw_oa, min_if_nohw_na)
print("H", min_h_ohw_oa, min_h_ohw_na, min_h_nohw_oa, min_h_nohw_na)
print("I10", min_i10_ohw_oa, min_i10_ohw_na, min_i10_nohw_oa, min_i10_nohw_na)

IF 0.0 0.0 0.5454545454545454 1.5061728395061729
H 0 1 1 1
I10 0 0 0 1


### Get maxima for citation metrics in each group

In [18]:
# Impact Factors
max_if_ohw_oa = cdr_ohw_oa_metrics[1].max()
max_if_ohw_na = cdr_ohw_na_metrics[1].max()
max_if_nohw_oa = cdr_nohw_oa_metrics[1].max()
max_if_nohw_na = cdr_nohw_na_metrics[1].max()

# H-Indices
max_h_ohw_oa = cdr_ohw_oa_metrics[2].max()
max_h_ohw_na = cdr_ohw_na_metrics[2].max()
max_h_nohw_oa = cdr_nohw_oa_metrics[2].max()
max_h_nohw_na = cdr_nohw_na_metrics[2].max()

# I10-Indices
max_i10_ohw_oa = cdr_ohw_oa_metrics[3].max()
max_i10_ohw_na = cdr_ohw_na_metrics[3].max()
max_i10_nohw_oa = cdr_nohw_oa_metrics[3].max()
max_i10_nohw_na = cdr_nohw_na_metrics[3].max()

# Print results
print("IF", max_if_ohw_oa, max_if_ohw_na, max_if_nohw_oa, max_if_nohw_na)
print("H", max_h_ohw_oa, max_h_ohw_na, max_h_nohw_oa, max_h_nohw_na)
print("I10", max_i10_ohw_oa, max_i10_ohw_na, max_i10_nohw_oa, max_i10_nohw_na)

IF 120.4 51.0 82.57142857142857 131.0
H 241 286 217 250
I10 6946 10533 8415 11527


### Get medians for citation metrics in each group

In [19]:
# Impact Factors
median_if_ohw_oa = cdr_ohw_oa_metrics[1].median()
median_if_ohw_na = cdr_ohw_na_metrics[1].median()
median_if_nohw_oa = cdr_nohw_oa_metrics[1].median()
median_if_nohw_na = cdr_nohw_na_metrics[1].median()

# H-Indices
median_h_ohw_oa = cdr_ohw_oa_metrics[2].median()
median_h_ohw_na = cdr_ohw_na_metrics[2].median()
median_h_nohw_oa = cdr_nohw_oa_metrics[2].median()
median_h_nohw_na = cdr_nohw_na_metrics[2].median()

# I10-Indices
median_i10_ohw_oa = cdr_ohw_oa_metrics[3].median()
median_i10_ohw_na = cdr_ohw_na_metrics[3].median()
median_i10_nohw_oa = cdr_nohw_oa_metrics[3].median()
median_i10_nohw_na = cdr_nohw_na_metrics[3].median()

# Print results
print("IF", median_if_ohw_oa, median_if_ohw_na, median_if_nohw_oa, median_if_nohw_na)
print("H", median_h_ohw_oa, median_h_ohw_na, median_h_nohw_oa, median_h_nohw_na)
print("I10", median_i10_ohw_oa, median_i10_ohw_na, median_i10_nohw_oa, median_i10_nohw_na)

IF 4.0 5.028445512820513 6.625 9.023809523809524
H 17.0 22.0 21.0 28.0
I10 25.0 36.5 34.0 55.0


## Open hardware cited in the full text of the referenced works (3rd connection to OHW)

This is to answer the question of whether one or more open hardware works added value to the work being published by being included as a referenced work.

In [22]:
ohw_ids = []
for id in ohw_oa_ids:
    ohw_ids.append(id)
for id in ohw_na_ids:
    ohw_ids.append(id)

cdr_ohw_oa_ref_match_count = sum(el in cdr_oa_ref_ids for el in ohw_ids)
cdr_ohw_na_ref_match_count = sum(el in cdr_na_ref_ids for el in ohw_ids)
print(cdr_ohw_oa_ref_match_count, cdr_ohw_na_ref_match_count)

# 9341 6183 14 minutes
# 14 4 26.4 s

14 4
