# Bots in Science 🧪

## Libraries

In [1]:
import pandas as pd

In [2]:
## 1. Open Access

In [3]:
# Load the per-paper open-access table (tab-separated, UTF-8).
データ_oa = pd.read_csv('data/oa_papers.tsv', sep='\t', encoding='UTF-8')
# Lower-case the DOIs so they compare equal to the lower-cased DOIs of the mention tables.
データ_oa['doi'] = データ_oa['doi'].str.lower()
# Rows without an open-access type are labelled 'Closed'.
# The result is assigned back rather than calling fillna(inplace=True) on the column
# accessor: that form is chained assignment, which warns on recent pandas and leaves
# the DataFrame unchanged once Copy-on-Write is active.
データ_oa['open_access_type'] = データ_oa['open_access_type'].fillna('Closed')
データ_oa.shape

(11980314, 3)

In [4]:
# Number of rows per open-access type (most frequent first).
データ_oa.open_access_type.value_counts()

Closed             4941733
Green published    2105734
Gold               2095989
Green submitted    1188094
Hybrid              577567
Bronze              550743
Green accepted      520454
Name: open_access_type, dtype: int64

In [5]:
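# Background on the open-access categories listed above
# (Web of Science help page and a PeerJ article):
# https://webofscience.help.clarivate.com/en-us/Content/open-access.html
# https://peerj.com/articles/4375/?ref=blog.oa.works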

SyntaxError: invalid syntax (4025302706.py, line 1)

In [6]:
# Relabel the three Green sub-categories (published / submitted / accepted) as a
# single 'Green' category, then remove rows that are exact duplicates.
データ_oa.loc[
    データ_oa['open_access_type'].isin(['Green published', 'Green submitted', 'Green accepted']),
    'open_access_type',
] = 'Green'
データ_oa = データ_oa.drop_duplicates()
# Show the category sizes after the relabelling.
データ_oa['open_access_type'].value_counts()

Closed    4673248
Green     3152252
Gold      2088183
Hybrid     576446
Bronze     548397
Name: open_access_type, dtype: int64

In [20]:
# Partition the table into the Gold/Green rows and all remaining rows.
# .copy() makes each part an independent frame rather than a selection of the parent.
データ_oa_oa = データ_oa.loc[lambda frame: frame['open_access_type'].isin(['Gold', 'Green'])].copy()
データ_oa_other = データ_oa.loc[lambda frame: ~frame['open_access_type'].isin(['Gold', 'Green'])].copy()

In [21]:
# Size (rows, columns) of the Gold/Green subset.
データ_oa_oa.shape

(5240435, 3)

In [22]:
# Keep a single row per (doi, is_open_access) pair.
# The ascending sort puts 'Gold' ahead of 'Green', so first() picks the Gold label
# whenever a pair carries both.
データ_oa_oa = (
    データ_oa_oa
    .sort_values('open_access_type')
    .groupby(['doi', 'is_open_access'])
    .first()
    .reset_index()
)

In [23]:
# Size (rows, columns) of the Gold/Green subset after keeping one row per (doi, is_open_access) pair.
データ_oa_oa.shape

(3695656, 3)

In [31]:
# Stack the de-duplicated Gold/Green rows back on top of the remaining rows;
# the original row labels are kept (no re-indexing).
データ_oa = pd.concat(objs=[データ_oa_oa, データ_oa_other], axis=0, ignore_index=False)

## 1. Overall

In [26]:
# Load the mentions table (tab-separated, UTF-8); the two listed columns are read as str.
データ_tw_men = pd.read_csv(
    'data/final_mentions_full_bots.tsv',
    sep='\t',
    encoding='UTF-8',
    dtype={'Outlet or Author': str, 'External Mention ID': str},
)
# Lower-case the DOIs so they compare equal to the lower-cased `doi` column of the OA table.
データ_tw_men['DOI'] = データ_tw_men['DOI'].str.lower()
データ_tw_men.shape

(51230936, 6)

In [32]:
# Attach the open-access information to every mention; mentions whose DOI has no
# match in the OA table are dropped by the inner join.
データ_tw_men_oa = pd.merge(
    データ_tw_men,
    データ_oa,
    how='inner',
    left_on='DOI',
    right_on='doi',
)
データ_tw_men_oa.shape

(65499520, 9)

In [33]:
# Keep only rows flagged Original == 1 and reduce them to the distinct
# (DOI, bot, open_access_type) combinations.
データ_tw_men_oa = (
    データ_tw_men_oa
    .loc[lambda frame: frame['Original'] == 1, ['DOI', 'bot', 'open_access_type']]
    .drop_duplicates()
)
データ_tw_men_oa.shape

(5123438, 3)

In [34]:
# Distinct (DOI, bot) pairs counted per bot value.
データ_tw_men_count = (
    データ_tw_men_oa[['DOI', 'bot']]
    .drop_duplicates()
    .groupby('bot')
    .count()
    .pivot_table(values='DOI', index='bot')
    .reset_index()
)
# Row counts per (bot, open_access_type), spread into one column per open-access type,
# with the per-bot count from above attached as the trailing `DOI` column.
データ_tw_men_oa_count = (
    データ_tw_men_oa
    .groupby(['bot', 'open_access_type'])
    .count()
    .reset_index()
    .pivot_table(values='DOI', index='bot', columns='open_access_type')
    .reset_index()
    .merge(データ_tw_men_count, how='inner', on='bot')
)

In [35]:
# Display the per-bot table: one column per open-access type plus the per-bot `DOI` count.
データ_tw_men_oa_count

Unnamed: 0,bot,Bronze,Closed,Gold,Green,Hybrid,DOI
0,0,299150,1252642,916418,868389,332057,3255145
1,1,122647,395505,382039,421448,133143,1267258


## 2. ESI field

In [7]:
# Load the ESI mentions table (tab-separated, UTF-8); the two listed columns are read as str.
データ_tw_men_esi = pd.read_csv(
    'data/final_mentions_full_bots_esi.tsv',
    sep='\t',
    encoding='UTF-8',
    dtype={'Outlet or Author': str, 'External Mention ID': str},
)
# Lower-case the DOIs so they compare equal to the lower-cased `doi` column of the OA table.
データ_tw_men_esi['DOI'] = データ_tw_men_esi['DOI'].str.lower()
データ_tw_men_esi.shape

(66269090, 7)

In [8]:
# Attach the open-access information to every ESI mention row; rows whose DOI has no
# match in the OA table are dropped by the inner join.
データ_tw_men_esi_oa = pd.merge(
    データ_tw_men_esi,
    データ_oa,
    how='inner',
    left_on='DOI',
    right_on='doi',
)
データ_tw_men_esi_oa.shape

(99681388, 10)

In [9]:
# Keep only rows flagged Original == 1 and reduce them to the distinct
# (ESI, DOI, bot, open_access_type) combinations.
データ_tw_men_esi_oa = (
    データ_tw_men_esi_oa
    .loc[lambda frame: frame['Original'] == 1, ['ESI', 'DOI', 'bot', 'open_access_type']]
    .drop_duplicates()
)
データ_tw_men_esi_oa.shape

(8657816, 4)

In [16]:
# Distinct (ESI, DOI, bot) triples counted per (ESI, bot) pair.
データ_tw_men_esi_count = (
    データ_tw_men_esi_oa[['ESI', 'DOI', 'bot']]
    .drop_duplicates()
    .groupby(['ESI', 'bot'])
    .count()
    .pivot_table(values='DOI', index=['bot', 'ESI'])
    .reset_index()
)
# Row counts per (ESI, bot, open_access_type), spread into one column per open-access
# type, with the per-(bot, ESI) count from above attached as the trailing `DOI` column.
データ_tw_men_esi_oa_count = (
    データ_tw_men_esi_oa
    .groupby(['ESI', 'bot', 'open_access_type'])
    .count()
    .reset_index()
    .pivot_table(values='DOI', index=['bot', 'ESI'], columns='open_access_type')
    .reset_index()
    .merge(データ_tw_men_esi_count, how='inner', on=['bot', 'ESI'])
)

In [17]:
# Display the per-(bot, ESI) table: one column per open-access type plus the `DOI` count.
データ_tw_men_esi_oa_count

Unnamed: 0,bot,ESI,Bronze,Closed,Gold,Green,Hybrid,DOI
0,0,Agricultural Sciences,9066,44335,29519,43280,10648,103814
1,0,Arts & Humanities,3454,35735,3904,15919,7491,59208
2,0,Biology & Biochemistry,31732,72040,69839,144457,33925,234690
3,0,Chemistry,11709,197979,60494,128192,31190,351362
4,0,Clinical Medicine,122601,367414,241987,470853,88400,931925
5,0,Computer Science,5293,31607,17832,40717,9121,81336
6,0,Economics & Business,4665,43916,1484,32289,10825,82270
7,0,Engineering,10330,133325,52577,99399,24638,264596
8,0,Environment/Ecology,16138,99458,69300,98713,26937,238800
9,0,Geosciences,12355,56825,41106,59023,17935,144918
