# API Final Analysis
### Daina Bouquin, Daniel Chivvis

Scripts below were used to generate all .csv files in the API_RESULTS_AAS_ONLY_082819/ folder

In [3]:
import pandas as pd
import numpy as np

In [4]:
API_results = pd.read_csv("API_CLEAN_INPUT_AAS_ONLY_061019.csv") 

In [5]:
list(API_results.columns.values)

['Alias',
 'Software_Package',
 'Identifier',
 'Highlight',
 'Citation',
 'Bibcode',
 'Alternate_Bibcode',
 'BibGroup',
 'Publisher',
 'Article_ID',
 'DOI',
 'Pub_Year',
 'Pub_Date',
 'Author',
 'Title']

In [6]:
API_results.head(5)

Unnamed: 0,Alias,Software_Package,Identifier,Highlight,Citation,Bibcode,Alternate_Bibcode,BibGroup,Publisher,Article_ID,DOI,Pub_Year,Pub_Date,Author,Title
0,astropy.org,Astropy,0,http:&#x2F;&#x2F;docs.<em>astropy.org</em> The...,False,2018AJ....155..128M,2017arXiv171209631M,CfA,The Astronomical Journal,2017arXiv171209631M|2018AJ....155..128M|10.384...,10.3847/1538-3881/aaa47e,2018,2018-03-00,"Morris, Brett M.",astroplan: An Open Source Observation Planning...
1,astropy.org,Astropy,0,simulation. Halotools is an affiliated package...,False,2017AJ....154..190H,2016arXiv160604106H,CfA,The Astronomical Journal,2016arXiv160604106H|2017AJ....154..190H|10.384...,10.3847/1538-3881/aa859f,2017,2017-11-00,"Hearin, Andrew P.",Forward Modeling of Large-scale Structure: An ...
2,astropy.org,Astropy,0,and the astropy cosmology package53 53 http:&#...,False,2017AJ....153..107T,2016arXiv161105456T,NOAO|ESO/Telescopes|CFHT,The Astronomical Journal,2016arXiv161105456T|2017AJ....153..107T|10.384...,10.3847/1538-3881/aa5b8d,2017,2017-03-00,"Tie, S. S.",A Study of Quasar Selection in the Supernova F...
3,github.com/astropy/astroplan,Astropy,0,<em>https:&#x2F;&#x2F;github.com&#x2F;astropy&...,False,2018AJ....155..128M,2017arXiv171209631M,CfA,The Astronomical Journal,2017arXiv171209631M|2018AJ....155..128M|10.384...,10.3847/1538-3881/aaa47e,2018,2018-03-00,"Morris, Brett M.",astroplan: An Open Source Observation Planning...
4,astropy/astroplan,Astropy,0,<em>https:&#x2F;&#x2F;github.com&#x2F;astropy&...,True,2018AJ....155..128M,2017arXiv171209631M,CfA,The Astronomical Journal,2017arXiv171209631M|2018AJ....155..128M|10.384...,10.3847/1538-3881/aaa47e,2018,2018-03-00,"Morris, Brett M.",astroplan: An Open Source Observation Planning...


In [7]:
# Convert highlight col to string

API_results['Highlight'] = API_results['Highlight'].astype('|S')\

In [8]:
API_results.to_csv("API_FINAL_ANALYSIS_AAS_ONLY_082819.csv")

## Summary of Results

In [9]:
# How many papers did we find for each software package?
API_results.groupby('Software_Package')['Bibcode'].nunique()

Software_Package
Astroblend        1
Astropy          93
RADMC-3D        163
SAOImage DS9    272
Spec2d          222
Stringray         9
TARDIS            3
WCSTools        121
Name: Bibcode, dtype: int64

In [10]:
# Proportion of articles with a software alias in a citation
API_all_citations = API_results.loc[API_results['Citation'] == True]
API_cite_proportion = API_all_citations.groupby('Software_Package')['Bibcode'].nunique()/API_results.groupby('Software_Package')['Bibcode'].nunique()
API_cite_proportion

Software_Package
Astroblend           NaN
Astropy         0.763441
RADMC-3D        0.803681
SAOImage DS9    0.411765
Spec2d          0.527027
Stringray       0.555556
TARDIS          0.666667
WCSTools        0.628099
Name: Bibcode, dtype: float64

In [11]:
# How many unique aliases were used in citations for each package?
print (API_all_citations.groupby('Software_Package')['Alias'].nunique())

Software_Package
Astropy         12
RADMC-3D         9
SAOImage DS9    11
Spec2d          11
Stringray        2
TARDIS           2
WCSTools         9
Name: Alias, dtype: int64


In [12]:
# For each package count number of articles that mentioned their identifiers
API_ID_only = API_results.loc[API_results['Identifier'] == 1]
API_ID_only = pd.DataFrame({'count' : API_ID_only.groupby(["Software_Package", "Alias"])['Bibcode'].nunique()})
API_ID_only.to_csv("API_ID_only_AAS_082819.csv")
API_ID_only

Unnamed: 0_level_0,Unnamed: 1_level_0,count
Software_Package,Alias,Unnamed: 2_level_1
RADMC-3D,ascl:1202.015,1
Spec2d,arXiv:1203.3192,1
Spec2d,ascl:1203.003,1


In [13]:
# For each package count number of articles that mentioned their aliases that aren't identifiers
API_non_ID_only = API_results.loc[API_results['Identifier'] == 0]
API_non_ID_only = pd.DataFrame({'count' : API_non_ID_only.groupby(["Software_Package", "Alias"])['Bibcode'].nunique()})
API_non_ID_only.to_csv("API_non_ID_only_AAS_082819.csv")
API_non_ID_only

Unnamed: 0_level_0,Unnamed: 1_level_0,count
Software_Package,Alias,Unnamed: 2_level_1
Astroblend,AstroBlend,1
Astroblend,astroblend.com,1
Astropy,AstroPy,93
Astropy,Astropy Collaboration,63
Astropy,Astropy Collaboration 2013,62
Astropy,Astropy Collaboration et al. (2013),65
Astropy,Astropy Collaboration et al. (2018),1
Astropy,Astropy Collaboration et al. 2013,65
Astropy,Astropy Collaboration et al. 2018,1
Astropy,"Astropy Collaboration, 2013",62


In [14]:
# total mentions of software aliases over time
API_over_time = pd.DataFrame({'year_count' : API_results.groupby(["Software_Package", "Pub_Year"])['Bibcode'].nunique()}).reset_index()
# write results
API_over_time.to_csv("API_over_time_AAS_ONLY_082819.csv")
# show results over time
API_over_time

Unnamed: 0,Software_Package,Pub_Year,year_count
0,Astroblend,2016,1
1,Astropy,2013,1
2,Astropy,2014,6
3,Astropy,2015,6
4,Astropy,2016,27
5,Astropy,2017,36
6,Astropy,2018,17
7,RADMC-3D,2004,1
8,RADMC-3D,2005,1
9,RADMC-3D,2006,1


In [15]:
#Alias per paper

API_alias_paper = pd.DataFrame(API_results.groupby(['Software_Package','DOI'])['Alias'].nunique())
API_alias_paper.to_csv("API_alias_paper_AAS_ONLY_082819.csv")
API_alias_paper

Unnamed: 0_level_0,Unnamed: 1_level_0,Alias
Software_Package,DOI,Unnamed: 2_level_1
Astroblend,10.3847/0004-637X/818/2/115,2
Astropy,10.1088/0004-637X/784/2/137,6
Astropy,10.1088/0004-637X/798/1/24,7
Astropy,10.1088/0004-637X/806/2/260,6
Astropy,10.1088/0004-637X/814/2/108,1
Astropy,10.1088/0004-637X/815/1/4,6
Astropy,10.1088/0004-637X/815/1/77,6
Astropy,10.1088/0067-0049/219/1/14,7
Astropy,10.1088/2041-8205/776/2/L34,4
Astropy,10.1088/2041-8205/780/1/L13,6


In [16]:
# Software citations per paper

API_alias_citations = pd.DataFrame(API_results.groupby(['Software_Package','Citation'])['Bibcode'].nunique())
API_alias_citations.to_csv("API_alias_citations_AAS_ONLY_082819.csv")
API_alias_citations

Unnamed: 0_level_0,Unnamed: 1_level_0,Bibcode
Software_Package,Citation,Unnamed: 2_level_1
Astroblend,False,1
Astropy,False,30
Astropy,True,71
RADMC-3D,False,55
RADMC-3D,True,131
SAOImage DS9,False,168
SAOImage DS9,True,112
Spec2d,False,123
Spec2d,True,117
Stringray,False,5
