# Try to get OpenAlex data on when ISU articles were added

In [30]:
import pandas as pd
import requests
import plotly.express as px
import datetime

In [31]:
def build_filter(day, ror_id='https://ror.org/04rswrd78', email='eschares@iastate.edu'):
    """Build the OpenAlex ``/works`` URL for journal articles published on one day.

    Parameters
    ----------
    day : str or date-like
        The publication date. Either a ``'YYYY-MM-DD'`` string or any object
        with a ``strftime`` method (``datetime.date``, ``datetime.datetime``,
        ``pandas.Timestamp``), which is formatted as ``YYYY-MM-DD``.
    ror_id : str, optional
        ROR identifier of the institution to filter on. Defaults to the ROR ID
        for Iowa State.
    email : str or None, optional
        Contact address appended as the ``mailto`` query parameter. Pass
        ``None`` (or an empty string) to leave the parameter off.

    Returns
    -------
    str
        The complete request URL, e.g.
        ``https://api.openalex.org/works?filter=...&mailto=...``.
    """
    # Accept date-like objects directly so callers do not have to slice
    # str(timestamp) themselves; plain strings pass through unchanged.
    if hasattr(day, 'strftime'):
        day = day.strftime('%Y-%m-%d')

    # Build the 'filter' parameter: comma-separated conditions.
    all_filters = (
        f'institutions.ror:{ror_id}',
        'is_paratext:false',       # not cover, ToC, issue information, etc
        'type:journal-article',
        # from == to restricts the result set to a single publication day
        f'from_publication_date:{day},to_publication_date:{day}',
    )
    filter_param = f'filter={",".join(all_filters)}'

    if email:
        filter_param = f'{filter_param}&mailto={email}'

    # Put the URL together.
    return f'https://api.openalex.org/works?{filter_param}'

In [32]:
# Try the URL builder on a single day and display the resulting request URL.
filtered_works_url = build_filter(day='2021-01-01')
filtered_works_url

'https://api.openalex.org/works?filter=institutions.ror:https://ror.org/04rswrd78,is_paratext:false,type:journal-article,from_publication_date:2021-01-01,to_publication_date:2021-01-01&mailto=eschares@iastate.edu'

In [33]:
# One timestamp per calendar day of 2021, both endpoints included.
all_days = pd.date_range(start='2021-01-01', end='2021-12-31', freq='D')
all_days

DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
               '2021-01-05', '2021-01-06', '2021-01-07', '2021-01-08',
               '2021-01-09', '2021-01-10',
               ...
               '2021-12-22', '2021-12-23', '2021-12-24', '2021-12-25',
               '2021-12-26', '2021-12-27', '2021-12-28', '2021-12-29',
               '2021-12-30', '2021-12-31'],
              dtype='datetime64[ns]', length=365, freq='D')

In [41]:
# Inspect the container type that pd.date_range produced.
type(all_days)

pandas.core.indexes.datetimes.DatetimeIndex

In [22]:
# For testing: a one-element list holding a single 'YYYY-MM-DD' string.
# Presumably swapped in for all_days to try the request loop with one API
# call -- it is not referenced by the cells shown here; confirm before removing.
all_days2 = ['2021-01-01']

In [45]:
# Query OpenAlex once per day and record how many works matched.
# Reset the accumulators first so re-running this cell does not append
# a second copy of every day.
days = []
count = []

# The loop variable is `timestamp`, not `datetime`: the previous name rebound
# the `datetime` module imported in the first cell, breaking any later use of it.
for timestamp in all_days:
    # str(...)[0:10] keeps the YYYY-MM-DD part of a Timestamp and leaves a
    # plain 'YYYY-MM-DD' string untouched, so a list of strings also works here.
    day = str(timestamp)[0:10]

    filtered_works_url = build_filter(day)

    # A timeout stops one stalled connection from hanging the whole run;
    # raise_for_status surfaces HTTP errors (rate limiting, server errors) here
    # instead of as a confusing JSON/KeyError further down.
    api_response = requests.get(filtered_works_url, timeout=30)
    api_response.raise_for_status()
    parsed_response = api_response.json()

    # The number of matching works is read from the response's 'meta' object.
    day_count = parsed_response['meta']['count']
    print(f"{day}: {day_count}")

    days.append(day)
    count.append(day_count)

2021-01-01: 230
2021-01-02: 22
2021-01-03: 1
2021-01-04: 10
2021-01-05: 11
2021-01-06: 6
2021-01-07: 8
2021-01-08: 8
2021-01-09: 4
2021-01-10: 1
2021-01-11: 10
2021-01-12: 7
2021-01-13: 12
2021-01-14: 4
2021-01-15: 17
2021-01-16: 2
2021-01-17: 1
2021-01-18: 6
2021-01-19: 6
2021-01-20: 5
2021-01-21: 4
2021-01-22: 7
2021-01-23: 0
2021-01-24: 0
2021-01-25: 6
2021-01-26: 5
2021-01-27: 7
2021-01-28: 11
2021-01-29: 8
2021-01-30: 2
2021-01-31: 0
2021-02-01: 122
2021-02-02: 9
2021-02-03: 5
2021-02-04: 10
2021-02-05: 11
2021-02-06: 5
2021-02-07: 5
2021-02-08: 6
2021-02-09: 7
2021-02-10: 5
2021-02-11: 5
2021-02-12: 8
2021-02-13: 2
2021-02-14: 3
2021-02-15: 17
2021-02-16: 10
2021-02-17: 6
2021-02-18: 7
2021-02-19: 5
2021-02-20: 4
2021-02-21: 1
2021-02-22: 6
2021-02-23: 7
2021-02-24: 9
2021-02-25: 6
2021-02-26: 3
2021-02-27: 4
2021-02-28: 5
2021-03-01: 120
2021-03-02: 6
2021-03-03: 7
2021-03-04: 17
2021-03-05: 4
2021-03-06: 3
2021-03-07: 2
2021-03-08: 9
2021-03-09: 8
2021-03-10: 9
2021-03-11: 6
20

In [47]:
# Assemble the per-day results into a two-column frame and preview the first rows.
d = dict(date=days, count=count)
df = pd.DataFrame(d)
df.head(3)

Unnamed: 0,date,count
0,2021-01-01,230
1,2021-01-02,22
2,2021-01-03,1


In [53]:
# Line chart of the per-day counts collected above.
px.line(
    data_frame=df,
    x='date',
    y='count',
    title='Publication date of Iowa State-authored articles in 2021',
)