In [59]:
# Load data files related to the screening step
import bibtexparser
import csv

# Parsed BibTeX databases; later cells read their `.entries` lists.
data_full_collection = None
data_screened_collection = None

with open('../0_Merged_Full_Collection_15010.bib', encoding='utf-8') as bibtex_file:
    data_full_collection = bibtexparser.load(bibtex_file)

with open('../1_Screened_4915.bib', encoding='utf-8') as bibtex_file:
    data_screened_collection = bibtexparser.load(bibtex_file)

# Tab-separated screening logs: csv.DictReader yields one dict per data row,
# keyed by the column names found in the file's header line.
# The encoding is pinned to UTF-8 (as for the .bib files above) so the result
# does not depend on the platform's default locale encoding.
with open('../1_Screening_statistics.txt', newline='', encoding='utf-8') as tab_file:
    data_statistics = list(csv.DictReader(tab_file, delimiter='\t'))

with open('../1_Screening_timeline.txt', newline='', encoding='utf-8') as tab_file:
    data_timeline = list(csv.DictReader(tab_file, delimiter='\t'))

    

In [60]:
# Tally the full collection by year, month, year+month and entry type.
total_items = len(data_full_collection.entries)
print("Total items: ", total_items)

years = {}
months = {}
year_months = {}
types = {}
for record in data_full_collection.entries:
    # Records that lack a field are grouped under the "NULL" key.
    year_key = record.get("year", "NULL")
    month_key = record.get("month", "NULL")
    # All keys are computed up front, before any counter is touched.
    tallies = (
        (years, year_key),
        (months, month_key),
        (year_months, str(year_key) + month_key),
        (types, record["ENTRYTYPE"]),
    )
    for table, key in tallies:
        table[key] = table.get(key, 0) + 1

# Keep the full-collection tallies under dedicated names; the generic names
# above are rebound when the screened collection is processed.
full_years = years
full_months = months
full_year_months = year_months
full_types = types
        
        

Total items:  15010


In [61]:
# Tally the screened collection by year, month, year+month and entry type.
screened_items = len(data_screened_collection.entries)
print("Total screened items: ", screened_items)

years, months, year_months, types = {}, {}, {}, {}
for record in data_screened_collection.entries:
    # Records that lack a field are grouped under the "NULL" key.
    record_year = record["year"] if "year" in record else "NULL"
    record_month = record["month"] if "month" in record else "NULL"
    record_year_month = str(record_year) + record_month
    record_type = record["ENTRYTYPE"]

    years[record_year] = years.get(record_year, 0) + 1
    months[record_month] = months.get(record_month, 0) + 1
    year_months[record_year_month] = year_months.get(record_year_month, 0) + 1
    types[record_type] = types.get(record_type, 0) + 1
    

Total screened items:  4915


In [69]:
# Distribution by entry type: screened count, total count and screened share.
print("Type : screened / total (percent)")
print("=================================")
for entry_type in sorted(types):
    kept = types[entry_type]
    found = full_types[entry_type]
    share = round(100 * kept / found, 2)
    print(entry_type, ":", kept, "/", found, "(", str(share), "%)")


Year : screened / total (percent)
article : 2385 / 7137 ( 33.42 %)
book : 12 / 74 ( 16.22 %)
conference : 569 / 1861 ( 30.57 %)
incollection : 12 / 67 ( 17.91 %)
inproceedings : 1937 / 5871 ( 32.99 %)


In [None]:
# Distribution by year: screened count, total count and screened share.
for year_key in sorted(years):
    kept = years[year_key]
    found = full_years[year_key]
    share = round(100 * kept / found, 2)
    print(year_key, ":", kept, "/", found, "(", str(share), "%)")

In [63]:
import plotly.graph_objects as go

# Grouped bar chart: identified vs. screened item counts per year.
# The x axis covers the years present in the screened tallies.
yrs = sorted(years)

identified_bar = go.Bar(
    x=yrs,
    y=[full_years[label] for label in yrs],
    name='All identified',
    marker_color='lightsalmon',
)
screened_bar = go.Bar(
    x=yrs,
    y=[years[label] for label in yrs],
    name='Screened',
    marker_color='indianred',
)
fig = go.Figure(data=[identified_bar, screened_bar])

# Rotate the x-axis tick labels so the year labels do not overlap.
fig.update_layout(barmode='group', xaxis_tickangle=-45)
fig.show()

In [64]:
# Show the weekly screening rows currently held in data_timeline.
print(data_timeline)

# Prepend a week-0 baseline row so the cumulative curve starts at zero.
# data_timeline is mutated in place (the plotting cell indexes it alongside
# `weeks`), so the insert is guarded: re-running this cell must not stack
# another baseline row on top of the previous one.
if not data_timeline or str(data_timeline[0]["Week"]) != "0":
    data_timeline.insert(0, {"Week": 0, "Screened_per_week": 0, "Total_screened": 0})

# Cumulative totals as integers. Rows read from the file carry strings while
# the baseline row carries ints; converting gives a uniformly numeric series.
weeks = [int(row["Total_screened"]) for row in data_timeline]

[{'Week': '1', 'Screened_per_week': '1046', 'Total_screened': '1046'}, {'Week': '2', 'Screened_per_week': '446', 'Total_screened': '1492'}, {'Week': '3', 'Screened_per_week': '764', 'Total_screened': '2256'}, {'Week': '4', 'Screened_per_week': '1057', 'Total_screened': '3313'}, {'Week': '5', 'Screened_per_week': '571', 'Total_screened': '3884'}, {'Week': '6', 'Screened_per_week': '726', 'Total_screened': '4610'}, {'Week': '7', 'Screened_per_week': '1167', 'Total_screened': '5777'}, {'Week': '8', 'Screened_per_week': '1546', 'Total_screened': '7323'}, {'Week': '9', 'Screened_per_week': '1280', 'Total_screened': '8603'}, {'Week': '10', 'Screened_per_week': '521', 'Total_screened': '9124'}, {'Week': '11', 'Screened_per_week': '757', 'Total_screened': '9881'}, {'Week': '12', 'Screened_per_week': '2068', 'Total_screened': '11949'}, {'Week': '13', 'Screened_per_week': '831', 'Total_screened': '12780'}, {'Week': '14', 'Screened_per_week': '1081', 'Total_screened': '13861'}, {'Week': '15', 'Sc

In [66]:
import numpy as np

# Screening progress: cumulative total as a line, per-week count as bars.
# Positions 0..len(weeks)-1 serve as the shared x axis for both traces.
x_ax = np.arange(len(weeks))
per_week_counts = [data_timeline[pos]["Screened_per_week"] for pos in x_ax]

cumulative_line = go.Scatter(x=x_ax, y=weeks, name="Cumulative")
fig = go.Figure(data=cumulative_line)
fig.add_trace(
    go.Bar(
        x=x_ax,
        y=per_week_counts,
        name='Per week',
        marker_color='lightsalmon',
    )
)
fig.show()