# Atom feed of tweeted new preprints by subject area

Atom feeds of preprints that were posted in the past seven days and tweeted at least three times, with one feed per subject area.

In [4]:
import sys
sys.path.append('../')

import ast
import pandas as pd
from datetime import datetime, date
from feedgen.feed import FeedGenerator

In [5]:
# Today's date as YYYY-MM-DD; it selects which daily CSV export is loaded.
# Stored under its own name so the imported `date` class is not overwritten:
# rebinding `date` to a string made this cell fail with AttributeError when
# it was executed a second time in the same session.
today = date.today().strftime('%Y-%m-%d')
# Feed keys, one per generated feed.
subject_areas = ['covid', 'medical_health-sciences', 'bioinformatics', 'biochemistry_cell-biology', 'cancer', 'ecology', 'evolutionary-biology', 'genetics', 'microbiology', 'neurosciences', 'physiology', 'plant-biology', 'zoology','other-biological-sciences']
# The first CSV column is used as the DataFrame index.
preprints = pd.read_csv('../preprint_tweets/1101/preprint_tweets_' + today + '.csv', index_col=0)

# filter by section, and group all COVID-related preprints together
#
# Row masks shared by every subset below. Comparing against True/False
# (rather than negating the column) keeps rows with a missing `covid` value
# out of every subset.
is_covid = preprints['covid'] == True
not_covid = preprints['covid'] == False
from_medrxiv = preprints['archive'] == 'medRxiv'
area = preprints['subject-area']

# Feed key -> (label printed in the summary, subject areas routed to that feed),
# listed in the order the summary lines are printed.
feed_areas = {
    'medical_health-sciences': ('Medical and Health Sciences', ['Pathology', 'Pharmacology and Toxicology']),
    'bioinformatics': ('Bioinformatics', ['Bioinformatics']),
    'biochemistry_cell-biology': ('Biochemistry and Cell Biology', ['Biochemistry', 'Cell Biology', 'Developmental Biology', 'Synthetic Biology', 'Systems Biology', 'Molecular Biology']),
    'cancer': ('Cancer', ['Cancer Biology', 'Hematology', 'Oncology']),
    'ecology': ('Ecology', ['Ecology']),
    'evolutionary-biology': ('Evolutionary Biology', ['Evolutionary Biology']),
    'genetics': ('Genetics', ['Genetics', 'Genomics']),
    'microbiology': ('Microbiology', ['Microbiology', 'Immunology']),
    'neurosciences': ('Neurosciences', ['Neuroscience']),
    'physiology': ('Physiology', ['Physiology', 'Biophysics']),
    'plant-biology': ('Plant Biology', ['Plant Biology']),
    'zoology': ('Zoology', ['Zoology', 'Animal Behavior and Cognition']),
}

subsets = {}
subsets['covid'] = preprints[is_covid]
print(f"COVID: {len(subsets['covid'])}")

assigned_areas = []
for feed_key, (label, areas) in feed_areas.items():
    in_feed = area.isin(areas)
    if feed_key == 'medical_health-sciences':
        # Every medRxiv preprint counts as medical, whatever its subject area.
        in_feed = in_feed | from_medrxiv
    # The whole area match is combined with `not_covid` as one unit. Written
    # inline as `a | b & not_covid`, `&` binds tighter than `|`, which let
    # COVID preprints tagged Genetics, Physiology or Zoology into those feeds.
    subsets[feed_key] = preprints[in_feed & not_covid]
    print(f'{label}: {len(subsets[feed_key])}')
    assigned_areas.extend(areas)

# Everything that is not COVID, not from medRxiv and not routed to a feed
# above. Deriving the exclusion list from `feed_areas` keeps Pathology and
# Pharmacology and Toxicology preprints from being listed both here and in
# the medical feed.
subsets['other-biological-sciences'] = preprints[~from_medrxiv & ~area.isin(assigned_areas) & not_covid]
print(f"Other Biological Sciences: {len(subsets['other-biological-sciences'])}")
# Notebook display: preview of the microbiology subset.
subsets['microbiology'].head(50)

COVID: 38
Medical and Health Sciences: 15
Bioinformatics: 8
Biochemistry and Cell Biology: 37
Cancer: 2
Ecology: 5
Evolutionary Biology: 8
Genetics: 9
Microbiology: 25
Neurosciences: 18
Physiology: 6
Plant Biology: 9
Zoology: 1
Other Biological Sciences: 3


Unnamed: 0,doi,tweets,archive,subject-area,covid,title,authors,abstract,posted
7,10.1101/2021.10.25.465200,52,bioRxiv,Immunology,False,Cell-extrinsic autophagy in mature adipocytes ...,"[{'name': 'Felix Clemens Richter'}, {'name': '...",<p>Autophagy is a critical cellular recycling ...,2021-10-26
13,10.1101/2021.10.26.465989,37,bioRxiv,Microbiology,False,Evolutionary instability of collateral suscept...,"[{'name': 'Vidar Sørum'}, {'name': 'Emma L. Øy...",<p>Collateral sensitivity and resistance occur...,2021-10-26
14,10.1101/2021.10.25.465810,37,bioRxiv,Microbiology,False,Transmission of Klebsiella strains and plasmid...,"[{'name': 'Ben Vezina'}, {'name': 'Louise M. J...",<p>The Grey-headed flying fox (<italic>Pteropu...,2021-10-26
19,10.1101/2021.10.26.465843,25,bioRxiv,Microbiology,False,"Diversity, function and evolution of marine mi...","[{'name': 'Jianwei Chen'}, {'name': 'Yang Guo'...","<p>Trillions of marine bacterial, archaeal and...",2021-10-26
24,10.1101/2021.10.26.465934,23,bioRxiv,Microbiology,False,Open chromatin analysis in Trypanosoma cruzi l...,"[{'name': 'Alex Ranieri Jerônimo Lima'}, {'nam...",<sec><title>Background</title><p>Genomic organ...,2021-10-26
33,10.1101/2021.10.26.465990,17,bioRxiv,Microbiology,False,Geological processes mediate a subsurface micr...,"[{'name': 'Daniel A. Gittins'}, {'name': 'Pier...",<p>The deep biosphere is the largest microbial...,2021-10-26
50,10.1101/2021.10.29.466428,12,bioRxiv,Immunology,False,Assassination Tango: An NLR/NLR-ID immune rece...,"[{'name': 'Glykeria Mermigka'}, {'name': 'Argy...",<p>Plant immunity relies on cell-surface recep...,2021-10-29
51,10.1101/2021.10.22.465535,12,bioRxiv,Microbiology,False,Rare modification in the ergosterol biosynthes...,"[{'name': 'Milena Kordalewska'}, {'name': 'Kev...",<p>We determined amphotericin B (AmB) suscepti...,2021-10-29
54,10.1101/2021.10.26.466009,11,bioRxiv,Microbiology,False,Total parenteral nutrition drives glucose meta...,"[{'name': 'Haifeng Sun'}, {'name': 'Peng Wang'...",<p>The occurrence of glucose metabolism disord...,2021-10-27
59,10.1101/2021.10.26.465851,11,bioRxiv,Microbiology,False,Fungal auxin is a quorum-based modulator of bl...,"[{'name': 'Lihong Dong'}, {'name': 'Qing Shen'...",<p>Auxin is an important phytohormone regulati...,2021-10-26


In [6]:

# Build one Atom feed per subject area from the matching subset and write it
# to <subject_area>/feed.xml.
for subject_area in subject_areas:
    if subject_area == 'covid':
        feed_title = 'Popular BioRxiv/medRxiv COVID preprints posted in the last week'
    else:
        # Turn the feed key into readable words: "-" -> " ", "_" -> " and ".
        readable_area = subject_area.replace("-", " ").replace("_", " and ")
        feed_title = f'Popular {readable_area} preprints posted in the last week'

    fg = FeedGenerator()
    fg.id('https://front-matter.io/')
    fg.title(feed_title)
    fg.author({'name': 'Martin Fenner', 'email': 'martin@front-matter.io'})
    fg.link(href='https://front-matter.io', rel='alternate')
    fg.link(href=f'https://front-matter.io/{subject_area}/feed.xml', rel='self')
    fg.language('en')

    # One entry per preprint, appended in the order the rows appear.
    for index, row in subsets[subject_area].iterrows():
        fe = fg.add_entry(order='append')

        # The tweet count is shown in both the entry title and the link title.
        entry_title = row['title'] + f" ({row['tweets']} tweets)"
        fe.title(entry_title)

        # `authors` is stored as the text of a Python list of dicts;
        # literal_eval parses it without executing code.
        fe.author(ast.literal_eval(row['authors']))

        doi_url = 'https://doi.org/' + row['doi']
        fe.id(doi_url)
        fe.link(href=doi_url, title=entry_title)
        fe.description(row['abstract'])
        fe.category(term=row['subject-area'])

        # `posted` is a plain date string; publish it as midnight UTC.
        posted_at = datetime.fromisoformat(row['posted'] + 'T00:00:00+00:00')
        fe.published(posted_at)

    fg.atom_file(f'{subject_area}/feed.xml')