# Atom feed of tweeted new preprints by subject area

Generates Atom feeds of preprints that were published in the past seven days and tweeted at least three times, with one feed per subject area.

In [1]:
import sys
sys.path.append('../')

import ast
import pandas as pd
from datetime import datetime, date
from feedgen.feed import FeedGenerator

In [2]:
# Feed slugs; one Atom feed is generated per entry and each one is a key of `subsets`.
subject_areas = [
    'covid',
    'medical_health-sciences',
    'bioinformatics',
    'biochemistry_cell-biology',
    'ecology',
    'evolutionary-biology',
    'genetics',
    'microbiology',
    'physiology',
    'plant-biology',
    'zoology',
    'other-biological-sciences',
]

# Load today's file of tweeted preprints; the file name carries the current date.
preprints = pd.read_csv(
    f"../preprint_tweets/1101/preprint_tweets_{date.today().strftime('%Y-%m-%d')}.csv",
    index_col=0,
)

# Masks shared by every filter below. The explicit comparisons are kept (instead
# of `~`) so rows whose flag is neither True nor False match neither mask.
is_covid = preprints['covid'] == True  # noqa: E712
not_covid = preprints['covid'] == False  # noqa: E712
area = preprints['subject-area']

# 'subject-area' labels that get a dedicated feed, keyed by feed slug.
named_areas = {
    'bioinformatics': ['Bioinformatics'],
    'biochemistry_cell-biology': ['Biochemistry', 'Cell Biology'],
    'ecology': ['Ecology'],
    'evolutionary-biology': ['Evolutionary Biology'],
    'genetics': ['Genetics'],
    'microbiology': ['Microbiology'],
    'physiology': ['Physiology'],
    'plant-biology': ['Plant Biology'],
    'zoology': ['Zoology'],
}

# filter by section, and group all COVID-related preprints together
subsets = {}
subsets['covid'] = preprints[is_covid]
subsets['medical_health-sciences'] = preprints[(preprints['archive'] == 'medRxiv') & not_covid]

# `isin(...) & not_covid` applies the COVID exclusion to every label of a feed.
# The previous `A | B & not_covid` expression for biochemistry/cell biology only
# excluded COVID preprints from B, because `&` binds tighter than `|`.
for slug, labels in named_areas.items():
    subsets[slug] = preprints[area.isin(labels) & not_covid]

# Everything not claimed by a dedicated feed above.
# NOTE(review): there is no archive condition here, so non-COVID medRxiv
# preprints appear in this subset as well as in 'medical_health-sciences' --
# confirm whether that overlap is intended.
claimed_labels = [label for labels in named_areas.values() for label in labels]
subsets['other-biological-sciences'] = preprints[~area.isin(claimed_labels) & not_covid]
subsets['other-biological-sciences'].head(50)

Unnamed: 0,doi,tweets,archive,subject-area,covid,title,authors,abstract,posted
7,10.1101/2021.07.23.453492,57,bioRxiv,Scientific Communication and Education,False,Delineating Medical Education: Bibliometric Re...,"[{'name': 'Lauren A. Maggio'}, {'name': 'Anton...",<sec><title>Background</title><p>The field of ...,2021-07-26
9,10.1101/2021.07.22.453458,46,bioRxiv,Cancer Biology,False,Epithelial-mesenchymal plasticity determines e...,"[{'name': 'Patrick Aouad'}, {'name': 'Yueyun Z...",<p>Estrogen receptor α-positive (ER+) breast c...,2021-07-26
10,10.1101/2021.07.30.454514,44,bioRxiv,Biophysics,False,Length Biases in Single-Cell RNA Sequencing of...,"[{'name': 'Gennady Gorin'}, {'name': 'Lior Pac...",<p>Single-molecule pre-mRNA and mRNA sequencin...,2021-07-31
13,10.1101/2021.07.27.453460,29,bioRxiv,Developmental Biology,False,Extracellular mechanical forces drive endocard...,"[{'name': 'Julien Vermot'}, {'name': 'Helene V...",<p>Organ morphogenesis involves dynamic change...,2021-07-27
16,10.1101/2021.07.26.453816,23,bioRxiv,Neuroscience,False,Synaptic Mechanisms of Top-Down Control by The...,"[{'name': 'Hannah M. Oberle'}, {'name': 'Alex ...",<p>Corticofugal projections to evolutionarily ...,2021-07-26
17,10.1101/2021.07.25.453647,21,bioRxiv,Cancer Biology,False,Oncogenic c-Myc induces replication stress by ...,"[{'name': 'Silvia Peripolli'}, {'name': 'Tanya...",<p>Oncogene-induced replication stress is a ma...,2021-07-26
19,10.1101/2021.07.25.453678,16,bioRxiv,Neuroscience,False,Reward-dependent selection of feedback gains i...,"[{'name': 'Antoine De Comite'}, {'name': 'Fréd...",<p>Expected reward is known to affect planning...,2021-07-26
20,10.1101/2021.07.26.21260947,16,medRxiv,Respiratory Medicine,False,Prospective validation of host transcriptomic ...,"[{'name': 'Simon C. Mendelsohn'}, {'name': 'St...",<p>We tested performance of host-blood transcr...,2021-07-28
21,10.1101/2021.07.29.454071,15,bioRxiv,Neuroscience,False,Age-Related Differences in Ventral Striatal an...,"[{'name': 'Dominic S. Fareri'}, {'name': 'Kath...",<p>Social relationships change across the life...,2021-07-30
24,10.1101/2021.07.25.453674,14,bioRxiv,Biophysics,False,Role of non-specific interactions in the phase...,"[{'name': 'Rakesh Krishnan'}, {'name': 'Srivas...",<p>Phase separation of biomolecules could be m...,2021-07-26


In [3]:

import os  # only needed in this cell, to create the per-feed output folders

# Build one Atom feed per subject area and write it to <subject_area>/feed.xml.
for subject_area in subject_areas:
    fg = FeedGenerator()
    # NOTE(review): every feed is given the same id -- confirm whether each feed
    # should carry its own id before changing the published value.
    fg.id('https://front-matter.io/')
    if subject_area == 'covid':
        fg.title('Popular BioRxiv/medRxiv COVID preprints posted in the last week')
    else:
        # Turn the slug into a readable label,
        # e.g. 'biochemistry_cell-biology' -> 'biochemistry and cell biology'.
        label = subject_area.replace("-", " ").replace("_", " and ")
        fg.title(f'Popular {label} preprints posted in the last week')
    fg.author({'name': 'Martin Fenner', 'email': 'martin@front-matter.io'})
    fg.link(href='https://front-matter.io', rel='alternate')
    fg.link(href=f'https://front-matter.io/{subject_area}/feed.xml', rel='self')
    fg.language('en')

    for _, row in subsets[subject_area].iterrows():
        doi_url = 'https://doi.org/' + row['doi']
        # The tweet count is appended to the title shown in the feed.
        entry_title = row['title'] + f" ({row['tweets']} tweets)"

        fe = fg.add_entry(order='append')
        fe.title(entry_title)
        # 'authors' is stored as the string form of a Python literal;
        # literal_eval parses it back without executing arbitrary code.
        fe.author(ast.literal_eval(row['authors']))
        fe.id(doi_url)
        fe.link({'href': doi_url, 'title': entry_title})
        fe.description(row['abstract'])
        fe.category({'term': row['subject-area']})
        # 'posted' is a date-only string; pin it to midnight UTC so the
        # resulting datetime is timezone-aware.
        fe.published(datetime.fromisoformat(row['posted'] + 'T00:00:00+00:00'))

    # Make sure the output folder exists so writing the feed does not fail
    # for a subject area that has not been generated before.
    os.makedirs(subject_area, exist_ok=True)
    fg.atom_file(f'{subject_area}/feed.xml')