# Atom feed of highly tweeted new preprints by subject area

Atom feeds of preprints that were published in the past seven days and tweeted at least five times, grouped by subject area.

In [60]:
import sys
sys.path.append('../')

import ast
import pandas as pd
from datetime import datetime, date
from feedgen.feed import FeedGenerator

In [61]:
# Feed slug -> label used in the 'subject-area' column of the CSV.
# The slugs are reused further down as feed URL path segments and as the
# output directory names, so they must stay filesystem- and URL-safe.
subject_area_labels = {
    'microbiology': 'Microbiology',
    'neuroscience': 'Neuroscience',
    'molecular_biology': 'Molecular Biology',
    'bioinformatics': 'Bioinformatics',
    'genomics': 'Genomics',
    'genetics': 'Genetics',
    'developmental_biology': 'Developmental Biology',
    'plant_biology': 'Plant Biology',
    'evolutionary_biology': 'Evolutionary Biology',
    'ecology': 'Ecology',
    'immunology': 'Immunology',
    'cancer_biology': 'Cancer Biology',
}
# 'covid' is not a subject-area label; it is driven by the boolean 'covid'
# column and gets its own feed, listed first.
subject_areas = ['covid', *subject_area_labels]

# The input file name carries today's date (YYYY-MM-DD).
today = date.today().strftime('%Y-%m-%d')
preprints = pd.read_csv(f'../preprint_tweets/1101/preprint_tweets_{today}.csv', index_col=0)

# filter by section, and group all COVID-related preprints together
# The explicit comparisons are element-wise on the column and are kept as-is
# (rather than using the column / its negation directly) so that the row
# selection does not depend on the column's dtype.
is_covid = preprints['covid'] == True  # noqa: E712
not_covid = preprints['covid'] == False  # noqa: E712

subsets = {'covid': preprints[is_covid]}
for slug, label in subject_area_labels.items():
    # COVID preprints are excluded here so they appear only in the covid feed.
    subsets[slug] = preprints[(preprints['subject-area'] == label) & not_covid]

# Notebook preview of one subset as a sanity check.
subsets['genetics'].head(50)

Unnamed: 0,doi,tweets,archive,subject-area,covid,title,authors,abstract,posted
12,10.1101/2021.05.11.443595,80,bioRxiv,Genetics,False,Epigenetic Inheritance is Gated by Naïve Pluri...,"[{'name': 'Valentina Carlini'}, {'name': 'Cris...",<p>Environmental factors can trigger cellular ...,2021-05-11
22,10.1101/2021.05.09.443220,50,bioRxiv,Genetics,False,SCON - A Short Conditional intrON for conditio...,"[{'name': 'Szu-Hsien Sam Wu'}, {'name': 'Réka ...",<p>The generation of conditional alleles using...,2021-05-10
42,10.1101/2021.05.10.443466,32,bioRxiv,Genetics,False,Genetic determinants of chromatin reveal prost...,"[{'name': 'Sylvan C. Baca'}, {'name': 'Cassand...",<p>Methods that link genetic variation to stea...,2021-05-11
55,10.1101/2021.05.10.443352,25,bioRxiv,Genetics,False,The genetic architecture of phenotypic diversi...,"[{'name': 'Wanchang Zhang'}, {'name': 'Hongru ...",<p>The Betta fish displays a remarkable variet...,2021-05-10
93,10.1101/2021.05.12.443797,16,bioRxiv,Genetics,False,Robust detection of natural selection using a ...,"[{'name': 'Enes Dilber'}, {'name': 'Jonathan T...",<p>Neutrality tests such as Tajima’s <italic>D...,2021-05-13
137,10.1101/2021.05.10.443088,10,bioRxiv,Genetics,False,Natural Killer cells demonstrate distinct eQTL...,"[{'name': 'James J Gilchrist'}, {'name': 'Seik...",<p>Natural Killer (NK) cells are innate lympho...,2021-05-11
145,10.1101/2021.05.13.444000,9,bioRxiv,Genetics,False,Transcriptional landscape of human microglia r...,"[{'name': 'Tulsi Patel'}, {'name': 'Troy P Car...",<p>Microglia have fundamental roles in health ...,2021-05-16
201,10.1101/2021.05.10.443361,6,bioRxiv,Genetics,False,Comprehensive analysis of meiosis-derived cDNA...,"[{'name': 'Tina L. Sing'}, {'name': 'Katie Con...",<p>Gametogenesis is a highly regulated and dyn...,2021-05-10
226,10.1101/2021.05.09.443284,5,bioRxiv,Genetics,False,Population Structure of Nation-wide Rice in Th...,"[{'name': 'Phanchita Vejchasarn'}, {'name': 'J...",<sec><title>Background</title><p>Thailand is a...,2021-05-10


In [62]:

# Build one Atom feed per subject area and write it to
# '<subject_area>/feed.xml' (relative to the current working directory).
for subject_area in subject_areas:
    fg = FeedGenerator()
    # NOTE(review): every feed is given the same id. Atom expects a feed id to
    # be unique, so a per-feed value (e.g. the 'self' URL below) may be more
    # appropriate - confirm before changing, since ids of already published
    # feeds are meant to be permanent.
    fg.id('https://front-matter.io/')
    if subject_area == 'covid':
        feed_title = 'Popular BioRxiv/medRxiv COVID preprints posted in the last week'
    else:
        feed_title = f'Popular BioRxiv {subject_area.replace("_", " ")} preprints posted in the last week'
    fg.title(feed_title)
    fg.author({'name': 'Martin Fenner', 'email': 'martin@front-matter.io'})
    fg.link(href='https://front-matter.io', rel='alternate')
    fg.link(href=f'https://front-matter.io/{subject_area}/feed.xml', rel='self')
    fg.language('en')

    # One entry per preprint, appended in the order of the rows in the subset.
    for _, row in subsets[subject_area].iterrows():
        doi_url = 'https://doi.org/' + row['doi']
        entry_title = row['title'] + f" ({row['tweets']} tweets)"

        fe = fg.add_entry(order='append')
        fe.title(entry_title)
        # 'authors' is stored in the CSV as the string form of a list of
        # {'name': ...} dicts; literal_eval parses it without executing code.
        fe.author(ast.literal_eval(row['authors']))
        fe.id(doi_url)
        fe.link({'href': doi_url, 'title': entry_title})
        fe.description(row['abstract'])
        fe.category({'term': row['subject-area']})
        # 'posted' is a plain YYYY-MM-DD date; it is pinned to midnight UTC to
        # obtain the timezone-aware datetime passed to published().
        fe.published(datetime.fromisoformat(row['posted'] + 'T00:00:00+00:00'))

    fg.atom_file(f'{subject_area}/feed.xml')