## Covid Related Papers

In [1]:
# Reload imported modules automatically before each cell executes, so edits to
# local packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Display limits handed to pandas in the next cell.
MAX_COLWIDTH = 200
MAX_ROWS = 2000

In [2]:
import pandas as pd
# Widen text columns and raise the row cap so long abstracts and large result
# sets are displayed instead of being truncated.
pd.set_option('display.max_colwidth', MAX_COLWIDTH)
pd.set_option('display.max_rows', MAX_ROWS)
from cord.cord19 import ResearchPapers

In [3]:
# Build the paper collection from the raw dataset. Per the log output below,
# this loads and cleans the metadata and builds a BM25 index over the abstracts;
# save() then writes the result to a pickle file.
research_papers = ResearchPapers.load()
research_papers.save()
# Alternative for later sessions — presumably reloads the saved pickle instead
# of re-indexing (confirm against ResearchPapers.from_pickle):
#research_papers = ResearchPapers.from_pickle()

Loading metadata from data\CORD-19-research-challenge
Cleaning metadata

Indexing research papers
Creating the BM25 index from the abstracts of the papers
Use index="text" if you want to index the texts of the paper instead
Finished Indexing in 36.0 seconds
Saving to data\ResearchPapers.pickle


## When was SARS-CoV-2 first noticed?

In [4]:
# Shared building blocks for the Wuhan queries in the following cells.
metadata = research_papers.metadata
# na=False maps missing abstracts to False, so the mask is strictly boolean and
# can be used for indexing on its own without NaN entries.
has_wuhan = metadata.abstract.str.contains('Wuhan', na=False)
# True for papers published before 1 November 2019.
# NOTE(review): rows with a missing `published` value presumably compare False
# here and are therefore kept by `~before_nov19` — confirm.
before_nov19 = metadata.published < '2019-11-01'
# Columns shown when listing matching papers.
cols = ['title', 'abstract', 'published', 'doi', 'sha']

### Which papers mention Wuhan?

In [5]:
# Papers that mention Wuhan and are not dated before 1 Nov 2019, oldest first.
wuhan_papers = (
    metadata
    .loc[has_wuhan & ~before_nov19, cols]
    .sort_values(by='published')
)
wuhan_papers

Unnamed: 0,title,abstract,published,doi,sha
1615,Coronaviruses: a paradigm of new emerging zoonotic diseases,"A novel type of coronavirus (2019-nCoV) infecting humans appeared in Wuhan, China, at the end of December 2019. Since the identification of the outbreak the infection quickly spread involving in o...",2019-12-01,10.1093/femspd/ftaa006,
44695,First case of Coronavirus Disease 2019 (COVID-19) pneumonia in Taiwan,"An outbreak of respiratory illness proved to be infected by a 2019 novel coronavirus, officially named Coronavirus Disease 2019 (COVID-19), was notified first in Wuhan, China, and has spread rapid...",2020-01-01,10.1016/j.jfma.2020.02.007,3e5edc4ff36064478e209800a15365cd5f710756; 83a4b68c7d9dbbd106424dee508c4197aa20f4f9
2283,New coronavirus pneumonia and outbreak epidemic virus and eye disease,"Since the outbreak of the new coronavirus pneumonia (NCP) in Wuhan City, China, the main transmission mode as well as the diagnosis and treatment of NCP have become a focus of research in China an...",2020-01-01,,
2287,Limiting spread of COVID-19 from cruise ships - lessons to be learnt from Japan | QJM: An International Journal of Medicine | Oxford Academic,"Spread of COVID-19 infection on a Cruise Ship in Yokohama, Japan Japan's response to the novel coronavirus (COVID-19) infection has been problematic since the outbreak was first reported in China....",2020-01-01,,
2289,First case of severe childhood novel coronavirus pneumonia in China TT - 中华儿科杂志,"One patient with a complaint of ""intermittent diarrhea, vomiting for 6 days, fever with shortness of breath for half a day"" was referred to the Department of Critical Medicine, Wuhan Children's H...",2020-01-01,,
2291,Proposed management of 2019-novel coronavirus infection during pregnancy and puerperium,"The 2019 novel coronavirus (2019-nCoV) infection has spread throughout China since the first case was identified in Wuhan, Hubei Province, in December 2019. According to previous knowledge and exp...",2020-01-01,,
2305,How Ophthalmologists Should Understand and Respond to the Current Epidemic of Novel Coronavirus Pneumonia (COVID-19),"The new coronavirus pneumonia that first appeared in Wuhan, China, in December 2019 has attracted great attention from both the Chinese government and the international community. The Internationa...",2020-01-01,,
2309,The laboratory risk assessment and control testing 2019 novel coronavirus in biosafety class II laboratories,"The outbreak of 2019 Novel Coronavirus (2019-nCoV) has spread from Wuhan to the whole country. After the Spring Festival, workers will return to workplace and students will return to school. There...",2020-01-01,,
2311,Recommendations for clinical management of children and adolescents with chronic heart failure during the epidemic period of novel coronavirus pnuemonia,"The outbreak of 2019 novel coronavirus pneumonia(COVID-19) in Wuhan, Hubei, China in December 2019 has spread to all parts of the country.Epidemiology showed that the population is generally susce...",2020-01-01,,
2312,The outbreak of SARS-CoV-2 pneumonia calls for viral vaccines,"The outbreak of 2019-novel coronavirus disease (COVID-19) that is caused by SARS-CoV-2 has spread rapidly in China, and has developed to be a Public Health Emergency of International Concern. Howe...",2020-01-01,,


In [6]:
# Sanity check: Wuhan papers lacking a publication date (the trailing ';' suppresses the output).
wuhan_papers.loc[wuhan_papers['published'].isna()];

#### TODO Fix

In [None]:
from collections import Counter, defaultdict
from cord.text import clean, tokenize

def get_word_count(research_paper):
    """Count how often each token occurs across a paper collection's abstracts.

    Every abstract in ``research_paper.metadata`` is passed through ``clean``
    and then ``tokenize``; occurrences are tallied over all abstracts.

    Returns a DataFrame with columns ``word`` and ``count``, ordered by
    descending count (ties broken by ascending word), restricted to words
    seen more than once, with a fresh 0..n-1 index.
    """
    token_lists = research_paper.metadata.abstract.apply(clean).apply(tokenize)

    # Counter keeps first-seen order, matching the manual tally it replaces.
    tally = Counter(
        token
        for tokens in token_lists.tolist()
        for token in tokens
    )

    frame = pd.DataFrame({'word': list(tally.keys()),
                          'count': list(tally.values())})
    ranked = frame.sort_values(['count', 'word'], ascending=[False, True])
    return ranked.query("count> 1").reset_index(drop=True)

# Papers published from 30 Nov 2019 onward — the complement of the upper bound
# of the `before_sars` window used further down.
# NOTE(review): cutoff inferred from that window; confirm the intended date.
since_sars2 = research_papers.query("published >= '2019-11-30'")
# Word frequencies for that period; `count` is renamed so it can sit beside the
# `before` column after the merge.
post_sars_word_count = get_word_count(since_sars2).rename(columns={'count': 'after'})

In [None]:
# Word frequencies for the `since_sars2` papers (counts are in column `after`).
post_sars_word_count

In [None]:
# Baseline window: papers dated strictly between 30 Nov 2018 and 30 Nov 2019.
before_sars = research_papers.query(
    "published < '2019-11-30' & published > '2018-11-30'"
)
# Word frequencies for the baseline, with `count` renamed ahead of the merge.
pre_sars_word_count = (
    get_word_count(before_sars)
    .rename(columns={'count': 'before'})
)

In [None]:
# Relies on `since_sars2` having been assigned by an earlier cell.
# Presumably narrows that collection to COVID-related papers (inferred from the
# method name — confirm against ResearchPapers.covid_related).
since_sars2.covid_related()

In [None]:
# Compare each word's share of all counted words after vs. before, and keep
# only the words whose share went up.
word_counts = (
    post_sars_word_count
    .merge(pre_sars_word_count, on=['word'], how='left')
    .fillna(0)                    # words absent from the baseline get a count of 0
    .astype({'before': int})      # the left merge turns `before` into float
    .assign(
        before_pct=lambda frame: (frame['before'] / frame['before'].sum()) * 100,
        after_pct=lambda frame: (frame['after'] / frame['after'].sum()) * 100,
        pct_diff=lambda frame: frame['after_pct'] - frame['before_pct'],
    )
    .loc[lambda frame: frame['pct_diff'] > 0]
)
word_counts.sort_values(['pct_diff'], ascending=False).head(100)

## Which words are common in SARS-CoV-2 research papers?

In [None]:
# Top 100 words by increase in relative frequency (`pct_diff`), largest first.
word_counts.sort_values(['pct_diff'], ascending=False).head(100)