In [57]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [58]:
import numpy as np
import pandas as pd
import functools
from nltk import PorterStemmer

In [59]:
pd.options.mode.chained_assignment = None

df=pd.read_csv("/home/varun_nagrare/2020-08-03/metadata.csv", low_memory=False, usecols=['title','journal','abstract','authors','doi','publish_time'])
df.head()

Unnamed: 0,title,doi,abstract,publish_time,authors,journal
0,Clinical features of culture-proven Mycoplasma...,10.1186/1471-2334-1-6,OBJECTIVE: This retrospective chart review des...,2001-07-04,"Madani, Tariq A; Al-Ghamdi, Aisha A",BMC Infect Dis
1,Nitric oxide: a pro-inflammatory mediator in l...,10.1186/rr14,Inflammatory diseases of the respiratory tract...,2000-08-15,"Vliet, Albert van der; Eiserich, Jason P; Cros...",Respir Res
2,Surfactant protein-D and pulmonary host defense,10.1186/rr19,Surfactant protein-D (SP-D) participates in th...,2000-08-25,"Crouch, Erika C",Respir Res
3,Role of endothelin-1 in lung disease,10.1186/rr44,Endothelin-1 (ET-1) is a 21 amino acid peptide...,2001-02-22,"Fagan, Karen A; McMurtry, Ivan F; Rodman, David M",Respir Res
4,Gene expression in epithelial cells in respons...,10.1186/rr61,Respiratory syncytial virus (RSV) and pneumoni...,2001-05-11,"Domachowske, Joseph B; Bonville, Cynthia A; Ro...",Respir Res


In [60]:
# keep only documents with covid -cov-2 and cov2
def search_focus(df):
    dfa = df[df['abstract'].str.contains('covid')]
    dfb = df[df['abstract'].str.contains('-cov-2')]
    dfc = df[df['abstract'].str.contains('cov2')]
    dfd = df[df['abstract'].str.contains('ncov')]
    dfe = df[df['abstract'].str.contains('coronavirus')]
    dff = df[df['abstract'].str.contains('sars-cov-2')]
    
    frames=[dfa,dfb,dfc,dfd,dfe,dff]
    df = pd.concat(frames)
    df=df.drop_duplicates(subset='title', keep="first")
    return df

In [61]:
print (df.shape)

#drop duplicates
df=df.drop_duplicates()

#drop NANs 
df=df.fillna("no data ")

# drop duplicates keeping the first occurence
df=df.drop_duplicates(subset='title', keep="first")

# finding publish_time which contains year 2020
df=df[df['publish_time'].str.contains('2020')]

(207311, 6)


In [62]:
# convert abstracts to lowercase
df["abstract"] = df["abstract"].str.lower()+df["title"].str.lower()

#show 5 lines of the new dataframe
df=search_focus(df)

print(df.shape)

df.head()

(71778, 6)


Unnamed: 0,title,doi,abstract,publish_time,authors,journal
4662,Latest assessment on COVID-19 from the Europea...,10.2807/1560-7917.es.2020.25.8.2002271,no data latest assessment on covid-19 from the...,2020-02-27,no data,Euro Surveill
4698,Updated rapid risk assessment from ECDC on the...,10.2807/1560-7917.es.2020.25.9.2003051,no data updated rapid risk assessment from ecd...,2020-03-05,no data,Euro Surveill
4732,Updated rapid risk assessment from ECDC on the...,10.2807/1560-7917.es.2020.25.10.2003121,no data updated rapid risk assessment from ecd...,2020-03-12,no data,Euro Surveill
4800,Empfehlungen zur intensivmedizinischen Therapi...,10.1007/s00063-020-00674-3,no data empfehlungen zur intensivmedizinischen...,2020-03-12,"Kluge, Stefan; Janssens, Uwe; Welte, Tobias; W...",Med Klin Intensivmed Notfmed
5683,The impact of COVID-19 on the provision of don...,10.1038/s41409-020-0873-x,no data the impact of covid-19 on the provisio...,2020-03-23,"Szer, Jeff; Weisdorf, Daniel; Querol, Sergio; ...",Bone Marrow Transplant


In [63]:
# function to find the root word of search_words
def stem_words(search_words):
    stemmer = PorterStemmer()
    singles=[]
    for w in search_words:
        singles.append(stemmer.stem(w))
    return singles

In [64]:
# function to find exact words
def search_dataframe(df,search_words):
    search_words=stem_words(search_words)
    df1=df[functools.reduce(lambda a, b: a&b, (df['abstract'].str.contains(s) for s in search_words))]
    return df1

In [65]:
# function analyze search results for relevance with word count or abstract length
def search_relevance(rel_df,search_words):
    rel_df['score']=""
    search_words=stem_words(search_words)
    for index, row in rel_df.iterrows():
        abstract = row['abstract']
        result = abstract.split()
        len_abstract=len(result)
        score=0
        for word in search_words:
            score=score+result.count(word)
        final_score=(score/len_abstract)
        rel_score=score*final_score
        rel_df.loc[index, 'score'] = rel_score
    rel_df=rel_df.sort_values(by=['score'], ascending=False)
    return rel_df

In [82]:
# function to get best sentences from the search results
def get_sentences(df1,search_words):
    df_table = pd.DataFrame(columns = ["pub_date","authors","title","fragment","rel_score"])
    search_words=stem_words(search_words)
    for index, row in df1.iterrows():
        pub_sentence=''
        sentences_used=0
        #break apart the absracrt to sentence level
        sentences = row['abstract'].split('. ')
        #loop through the sentences of the abstract
        highligts=[]
        for sentence in sentences:
            # missing lets the system know if all the words are in the sentence
            missing=0
            #loop through the words of sentence
            for word in search_words:
                #if keyword missing change missing variable
                if word not in sentence:
                    missing=1
                #if '%' in sentence:
                    #missing=missing-1
            # after all sentences processed show the sentences not missing keywords
            if missing==0 and len(sentence)<1000 and sentence!='':
                sentence=sentence.capitalize()
                if sentence[len(sentence)-1]!='.':
                    sentence=sentence+'.'
                pub_sentence=pub_sentence+'<br><br>'+sentence
        if pub_sentence!='':
            sentence=pub_sentence
            sentences_used=sentences_used+1
            authors=row["authors"].split(" ")
            link=row['doi']
            title=row["title"]
            score=row["score"]
            linka='https://doi.org/'+link
            linkb=title
            sentence='<p fontsize=tiny" align="left">'+sentence+'</p>'
            final_link='<p align="left"><a href="{}">{}</a></p>'.format(linka,linkb)
            to_append = [row['publish_time'],authors[0]+' and others.',final_link,sentence,score]
            df_length = len(df_table)
            df_table.loc[df_length] = to_append
    return df_table

# What is known about transmission, incubation, and environmental stability?

In [83]:
# list of lists of search terms
questions=[
['Q: What is the range of incubation periods for the disease in humans?'],
['Q: How long are individuals contagious?'],
['Q: How long are individuals contagious, even after recovery.'],
['Q: Is population movement control effective in stopping transmission?'],
['Q: What is the role of environment in transmission?']
]

search_words=[
['incubation','period','range'],
['viral','shedding','duration'],
['asymptomatic','shedding'],
['restriction', 'movement'],
['transmission','routes']
]

q_count=0

for search_words in search_words:
    str1=''
    # make a string of the search words to print readable table
    str1=''.join(questions[q_count])
    
    #search the dataframe for all words
    df1=search_dataframe(df,search_words)

    # analyze search results for relevance 
    df1=search_relevance(df1,search_words)

    # get best sentences
    df_table=get_sentences(df1,search_words)
    
    # get the length of rows
    length=df_table.shape[0]
    
    #limit 3 results
    df_table=df_table.head()
    
    # dropping the rel_score
    df_table=df_table.drop(['rel_score'], axis=1)
    
    #convert df to html readable table
    df_table=HTML(df_table.to_html(escape=False,index=False))
    
    # display the question
    display(HTML('<br><h3>'+str1+'</h3>'))
    
    #display the summary
    if length<1:
        print ("No reliable answer found")
    else:
        display(df_table)
    q_count=q_count+1

pub_date,authors,title,fragment
2020-03-18,"Jiang, and others.",Is a 14-day quarantine period optimal for effectively controlling coronavirus disease 2019 (COVID-19)?,Results the full range of incubation periods of the covid-19 cases ranged from 0 to 33 days among 2015 cases.
2020-05-15,"Bui, and others.",Estimation of the incubation period of SARS-CoV-2 in Vietnam,"Average incubation periods estimated using different distribution model ranged from 6.0 days to 6.4 days with the weibull distribution demonstrated the best fit to the data. The estimated mean of incubation period using weibull distribution model was 6.4 days (95% credible interval (ci): 4.89 - 8.5), standard deviation (sd) was 3.05 (95%ci 3.05 - 5.30), median was 5.6, ranges from 1.35 to 13.04 days (2.5th to 97.5th percentiles)."
2020-03-08,"Xia, and others.",Transmission of corona virus disease 2019 during the incubation period may lead to a quarantine loophole,"Results: the estimated mean incubation period for covid-19 was 4.9 days (95% confidence interval [ci], 4.4 to 5.4) days, ranging from 0.8 to 11.1 days (2.5th to 97.5th percentile)."
2020,"Yang, and others.","Estimation of incubation period and serial interval of COVID-19: analysis of 178 cases and 131 transmission chains in Hubei province, China","Our estimated median incubation period of covid-19 is 5.4 days (bootstrapped 95% confidence interval (ci) 4.8-6.0), and the 2.5th and 97.5th percentiles are 1 and 15 days, respectively; while the estimated serial interval of covid-19 falls within the range of -4 to 13 days with 95% confidence and has a median of 4.6 days (95% ci 3.7-5.5)."
2020,"Yang, and others.",[The preliminary analysis on the characteristics of the cluster for the COVID-19],"We selected 325 cases to estimate the incubation period and its range was 1 to 20 days, median was 7 days, and mode was 4 days."


pub_date,authors,title,fragment
2020-05-23,"Weiss, and others.",Spatial and temporal dynamics of SARS-CoV-2 in COVID-19 patients: A systematic review,"In this study, we aimed to provide a coherent overview from published studies of the duration of viral detection and viral load in covid-19 patients, stratified by specimen type, clinical severity and age."
2020-07-21,"Dodds, and others.",Model-Informed Drug Repurposing: Viral Kinetic Modeling to Prioritize Rational Drug Combinations for COVID-19.,"The endpoints and metrics included viral load area under the curve (auc), duration of viral shedding, and epithelial cells infected. In addition, we observed that the time-window opportunity for a therapeutic intervention to effect duration of viral shedding exceeds the effect on sparing epithelial cells from infection or impact on viral load auc."
2020,"Dodds, and others.",Model-Informed Drug Repurposing: Viral Kinetic Modeling to Prioritize Rational Drug Combinations for COVID-19,"The endpoints and metrics included viral load area under the curve (auc), duration of viral shedding, and epithelial cells infected. In addition, we observed that the time-window opportunity for a therapeutic intervention to effect duration of viral shedding exceeds the effect on sparing epithelial cells from infection or impact on viral load auc."
2020-07-30,"Park, and others.",Determining the period of communicability of SARS-CoV-2: A rapid review of the literature,"Methods: studies reporting empirical data on the period of communicability of sars-cov-2 through investigations of duration of communicability based on in-person contact ('contact transmission'), isolation and culture of virus ('viral isolation'), and viral shedding by detection of nucleic acids by rt-pcr ('viral shedding') were identified through searches of peer-reviewed and pre-print health sciences literature databases (ovid medline, embase, google scholar, medrxiv and arxiv) and the grey literature."
2020-07-28,"Cevik, and others.","SARS-CoV-2 viral load dynamics, duration of viral shedding and infectiousness: a living systematic review and meta-analysis","Background viral load kinetics and the duration of viral shedding are important determinants for disease transmission. We aim i) to characterise viral load dynamics, duration of viral rna, and viable virus shedding of sars-cov-2 in various body fluids and ii) to compare sars-cov-2 viral dynamics with sars-cov-1 and mers-cov. Methods: medline, embase, europe pmc, preprint servers and grey literature were searched to retrieve all articles reporting viral dynamics and duration of sars-cov-2, sars-cov-1 and mers-cov shedding. Funding: no funding was received.sars-cov-2 viral load dynamics, duration of viral shedding and infectiousness: a living systematic review and meta-analysis."


pub_date,authors,title,fragment
2020-03-31,"Abduljalil, and others.","Epidemiology, genome, and clinical features of the pandemic SARS-CoV-2: a recent view",Asymptomatic carrier state is of paramount importance because of carriers' ability to spread the infection and to shed the virus into the air and surroundings.
2020,"Li, and others.",Viral shedding dynamics in asymptomatic and mildly symptomatic patients infected with SARS-CoV-2,"The median duration of viral shedding was 11.5, 28 and 31 days for pre-symptomatic, asymptomatic and mild symptomatic patients, separately. Conclusions: long-term viral shedding was presented in patients with mild symptoms and asymptomatic cases, and specific antibody production to sars-cov-2 may not guarantee viral clearance after discharge. These observations should be considered when making decisions regarding clinical and public health and strategies for prevention and control of sars-cov-2 infection.viral shedding dynamics in asymptomatic and mildly symptomatic patients infected with sars-cov-2."
2020,"Matava, and others.",Pediatric Airway Management in COVID-19 Patients: Consensus Guidelines From the Society for Pediatric Anesthesia's Pediatric Difficult Intubation Collaborative and the Canadian Pediatric Anesthesia Society,"Pedi-c identified overarching goals during care, including minimizing aerosolized respiratory secretions, minimizing the number of clinicians in contact with a patient, and recognizing that undiagnosed asymptomatic patients may shed the virus and infect health care workers."
2020-04-20,"Matava, and others.",Pediatric Airway Management in Coronavirus Disease 2019 Patients: Consensus Guidelines From the Society for Pediatric Anesthesia’s Pediatric Difficult Intubation Collaborative and the Canadian Pediatric Anesthesia Society,"Pedi-c identified overarching goals during care, including minimizing aerosolized respiratory secretions, minimizing the number of clinicians in contact with a patient, and recognizing that undiagnosed asymptomatic patients may shed the virus and infect health care workers."
2020-05-25,"Beggs, and others.",Is there an airborne component to the transmission of COVID-19? : a quantitative analysis study,"Conclusions given that live sars-cov-2 virions are known to be shed in high concentrations from the nasal cavity of both symptomatic and asymptomatic covid-19 patients, the results suggest that individuals who share enclosed spaces with an infector may be at risk of contracting covid-19 by the aerosol route, even when practicing social distancing.is there an airborne component to the transmission of covid-19? : a quantitative analysis study."


pub_date,authors,title,fragment
2020,"Guerrero, and others.",Canadian children's and youth's adherence to the 24-h movement guidelines during the COVID-19 pandemic: A decision tree analysis,"Results highlight the importance of targeting parents' perceived capability for the promotion of children's and youth's movement behaviors during challenging times of the covid-19 pandemic, paying particular attention to enhancing parental perceived capability to restrict screen time.canadian children's and youth's adherence to the 24-h movement guidelines during the covid-19 pandemic: a decision tree analysis."
2020,"Moore, and others.",Impact of the COVID-19 virus outbreak on movement and play behaviours of Canadian children and youth: a national survey,"Though the covid-19 virus outbreak has changed the daily lives of children and youth, it is unknown to what extent related restrictions may compromise the ability to play and meet movement behaviour recommendations. This secondary data analysis examined the immediate impacts of covid-19 restrictions on movement and play behaviours in children and youth. Results: only 4.8% (2.8% girls, 6.5% boys) of children and 0.6% (0.8% girls, 0.5% boys) of youth were meeting combined movement behaviour guidelines during covid-19 restrictions."
2020-06-13,"Tang, and others.",Movement control as an effective measure against Covid-19 spread in Malaysia: an overview,"Results: at the onset of the covid-19 outbreak, malaysia had initiated travel restrictions and quarantine; but with a persistent increase in new covid-19 cases, the movement control order was finally rolled out on 18 march 2020, requiring closure of all businesses except those providing essential services and items."
2020-06-12,"Bucsky, and others.",Modal share changes due to COVID-19: The case of Budapest,"One of the most important responses of countries worldwide to slow the spread of the pandemic has been to restrict the movement of people, which has had a considerable effect on transport systems."
2020-07-21,"Ong, and others.",Implications of the COVID-19 lockdown on dengue transmission and the occurrence of Aedes aegypti (Linnaeus) and Aedes albopictus (Skuse) in Malaysia,"The impact of movement restrictions (mrs) during the covid-19 lockdown on the existing endemic infectious disease dengue fever has generated considerable research interest. With a partial lockdown implemented by malaysia on 18 march, we postulate the movement restrictions (mrs) of people in large-scale would hamper the regular dengue transmission and aim to reveal the impact of mrs on both dengue incidences and aedes mosquitoes."


pub_date,authors,title,fragment
2020,"Rickman, and others.",Nosocomial transmission of COVID-19: a retrospective study of 66 hospital-acquired cases in a London teaching hospital,"In a major london teaching hospital, 66/435 (15%) of covid-19 inpatient cases between 2 march and 12 april 2020 were definitely or probably hospital-acquired, through varied transmission routes."
2020,"Manabe, and others.",Trends in clinical features of novel coronavirus disease (COVID-19): A systematic review and meta-analysis of studies published from December 2019 to February 2020,"During this observation period, as the infection continued to spread, the clinical conditions for majority of patients became less severe with the changes in the route of transmission.trends in clinical features of novel coronavirus disease (covid-19): a systematic review and meta-analysis of studies published from december 2019 to february 2020."
2020,"Herrera, and others.",Is the oral cavity relevant in SARS-CoV-2 pandemic?,"Similarly, a decrease in the oral viral load would diminish the amount of virus expelled and reduce the risk of transmission, since (i) during the first 10 days, the virus mainly accumulates at the nasal, oral, and pharyngeal area; (ii) the number of angiotensin-converting enzyme (ace2) receptor is greater in the salivary glands as compared with the lungs; and (iii) salivary droplets represent the most relevant transmission route."
2020,"Zhang, and others.","Biological, clinical and epidemiological features of COVID-19, SARS and MERS and AutoDock simulation of ACE2","In addition to the common route of transmission including airborne transmission, these three viruses have their own unique routes of transmission such as fecal-oral route of transmission covid-19. Their receptors and routes of transmission are not all the same, which makes them different in clinical features and treatments."
2020-05-15,"Garcia-Alamino, and others.","Aspectos Epidemiológicos, Clínica Y Mecanismos De Control De La Pandemia Por Sars-Cov-2: Situación En España","Sars-cov-2 has a high transmission rate, the route of transmission between humans is through the secretions of infected people, hands or contaminated objects."
