# Query the news articles tagged with CORONAVIRUS
- Query using a date range
- Count the news articles in 15-minute time windows

# Requirements

In [1]:
!pip install --user gdelt



You are using pip version 18.1, however version 20.0.2 is available.
You should consider upgrading via the 'python -m pip install --upgrade pip' command.


# Import modules

In [1]:
from datetime import date, timedelta
from gdelt import gdelt as gdelt_client
import os.path
import pandas
import tempfile

In [2]:
def get_graph(date, coverage=False):
    """Fetch the GDELT v2 "gkg" table for one day and parse its DATE column.

    Args:
        date: Day to query. Only ``strftime`` is called on it, so any
            ``datetime.date``-like object works.
        coverage: Passed through unchanged to ``client.Search``.

    Returns:
        The table returned by ``client.Search`` with its ``DATE`` column
        replaced by pandas datetimes.
    """
    client = gdelt_client(version=2)
    graph = client.Search(date.strftime("%Y %m %d"), table="gkg", coverage=coverage)
    # DATE is cast to text and only its first 14 characters (YYYYMMDDHHMMSS)
    # are parsed. The conversion is done on the whole column at once instead
    # of calling pandas.to_datetime once per row, which is far slower.
    graph = graph.astype({"DATE": str})
    graph["DATE"] = pandas.to_datetime(graph["DATE"].str[:14], format="%Y%m%d%H%M%S")
    return graph

def get_graph_range(from_date, to_date, coverage=False):
    """Fetch and stack the GDELT v2 "gkg" tables for an inclusive day range.

    Args:
        from_date: First day to query (``datetime.date``-like).
        to_date: Last day to query, inclusive. It may equal ``from_date`` to
            request a single day.
        coverage: Passed through unchanged to ``client.Search``.

    Returns:
        The per-day tables concatenated row-wise, each with its ``DATE``
        column parsed to pandas datetimes, or ``None`` when ``to_date`` lies
        before ``from_date``.
    """
    date_range = to_date - from_date
    # The loop below includes both endpoints, so a zero-day span is a valid
    # one-day query; only a reversed range is rejected.
    if date_range.days < 0:
        return None

    client = gdelt_client(version=2)
    daily_graphs = []
    for offset in range(date_range.days + 1):
        # Named `current_day` so the imported `date` class is not shadowed.
        current_day = from_date + timedelta(days=offset)
        daily = client.Search(current_day.strftime("%Y %m %d"), table="gkg", coverage=coverage)
        # Keep the first 14 characters (YYYYMMDDHHMMSS) and parse the whole
        # column in one vectorised call.
        daily = daily.astype({"DATE": str})
        daily["DATE"] = pandas.to_datetime(daily["DATE"].str[:14], format="%Y%m%d%H%M%S")
        daily_graphs.append(daily)
    # Concatenate once at the end; concatenating inside the loop re-copies
    # the accumulated rows on every iteration.
    return pandas.concat(daily_graphs, axis=0)

def count_by_theme(graph, theme):
    """Count the rows per ``DATE`` whose ``V2Themes`` text matches ``theme``.

    Args:
        graph: Table with ``V2Themes`` and ``DATE`` columns.
        theme: Pattern handed to ``Series.str.contains``; rows with a missing
            ``V2Themes`` value are treated as non-matching.

    Returns:
        A Series indexed by ``DATE`` holding the number of matching rows.
    """
    has_theme = graph["V2Themes"].str.contains(theme, na=False)
    matching_rows = graph.loc[has_theme]
    return matching_rows.groupby("DATE").size()

def save_temporary_report(date, theme, name):
    """Append one day's per-timestamp theme counts to a CSV in the temp dir.

    The day's table is fetched with ``get_graph(date, coverage=True)``,
    reduced with ``count_by_theme`` and appended to
    ``<tempdir>/<name>_<YYYYMMDD>.report.csv``. The header row is written
    only when the file does not exist yet.

    Args:
        date: Day to report on (``datetime.date``-like).
        theme: Theme pattern forwarded to ``count_by_theme``.
        name: Prefix of the report file name.
    """
    graph = get_graph(date, coverage=True)
    # count_by_theme returns a Series, so the value column is labelled by
    # renaming the Series; assigning `.columns` on a Series only sets an
    # ad-hoc attribute.
    graph_counts = count_by_theme(graph, theme).rename("COUNT")
    # NOTE(review): `date` is the parameter here, not the imported class.
    # Assuming it is a datetime.date, `.today()` yields the current day, so
    # every call made on the same day appends to the same file - confirm
    # this is intended.
    run_stamp = date.today().strftime("%Y%m%d")
    csv_file = os.path.join(
        tempfile.gettempdir(), "{}_{}.report.csv".format(name, run_stamp)
    )
    write_header = not os.path.isfile(csv_file)
    graph_counts.to_csv(csv_file, header=write_header, mode="a", index=True)

# Query and count the news articles

In [3]:
# Report window: the 91 days (both ends included) that end one week ago.
end_date = date.today() - timedelta(weeks=1)
start_date = end_date - timedelta(days=90)
date_range = end_date - start_date

# Append one day's coronavirus counts to the temporary report per iteration.
for day in range(date_range.days + 1):
    report_date = start_date + timedelta(days=day)
    save_temporary_report(
        report_date,
        theme="TAX_DISEASE_CORONAVIRUS",
        name="corona",
    )