# Requirements

In [1]:
!pip install --user gdelt

Collecting gdelt
  Downloading https://files.pythonhosted.org/packages/65/f9/a3d5111c8f17334b1752c32aedaab0d01ab4324bf26417bd41890d5b25d0/gdelt-0.1.10.6.1-py2.py3-none-any.whl (773kB)
Installing collected packages: gdelt
Successfully installed gdelt-0.1.10.6


# Import modules

In [13]:
from datetime import date, timedelta
from gdelt import gdelt as gdelt_client
import matplotlib.pyplot as plot
import pandas
import seaborn
import tempfile

# Query the knowledge graph
Pass `coverage=True` to query all records of the requested day; otherwise only the records collected in the last 15 minutes are returned.
Use the `date` argument to choose which day is queried.

In [14]:
def get_graph(date, coverage=False):
    """Query the GDELT 2.0 ``gkg`` table for a single day.

    Args:
        date: Object providing ``strftime`` (e.g. ``datetime.date``) that
            identifies the day to query. Note that inside this function the
            name hides the ``date`` class imported at the top of the notebook.
        coverage: Passed through unchanged to ``Search``. According to the
            notebook's notes, a true value requests all records of the day
            rather than only the most recent 15 minutes.

    Returns:
        Whatever ``Search`` returns for the ``gkg`` table.
    """
    gkg_client = gdelt_client(version=2)
    # The search API is given the day as a "YYYY MM DD" string.
    query_day = date.strftime("%Y %m %d")
    return gkg_client.Search(query_day, table="gkg", coverage=coverage)

def get_today_graph(coverage=False):
    """Query the ``gkg`` table for today's date via ``get_graph``.

    Args:
        coverage: Forwarded unchanged to ``get_graph``.

    Returns:
        The result of ``get_graph`` for ``date.today()``.
    """
    today = date.today()
    return get_graph(today, coverage)

def get_yesterday_graph(coverage=False):
    """Query the ``gkg`` table for the day before today via ``get_graph``.

    Args:
        coverage: Forwarded unchanged to ``get_graph``.

    Returns:
        The result of ``get_graph`` for yesterday's date.
    """
    today = date.today()
    yesterday = today - timedelta(days=1)
    return get_graph(yesterday, coverage)

In [21]:
# Report on the day 30 days before today; coverage=True asks for the whole
# day's records rather than only the latest 15-minute batch.
report_date = date.today()-timedelta(days=30)
graph = get_graph(report_date, coverage=True)
# Keep the rows whose V2Themes text contains the coronavirus theme tag;
# na=False treats rows with a missing V2Themes value as non-matching.
corona_records = graph.loc[graph["V2Themes"].str.contains("TAX_DISEASE_CORONAVIRUS", na=False)]

# Save records to temp

In [16]:
#corona_records.to_csv("{}/corona_{}.csv".format(tempfile.gettempdir(), report_date.strftime("%Y%m%d")), index=False)

# Location type exploding and filtering

In [17]:
from enum import Enum

class location_type(Enum):
    """Kind of place a GDELT location entry refers to.

    The integer values are the location type ids that ``gdelt_location``
    converts with ``location_type(int(location_typeid))``. The member meanings
    noted below follow the GDELT GKG codebook naming (to be confirmed against
    the codebook version in use).
    """
    UNKNOWN = 0     # type id used when a location is built without arguments
    COUNTRY = 1     # country-level location
    USSTATE = 2     # state inside the United States
    USCITY = 3      # city inside the United States
    WORLDCITY = 4   # city outside the United States
    WORLDSTATE = 5  # state-level region outside the United States

class gdelt_location:
    """A single location entry of a GDELT GKG record.

    All values except the type id are stored exactly as passed in; in
    particular no numeric conversion is applied to the coordinates.
    """
    def __init__(self, location_typeid=0, name=None, country_code=None, admin1_code=None, lat=None, lon=None, feature_id=None):
        """Create a location.

        Args:
            location_typeid: Value convertible with ``int`` to one of the
                ``location_type`` values; defaults to 0 (``UNKNOWN``).
            name: Location name, or ``None`` when unknown.
            country_code: Stored as ``country_code``.
            admin1_code: Stored as ``admin1_code``.
            lat: Stored as ``location_lat``.
            lon: Stored as ``location_lon``.
            feature_id: Stored as ``feature_id``.

        Raises:
            ValueError: If ``location_typeid`` cannot be converted to an int
                or is not a value of ``location_type``.
        """
        self.location_type = location_type(int(location_typeid))
        self.location_name = name
        self.country_code = country_code
        self.admin1_code = admin1_code
        self.location_lat = lat
        self.location_lon = lon
        self.feature_id = feature_id

    def has_location_type(self, location_type):
        """Return True when this location's type equals ``location_type``."""
        return location_type == self.location_type

    def location_type_matches(self, location_types):
        """Return True when this location's type is contained in ``location_types``."""
        return self.location_type in location_types

    def __str__(self):
        """Return the location name, or an empty string when no name is set."""
        # __str__ must return a str; a default-constructed location has
        # location_name None, which previously made str() raise TypeError.
        if self.location_name is None:
            return ""
        return str(self.location_name)
    
class location_filter():
    """Row filters for dataframes that carry a ``GDELT_Locations`` column.

    Each value of that column is expected to provide ``has_location_type``
    and ``location_type_matches`` (as ``gdelt_location`` does).
    """
    def _select(self, gkg_dataframe, predicate):
        """Return the rows whose ``GDELT_Locations`` value satisfies ``predicate``."""
        # Map over the single column instead of applying row by row: this is
        # cheaper and, unlike a row-wise apply, still yields a one-dimensional
        # result when the dataframe is empty.
        mask = gkg_dataframe["GDELT_Locations"].map(predicate).astype(bool)
        # A positional boolean array avoids any label alignment, which matters
        # because index labels may repeat (e.g. after DataFrame.explode).
        return gkg_dataframe.loc[mask.to_numpy()]

    def filter_by_type(self, gkg_dataframe, location_type):
        """Keep the rows whose location has exactly the given type.

        Args:
            gkg_dataframe: Dataframe with a ``GDELT_Locations`` column.
            location_type: The type every kept location must have.

        Returns:
            A dataframe with the matching rows (empty if none match or the
            input is empty).
        """
        return self._select(gkg_dataframe, lambda location: location.has_location_type(location_type))

    def filter_by_types(self, gkg_dataframe, location_types):
        """Keep the rows whose location type is one of ``location_types``.

        Args:
            gkg_dataframe: Dataframe with a ``GDELT_Locations`` column.
            location_types: Iterable of accepted location types.

        Returns:
            A dataframe with the matching rows (empty if none match or the
            input is empty).
        """
        # Materialize once so one-shot iterables can be tested for every row.
        accepted_types = tuple(location_types)
        return self._select(gkg_dataframe, lambda location: location.location_type_matches(accepted_types))

def split_location_entries(locations):
    """Build one ``gdelt_location`` per pre-split location entry.

    Args:
        locations: Iterable of field sequences, one per location.

    Returns:
        A list with one ``gdelt_location`` per entry. An entry with exactly
        seven fields is passed positionally to ``gdelt_location``; an entry of
        any other length yields a default-constructed ``gdelt_location``.
    """
    parsed = []
    for fields in locations:
        if len(fields) == 7:
            parsed.append(gdelt_location(*fields))
        else:
            parsed.append(gdelt_location())
    return parsed

def split_locations(record):
    """Parse the ``Locations`` value of a record into location objects.

    The value is converted to a string, split into entries on ``;`` and each
    entry is split into fields on ``#`` before being handed to
    ``split_location_entries``.

    Args:
        record: Mapping-like object with a ``"Locations"`` key.

    Returns:
        The list produced by ``split_location_entries``.
    """
    raw_locations = str(record["Locations"])
    field_lists = [entry.split("#") for entry in raw_locations.split(";")]
    return split_location_entries(field_lists)


# Filter the locations by location type

In [22]:
# Work on a deep copy so the parsed column is not added to corona_records.
corona_locations = corona_records.copy(deep=True)
# One list of gdelt_location objects per record, parsed from its "Locations" value.
corona_locations["GDELT_Locations"] = corona_records.apply(lambda record: split_locations(record), axis=1)
# explode() gives every list element its own row; the record's index label is
# repeated on each of those rows.
corona_locations_exploded = corona_locations.explode("GDELT_Locations")

# NOTE(review): this name shadows the built-in filter() for the rest of the notebook.
filter = location_filter()
# Keep only the city-level locations (world and US cities).
corona_filtered_locations = filter.filter_by_types(corona_locations_exploded, [location_type.WORLDCITY, location_type.USCITY])
# Drop the references to the large source frames.
# NOTE(review): after these deletions this cell cannot be re-run on its own;
# the query cell has to be executed again first.
del corona_records
del graph

# Extract the coordinates and the name from the GDELT location

In [23]:
def to_point_locations(gkg_dataframe):
    """Flatten the ``GDELT_Locations`` objects into name/latitude/longitude columns.

    Args:
        gkg_dataframe: Dataframe with the record columns ``GKGRECORDID``,
            ``DATE``, ``SourceCollectionIdentifier``, ``SourceCommonName``,
            ``DocumentIdentifier`` and a ``GDELT_Locations`` column whose
            values expose ``location_name``, ``location_lat`` and
            ``location_lon``.

    Returns:
        A new dataframe with the record columns followed by
        ``Location_Name``, ``Location_Lat`` and ``Location_Lon``. The input
        index is kept. For an empty input an empty dataframe with the same
        columns is returned.
    """
    record_columns = ["GKGRECORDID",
                      "DATE",
                      "SourceCollectionIdentifier",
                      "SourceCommonName",
                      "DocumentIdentifier"]
    point_columns = ["Location_Name", "Location_Lat", "Location_Lon"]

    if gkg_dataframe.empty:
        return pandas.DataFrame(columns=record_columns + point_columns)

    # Read from the argument, not from a notebook-level variable, so the
    # function works for any dataframe and after the globals are deleted.
    locations = gkg_dataframe["GDELT_Locations"]
    point_locations = gkg_dataframe[record_columns].copy(deep=True)
    # Plain lists are assigned positionally, so repeated index labels (as
    # produced by DataFrame.explode) cannot cause alignment problems.
    point_locations["Location_Name"] = [location.location_name for location in locations]
    point_locations["Location_Lat"] = [location.location_lat for location in locations]
    point_locations["Location_Lon"] = [location.location_lon for location in locations]
    return point_locations

# Reduce the filtered rows to record identifiers plus location name and coordinates.
corona_point_locations = to_point_locations(corona_filtered_locations)
# NOTE(review): once this name is deleted the cell cannot be re-run without
# re-running the filtering cell first.
del corona_filtered_locations

# Bare last expression so the notebook renders the resulting frame.
corona_point_locations

Unnamed: 0,GKGRECORDID,DATE,SourceCollectionIdentifier,SourceCommonName,DocumentIdentifier,Location_Name,Location_Lat,Location_Lon
23,20200213000000-23,20200213000000,1,myanmarnews.net,https://www.myanmarnews.net/news/264003252/cor...,"Tokyo, Tokyo, Japan",35.685,139.751
23,20200213000000-23,20200213000000,1,myanmarnews.net,https://www.myanmarnews.net/news/264003252/cor...,"Wuhan, Hubei, China",30.5833,114.267
46,20200213000000-46,20200213000000,1,northkoreatimes.com,https://www.northkoreatimes.com/news/264003252...,"Tokyo, Tokyo, Japan",35.685,139.751
46,20200213000000-46,20200213000000,1,northkoreatimes.com,https://www.northkoreatimes.com/news/264003252...,"Wuhan, Hubei, China",30.5833,114.267
51,20200213000000-51,20200213000000,1,pittsburghstar.com,https://www.pittsburghstar.com/news/264006117/...,"Barcelona, Comunidad Autonoma De Cataluna, Spain",41.3833,2.18333
...,...,...,...,...,...,...,...,...
185591,20200213224500-2670,20200213224500,1,religionnews.com,https://religionnews.com/2020/02/13/a-daughter...,"Xinjiang, Jiangxi, China",27.7481,116.274
185591,20200213224500-2670,20200213224500,1,religionnews.com,https://religionnews.com/2020/02/13/a-daughter...,"University Of Massachusetts, Massachusetts, Un...",42.389,-72.5287
185595,20200213224500-2674,20200213224500,1,starherald.com,https://www.starherald.com/news/trending/fear-...,"Syracuse, New York, United States",43.0481,-76.1474
185595,20200213224500-2674,20200213224500,1,starherald.com,https://www.starherald.com/news/trending/fear-...,"Yokohama, Kanagawa, Japan",35.45,139.65


# Save point locations to temp

In [24]:
# Write the point locations to the system temp directory as
# corona_locations_<YYYYMMDD>.gkg.csv (date of the report); index=False leaves
# the dataframe index out of the file.
corona_point_locations.to_csv("{}/corona_locations_{}.gkg.csv".format(tempfile.gettempdir(), report_date.strftime("%Y%m%d")), index=False)