# Query the mentions table

In [37]:
from datetime import date, timedelta
from gdelt import gdelt as gdelt_client
import pandas as pd

In [228]:
def document_has_theme(document, themes=[]):
    """Tell whether a document is tagged with at least one of the given themes.

    The document's "Themes" value is converted to a string and split on ";".
    A theme matches only when it equals one of the resulting pieces exactly.

    Args:
        document: Mapping-like object (for example a DataFrame row) that has
            a "Themes" entry.
        themes: Theme names to look for.

    Returns:
        True if any entry of `themes` is among the document's themes,
        otherwise False (including when `themes` is empty).
    """
    tagged_with = str(document["Themes"]).split(";")
    return any(wanted in tagged_with for wanted in themes)

def query_documents(graphs, mentions, themes=[]):
    """Select the rows of `graphs` that are referenced by `mentions`.

    A graph row is kept when its "DocumentIdentifier" appears in the
    "MentionIdentifier" column of `mentions`. When `themes` is non-empty the
    result is narrowed further to rows for which `document_has_theme`
    returns True.

    Args:
        graphs: DataFrame with a "DocumentIdentifier" column (and a "Themes"
            column when theme filtering is requested).
        mentions: DataFrame with a "MentionIdentifier" column.
        themes: Optional theme names; an empty sequence disables the filter.

    Returns:
        The matching subset of `graphs`.
    """
    is_mentioned = graphs["DocumentIdentifier"].isin(mentions["MentionIdentifier"])
    mentioned_documents = graphs.loc[is_mentioned]

    # No theme filter requested: every mentioned document qualifies.
    if len(themes) == 0:
        return mentioned_documents

    # Row-wise check, because each row's "Themes" string has to be split.
    theme_mask = mentioned_documents.apply(document_has_theme, args=(themes,), axis=1)
    return mentioned_documents[theme_mask]
    
def query_mentions(mentions, events):
    """Return the mentions that belong to any of the given events.

    Args:
        mentions: DataFrame with a "GLOBALEVENTID" column.
        events: DataFrame with a "GLOBALEVENTID" column.

    Returns:
        The rows of `mentions` whose "GLOBALEVENTID" occurs in `events`.
    """
    wanted_event_ids = events["GLOBALEVENTID"]
    belongs_to_event = mentions["GLOBALEVENTID"].isin(wanted_event_ids)
    return mentions.loc[belongs_to_event]

def query_top_most_events(mentions, events, top_n=1):
    """Return the events that are mentioned most often.

    Mentions are counted per "GLOBALEVENTID"; the `top_n` most frequent IDs
    are then looked up in `events`.

    Args:
        mentions: DataFrame with a "GLOBALEVENTID" column, one row per mention.
        events: DataFrame with a "GLOBALEVENTID" column.
        top_n: How many of the most-mentioned event IDs to keep. Defaults to 1.

    Returns:
        The rows of `events` whose "GLOBALEVENTID" is among the `top_n`
        most-mentioned IDs, in the row order of `events`.

    Raises:
        ValueError: If `top_n` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # value_counts sorts by descending frequency, so the leading entries are
    # the most-mentioned events; head() is explicitly positional.
    mention_counts = mentions["GLOBALEVENTID"].value_counts(sort=True)
    top_most_event_ids = mention_counts.head(top_n).index.tolist()
    return events.loc[events["GLOBALEVENTID"].isin(top_most_event_ids)]

def query_complex(date_of_interest=None, full_day=True):
    """Fetch one day of GDELT data and return its most-mentioned event.

    Downloads the "events", "mentions" and "gkg" tables for the given date
    through a version-2 GDELT client, then picks the top-most event(s) via
    `query_top_most_events` and their mentions via `query_mentions`.

    Args:
        date_of_interest: The `datetime.date` to query. When omitted (None),
            today's date is used, resolved at call time rather than when the
            function was defined, so a long-running kernel does not keep
            using a stale date.
        full_day: Passed through as the `coverage` argument of the client's
            `Search` call.

    Returns:
        A tuple `(top_most_events, mentions_of_those_events)`.
    """
    if date_of_interest is None:
        date_of_interest = date.today()

    # Format once; all three table queries use the same date string.
    search_date = date_of_interest.strftime("%Y %m %d")

    client = gdelt_client(version=2)
    events = client.Search(search_date, table="events", coverage=full_day)
    mentions = client.Search(search_date, table="mentions", coverage=full_day)
    # NOTE(review): the gkg table is downloaded but not used in the result;
    # it only feeds query_documents, which is not called here.
    graphs = client.Search(search_date, table="gkg", coverage=full_day)

    top_most_events = query_top_most_events(mentions, events)
    return (top_most_events, query_mentions(mentions, top_most_events))

In [229]:
# Query the previous calendar day rather than the current one.
yesterday = date.today() - timedelta(days=1)
query_complex(yesterday)

(       GLOBALEVENTID   SQLDATE  MonthYear  Year  FractionDate Actor1Code  \
 18196      941769106  20200820     202008  2020     2020.6301        USA   
 
       Actor1Name Actor1CountryCode Actor1KnownGroupCode Actor1EthnicCode  ...  \
 18196   AMERICAN               USA                  NaN              NaN  ...   
 
       ActionGeo_Type                                ActionGeo_FullName  \
 18196              3  White House, District of Columbia, United States   
 
       ActionGeo_CountryCode ActionGeo_ADM1Code ActionGeo_ADM2Code  \
 18196                    US               USDC                NaN   
 
       ActionGeo_Lat ActionGeo_Long ActionGeo_FeatureID       DATEADDED  \
 18196       38.8951       -77.0364              531871  20200820000000   
 
                                                SOURCEURL  
 18196  https://www.mycentraloregon.com/2020/08/19/tru...  
 
 [1 rows x 62 columns],
         GLOBALEVENTID   EventTimeDate  MentionTimeDate  MentionType  \
 4531        9