In [82]:
import pandas as pd
from datetime import datetime
%run ./connect_to_api.ipynb

In [107]:
def get_hit(hit_type, hit_value, how_many = 10,sort_by = 'id',get_groups = True,get_clusters = True,get_nodenames = True):
    """
    Fetch hits of one type matching a hit value, optionally enriched with names.

    Queries the table hits_twitter_<hit_type> joined to map_nodes on node_id,
    keeps the rows whose hit_value equals `hit_value`, de-duplicates them and
    adds a 'date' column plus the optional name columns.

    Parameters:
    hit_type (str): Suffix of the hits table to query (e.g. 'hashtags' reads
        hits_twitter_hashtags). Only letters, digits and underscores are accepted
        because the value is spliced into the SQL text as an identifier.
    hit_value (str): Value compared against the table's hit_value column.
    how_many (int): Maximum number of rows fetched (SQL LIMIT); 0 means no limit.
    sort_by (str): Despite the name, this is the column passed to drop_duplicates:
        only the first row per distinct value of this column is kept. It is
        evaluated before 'id', 'id_2', 'node_id2' and 'params' are dropped.
    get_groups (bool): Add a 'group_name' column via groupname_lookup.
    get_clusters (bool): Add a 'cluster_name' column via clustername_lookup.
    get_nodenames (bool): Add a 'node_name' column via nodename_lookup.

    Returns:
    hits_df (DataFrame): Indexed by hit_value, with columns node_id, message_id,
        hit_time, map_id, cluster_id, date, the requested name columns, and
        hit_type as the last column.

    Raises:
    ValueError: If hit_type contains characters other than letters, digits and
        underscores, or if how_many cannot be converted to an integer.
    """
    # The table name cannot be bound as a query parameter, so restrict it to
    # identifier characters before it is interpolated into the SQL text.
    hit_type_text = str(hit_type)
    if not hit_type_text or not all(ch.isalnum() or ch == '_' for ch in hit_type_text):
        raise ValueError('hit_type must contain only letters, digits and underscores: {!r}'.format(hit_type))

    # int() guarantees that nothing but a number ends up in the LIMIT clause.
    if how_many == 0:
        limit_clause = ''
    else:
        limit_clause = 'LIMIT ' + str(int(how_many))

    # Double any single quote so the value cannot terminate the SQL string literal.
    # NOTE(review): the paramstyle of `cur` is not visible from this notebook; if the
    # driver supports bound parameters, prefer cur.execute(query, (hit_value,)).
    safe_value = str(hit_value).replace("'", "''")

    table = 'hits_twitter_{}'.format(hit_type_text)
    query = (
        "SELECT * FROM ( "
        "SELECT * FROM {table} "
        "JOIN map_nodes on map_nodes.node_id = {table}.node_id "
        "WHERE {table}.hit_value = '{value}' "
        ") s {limit};"
    ).format(table=table, value=safe_value, limit=limit_clause)

    cur.execute(query)
    hits = cur.fetchall()
    # SELECT * over the join returns the hits columns followed by the map_nodes
    # columns; the *_2 / *2 names label the duplicated map_nodes id and node_id.
    hits_df = pd.DataFrame(hits, columns=["id","hit_value", "node_id", "message_id","hit_time","params","id_2","node_id2","map_id","cluster_id"])

    hits_df["node_id"] = hits_df["node_id"].astype("str")
    hits_df["message_id"] = hits_df["message_id"].astype("str")
    hits_df["hit_type"] = hit_type
    # De-duplicate first: sort_by may name a column (such as the default 'id')
    # that is removed by the drop that follows.
    hits_df = (
        hits_df
        .drop_duplicates([sort_by])
        .drop(columns=['node_id2','id_2','id','params'])
    )

    def add_column(column, row_func):
        # Compute one value per row and insert it just before the last column,
        # so hit_type always stays last.
        values = hits_df.apply(row_func, axis=1, result_type='reduce')
        hits_df.insert(loc=hits_df.shape[1] - 1, column=column, value=values)

    add_column('date', date_extractor)

    # Each lookup below issues at least one HTTP request per row.
    if get_groups:
        add_column('group_name', groupname_lookup)

    if get_clusters:
        add_column('cluster_name', clustername_lookup)

    if get_nodenames:
        add_column('node_name', nodename_lookup)

    return hits_df.set_index(['hit_value'])

In [64]:
def clustername_lookup(rcvd_series):
    """
    Look up the name of a row's cluster through the Graphika API.

    Parameters:
    rcvd_series (Series): Row providing 'map_id' and 'cluster_id'.

    Returns:
    The 'name' entry of the JSON response, or the string 'Map access needed'
    when the response cannot be decoded or has no 'name' entry.
    """
    map_id = rcvd_series['map_id']
    cluster_id = rcvd_series['cluster_id']
    url = 'https://api.graphika.com/maps/{}/clusters/{}'.format(map_id,cluster_id)
    # requests, username and pswd are not defined in this notebook's visible cells;
    # presumably they come from connect_to_api.ipynb - TODO confirm.
    r = requests.get(url, auth = (username,pswd))
    try:
        return r.json()['name']
    except Exception:
        # Best-effort fallback for undecodable or unexpected payloads.
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate, so a long row-wise run can still be interrupted.
        return 'Map access needed'

In [65]:
def nodename_lookup(rcvd_series):
    """
    Look up the name of a row's node through the Graphika API.

    Parameters:
    rcvd_series (Series): Row providing 'map_id' and 'node_id'.

    Returns:
    The 'name' entry of the JSON response, or the string 'Map access needed'
    when the response cannot be decoded or has no 'name' entry.
    """
    map_id = rcvd_series['map_id']
    node_id = rcvd_series['node_id']
    url = 'https://api.graphika.com/maps/{}/nodes/{}'.format(map_id,node_id)
    # requests, username and pswd are not defined in this notebook's visible cells;
    # presumably they come from connect_to_api.ipynb - TODO confirm.
    r = requests.get(url, auth = (username,pswd))
    try:
        return r.json()['name']
    except Exception:
        # Best-effort fallback for undecodable or unexpected payloads.
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate, so a long row-wise run can still be interrupted.
        return 'Map access needed'

In [108]:
def groupname_lookup(rcvd_series):
    """
    Look up the name of the group that a row's cluster belongs to.

    Makes two API requests: the cluster is fetched to read its 'group_no',
    then that group is fetched to read its 'name'.

    Parameters:
    rcvd_series (Series): Row providing 'map_id' and 'cluster_id'.

    Returns:
    The 'name' entry of the group response, or the string 'Map access needed'
    when either response cannot be decoded, lacks the expected entry, or the
    second request raises an exception.
    """
    map_id = rcvd_series['map_id']
    cluster_id = rcvd_series['cluster_id']
    url = 'https://api.graphika.com/maps/{}/clusters/{}'.format(map_id,cluster_id)
    # requests, username and pswd are not defined in this notebook's visible cells;
    # presumably they come from connect_to_api.ipynb - TODO confirm.
    r = requests.get(url, auth = (username,pswd))
    try:
        group_id = r.json()['group_no']
        url = 'https://api.graphika.com/maps/{}/groups/{}'.format(map_id,group_id)
        r = requests.get(url, auth = (username,pswd))
        return r.json()['name']
    except Exception:
        # Best-effort fallback. The second request runs inside this try, so a
        # bare except would also swallow a Ctrl-C issued while it is in flight;
        # `except Exception` lets KeyboardInterrupt and SystemExit propagate.
        return 'Map access needed'

In [66]:
def date_extractor(rcvd_series):
    """
    Format a row's hit_time as an hour-bucket label.

    Parameters:
    rcvd_series (Series): Row whose hit_time attribute is a datetime.

    Returns:
    str: Day, abbreviated month, year and hour followed by a literal '00',
    e.g. '27-Dec-2019 2200' for any time between 22:00 and 22:59.
    """
    # The trailing '00' is literal text, so minutes and seconds are discarded.
    hour_bucket_format = '%d-%b-%Y %H00'
    hit_time = rcvd_series.hit_time
    return datetime.strftime(hit_time, hour_bucket_format)

In [6]:
# hits_to_check_dict = {'urls':['theringer.com'],'retweets':['1227561237782855680'],'hashtags':['MAGA'],'mentions':['1343351467']}

# results_dict = {'urls':{},'retweets':{},'hashtags':{},'mentions':{}}


In [7]:
# for hit_type in hits_to_check_dict.keys():
#     for hit_value in hits_to_check_dict[hit_type]:
# #         print('{}: {}'.format(hit_type,hit_value))
#         results_dict[hit_type][hit_value] = (get_hit(hit_type,hit_value,50,'map_id'))
    

In [67]:

def get_summary(result):
    """
    Summarise how many distinct values each key column of a hits table holds.

    Parameters:
    result (DataFrame): Table with the columns node_id, message_id, map_id,
        hit_time and date.

    Returns:
    dict: Maps each of those column names to a two-item list:
        [number of unique values, value_counts() Series for the column].
        The unique counts are also printed, one line per column.
    """
    summary_columns = ('node_id', 'message_id', 'map_id', 'hit_time', 'date')

    # Compute every column's statistics before printing anything.
    hitcounts_dict = {
        column: [result[column].nunique(), result[column].value_counts()]
        for column in summary_columns
    }

    print('For the given hit value, there are:')
    for column, (unique_total, _counts) in hitcounts_dict.items():
        print('---> {} unique {}s'.format(unique_total, column))

    return hitcounts_dict


In [109]:
# Fetch up to 1000 'boycotthallmark' hashtag hits, keeping one row per map_id
# (sort_by is the de-duplication column). Group names are looked up (default);
# cluster and node name lookups are skipped.
temp = get_hit('hashtags','boycotthallmark',1000,sort_by = 'map_id',get_clusters = False,get_nodenames = False)

In [110]:
# Display the hits table returned by get_hit above.
temp

Unnamed: 0_level_0,node_id,message_id,hit_time,map_id,cluster_id,date,group_name,hit_type
hit_value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
boycotthallmark,3397576972,1210690490548596737,2019-12-27 22:34:41,2325,32,27-Dec-2019 2200,Map access needed,hashtags
boycotthallmark,3397576972,1210690490548596737,2019-12-27 22:34:41,2284,38,27-Dec-2019 2200,Global Health,hashtags
boycotthallmark,3397576972,1210690490548596737,2019-12-27 22:34:41,826,11,27-Dec-2019 2200,Map access needed,hashtags
boycotthallmark,3397576972,1210690490548596737,2019-12-27 22:34:41,1425,34,27-Dec-2019 2200,Map access needed,hashtags
boycotthallmark,3397576972,1210690490548596737,2019-12-27 22:34:41,1434,43,27-Dec-2019 2200,Map access needed,hashtags
...,...,...,...,...,...,...,...,...
boycotthallmark,30934646,1206038937946349568,2019-12-15 02:31:04,555,24,15-Dec-2019 0200,Map access needed,hashtags
boycotthallmark,192050063,1206025586138701824,2019-12-15 01:38:01,1866,7,15-Dec-2019 0100,Map access needed,hashtags
boycotthallmark,17684104,1206027023749206018,2019-12-15 01:43:44,1277,39,15-Dec-2019 0100,Map access needed,hashtags
boycotthallmark,17684104,1206027023749206018,2019-12-15 01:43:44,1561,24,15-Dec-2019 0100,Map access needed,hashtags


In [75]:
# Print the unique-value count per summary column and display the returned dict
# (each entry is [unique count, value_counts Series]).
get_summary(temp)

For the given hit value, there are:
---> 127 unique node_ids
---> 152 unique message_ids
---> 26 unique map_ids
---> 152 unique hit_times
---> 37 unique dates


{'node_id': [127, 17684104              6
  614236957             3
  2814174467            3
  4086456137            3
  780604465137061888    2
                       ..
  11622712              1
  2258357868            1
  172141399             1
  78442404              1
  54222811              1
  Name: node_id, Length: 127, dtype: int64],
 'message_id': [152, 1206035773742559232    1
  1206007100708659201    1
  1206009944622993409    1
  1206027229211365377    1
  1206060329551065088    1
                        ..
  1206172537408745473    1
  1206012646513926145    1
  1206027201520517120    1
  1206136753217822721    1
  1206080206060052481    1
  Name: message_id, Length: 152, dtype: int64],
 'map_id': [26, 2243    53
  2247    19
  2325    17
  2277     9
  2285     7
  2311     5
  2320     5
  2212     4
  2313     4
  2274     4
  2305     3
  2249     3
  2094     2
  2331     2
  2270     2
  2315     2
  2279     2
  2214     1
  2213     1
  2300     1
  1422     1
  

## TODO: Add a group lookup function
## TODO: For a given segment (map/group/cluster), report density, number of nodes, influence, and number of tweets

In [106]:
# NOTE(review): groupname_lookup as defined in this notebook requires a row argument
# (rcvd_series), so this bare call raises TypeError on a fresh kernel. The recorded
# output presumably came from an earlier definition (this cell's execution count
# precedes the definition cell's) - confirm and pass a row, e.g. one from `temp`.
groupname_lookup()

'Global Health'