In [None]:
import requests as re
import pandas as pd
import numpy as np
from datetime import datetime, timedelta,date

In [6]:
def fill_nodes_data(hits_df):
    """Add ``group_name``, ``cluster_name`` and ``node_name`` columns to ``hits_df``.

    Each column is computed by applying a lookup helper to every row
    (``groupname_lookup``, ``clustername_lookup`` and ``nodename_lookup``, all
    defined outside this cell) and is inserted immediately before whatever
    column is last at that moment.

    The frame is modified in place and the same object is returned.
    """
    def _insert_lookup_column(column, lookup):
        # Row-wise lookup first; the insert position is computed afterwards so
        # it reflects any column added by a previous call.
        looked_up = hits_df.apply(lookup, axis=1, result_type='reduce')
        hits_df.insert(loc=hits_df.shape[1] - 1, column=column, value=looked_up)

    _insert_lookup_column('group_name', groupname_lookup)
    _insert_lookup_column('cluster_name', clustername_lookup)
    _insert_lookup_column('node_name', nodename_lookup)

    return hits_df

In [None]:
def query_writer(table,map_id,limit = 10,dayrange = 30,platform = 'twitter'):
    """Build the SQL text that selects the recent hits recorded for one map.

    The query joins ``hits_<platform>_<table>`` with ``map_nodes`` on
    ``node_id`` and keeps rows whose ``map_id`` matches and whose ``hit_time``
    falls within the last ``dayrange`` days.

    Parameters
    ----------
    table : str
        Hit-table suffix (the callers in this notebook pass the hit type,
        e.g. ``'hashtags'``). Must contain only letters, digits, underscores.
    map_id : int or str
        Map identifier compared against ``s.map_id``.
    limit : int, False or None
        Maximum number of rows. Any falsy value (``False``, ``None``, ``0``)
        omits the ``LIMIT`` clause; ``0`` has always behaved this way because
        ``0 == False``.
    dayrange : int
        Width of the time window, in days, ending now.
    platform : str
        Platform part of the table name. Same character rules as ``table``.

    Returns
    -------
    str
        The SQL statement, ready to be passed to ``cursor.execute``.

    Raises
    ------
    ValueError
        If ``table`` or ``platform`` is not a plain identifier, or if a truthy
        ``limit`` cannot be converted to an integer.
    """
    def _checked_identifier(value, label):
        # Table names cannot be sent as bound parameters, so anything that is
        # not a plain identifier is rejected before it reaches the SQL text.
        text = str(value)
        if not text or not all(ch.isalnum() or ch == '_' for ch in text):
            raise ValueError(
                '{} must contain only letters, digits and underscores, got {!r}'.format(label, value)
            )
        return text

    table_name = 'hits_{}_{}'.format(
        _checked_identifier(platform, 'platform'),
        _checked_identifier(table, 'table'),
    )

    # int() keeps arbitrary text out of the LIMIT clause and prevents the
    # invalid "LIMIT None" the plain str() conversion used to produce.
    limit_clause = 'LIMIT {:d}'.format(int(limit)) if limit else ''

    # Sample the clock once so the window is exactly `dayrange` days wide.
    stamp_format = '%Y-%m-%d %H:%M:%S'
    window_end = datetime.now()
    window_start = window_end - timedelta(days = dayrange)

    # Double any single quote so the value cannot terminate the SQL literal.
    map_literal = str(map_id).replace("'", "''")

    return (
        "SELECT * FROM "
        "(SELECT * FROM {table} "
        "JOIN map_nodes ON map_nodes.node_id = {table}.node_id) s "
        "WHERE s.map_id = '{map_id}' "
        "AND s.hit_time BETWEEN '{start}'::TIMESTAMP AND '{end}'::TIMESTAMP "
        "{limit};"
    ).format(
        table = table_name,
        map_id = map_literal,
        start = window_start.strftime(stamp_format),
        end = window_end.strftime(stamp_format),
        limit = limit_clause,
    )



In [None]:
def get_hits(hit_type, hit_value, how_many = 10,days = 30,platform = 'twitter'):
    """Run the hits query and return the rows as a frame indexed by ``hit_value``.

    The SQL comes from ``query_writer(hit_type, hit_value, how_many, days,
    platform)``, so the ``hit_value`` argument is what ``query_writer`` receives
    as its ``map_id``. The query is executed on ``cur``, a cursor that is
    expected to exist at module level (it is not defined in this cell).

    Post-processing of the fetched rows:
      * ``node_id`` and ``message_id`` are cast to strings,
      * a constant ``hit_type`` column is added,
      * rows with a repeated ``id`` are dropped (first occurrence kept),
      * the helper columns ``node_id2``, ``id_2``, ``id`` and ``params`` are removed.
    """
    cur.execute(query_writer(hit_type, hit_value, how_many, days, platform))

    # Column labels for the joined hits/map_nodes row, in SELECT * order.
    raw_columns = ["id","hit_value", "node_id", "message_id","hit_time","params","id_2","node_id2","map_id","cluster_id"]
    frame = pd.DataFrame(cur.fetchall(), columns=raw_columns)

    for id_column in ("node_id", "message_id"):
        frame[id_column] = frame[id_column].astype("str")
    frame["hit_type"] = hit_type

    deduplicated = frame.drop_duplicates('id')
    trimmed = deduplicated.drop(columns = ['node_id2','id_2','id','params'])
    return trimmed.set_index(['hit_value'])

In [None]:
def get_top_trends(map_id,hit_type = 'hashtags',pltfrm = 'twitter',limit = 1000, days = 100):
    """Fetch the hits of ``hit_type`` for ``map_id`` by delegating to ``get_hits``.

    This only re-maps argument names: ``map_id`` is forwarded as ``hit_value``,
    ``limit`` as ``how_many`` and ``pltfrm`` as ``platform``.
    """
    return get_hits(
        hit_type = hit_type,
        hit_value = map_id,
        how_many = limit,
        days = days,
        platform = pltfrm,
    )

In [None]:
def get_map_group_count(map_id):
    """Return ``group_no + 1`` of the last group listed for ``map_id``.

    Calls the ``/maps/<map_id>/groups`` endpoint with the module-level
    ``username``/``pswd`` credentials. ``re`` is this notebook's alias for
    ``requests``, not the standard-library regex module.
    """
    endpoint = 'https://api.graphika.com/maps/{}/groups'.format(map_id)
    groups = re.get(endpoint, auth = (username, pswd)).json()
    # NOTE(review): presumably groups are ordered by a zero-based group_no, so
    # the last entry + 1 is the group count — confirm against the API.
    last_group = groups[-1]
    return last_group['group_no'] + 1

In [None]:
def get_live_map_ids():
    """Return ``(ids, names)`` for every map whose ``is_live`` flag equals ``True``.

    The map list is fetched from the ``/maps`` endpoint using the module-level
    ``username``/``pswd`` credentials (``re`` is this notebook's alias for
    ``requests``). Both returned lists follow the order of the API response.
    """
    response = re.get('https://api.graphika.com/maps', auth = (username, pswd))
    # Kept as an explicit comparison so only values equal to True qualify.
    live_maps = [entry for entry in response.json() if entry['is_live'] == True]
    ids = [entry['id'] for entry in live_maps]
    names = [entry['name'] for entry in live_maps]
    return ids, names

In [1]:
def graphika_trending(debug = False,days = 7):
    """Interactively pull recent hits from every live map into one frame.

    Prompts for the number of days and the hit type, fetches up to 10000 hits
    per live map through ``get_top_trends`` and stacks them. Outside debug
    mode the per-node distinct-map count from ``get_node_map_counts`` is merged
    in on ``node_id``. Finally offers to save the result via ``print_csv``.

    Parameters
    ----------
    debug : bool
        When true, only the first three live maps are queried and the
        map-count merge is skipped.
    days : int
        Look-back window used when the "days" prompt is answered with an empty
        line. A non-empty answer overrides it.

    Returns
    -------
    pandas.DataFrame
        The combined hits (an empty frame when there are no live maps).

    Raises
    ------
    ValueError
        If a non-empty answer to the "days" prompt is not an integer.
    """
    days_answer = input('>> Enter how many days of trending hits you would like to pull: ').strip()
    if days_answer:
        # Blank input keeps the `days` argument, which used to be ignored.
        days = int(days_answer)
    # Only twitter is queried; the platform prompt (twitter,youtube) is disabled.
    platform = 'twitter'
    hit_type = input('>> Enter the type of hit you would like to explore (hashtags,urls,media): ')

    id_list,name_list = get_live_map_ids()
    if debug:
        id_list = id_list[:3]
        name_list = name_list[:3]
    print('...Looking at live maps:')
    print('\n'.join(name_list))

    # Collect the per-map frames and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the accumulated frame every pass.
    per_map_hits = [get_top_trends(map_id,hit_type,platform,10000,days) for map_id in id_list]
    top_hits = pd.concat(per_map_hits) if per_map_hits else pd.DataFrame()

    # With no live maps there is no node_id column to count on or merge against.
    if not debug and 'node_id' in top_hits.columns:
        print('...Computing map counts')
        top_hits = top_hits.merge(get_node_map_counts(top_hits),how = 'left',left_on = 'node_id',right_index = True)
    print('...Done!')
    if input('>> Do you want to save these results to a CSV? (y/n) \n') == 'y':
        print_csv(top_hits)
    return top_hits

In [None]:
def get_node_map_counts(df):
    """Count the distinct ``map_id`` values seen for each ``node_id``.

    Returns a one-column frame indexed by ``node_id`` whose ``map_id_count``
    column holds the number of unique maps, sorted from most to fewest.
    """
    maps_per_node = pd.pivot_table(
        df,
        index='node_id',
        values='map_id',
        aggfunc=pd.Series.nunique,
    )
    ranked = maps_per_node.sort_values('map_id', ascending=False)
    return ranked.rename(columns={'map_id': 'map_id_count'})

In [None]:
def get_top_x_trends(top_tags,how_many = 30):
    """Summarise the ``how_many`` most frequent hit values in ``top_tags``.

    Parameters
    ----------
    top_tags : pandas.DataFrame
        Hits indexed by ``hit_value`` (the index must carry that name) with
        ``message_id``, ``map_id`` and ``node_id`` columns.
    how_many : int
        Number of most frequent hit values to summarise.

    Returns
    -------
    pandas.DataFrame
        One row per hit value, indexed by ``hit_value`` in descending frequency:
        ``hit_count`` / ``hit_share`` (rows, and % of all rows),
        ``tweet_count`` / ``tweet_share`` (distinct messages, and % of all
        distinct messages), ``map_count`` (distinct maps) and ``node_count`` /
        ``node_share`` (distinct nodes, and % of all distinct nodes).
        Shares are rounded to two decimals. Empty input gives an empty frame
        with the same columns.
    """
    print('...Getting a summary of top {} hits'.format(how_many))

    columns = ['hit_value', 'hit_count', 'hit_share', 'tweet_count', 'tweet_share',
               'map_count', 'node_count', 'node_share']

    def _share(part, whole):
        # Percentage rounded to two decimals; an empty denominator yields 0.0
        # rather than a ZeroDivisionError.
        return round(100 * part / whole, 2) if whole else 0.0

    # Loop-invariant work: build the grouping and the totals once, not per tag.
    by_hit = top_tags.groupby('hit_value')
    total_hits = top_tags.shape[0]
    total_messages = top_tags.message_id.nunique()
    total_nodes = top_tags.node_id.nunique()

    top_x = []
    for tag, hit_count in top_tags.index.value_counts().head(how_many).items():
        hit_count = int(hit_count)
        tag_rows = by_hit.get_group(tag)
        message_count = tag_rows.message_id.nunique()
        node_count = tag_rows.node_id.nunique()
        top_x.append({'hit_value': tag,
                      'hit_count': hit_count,
                      'hit_share': _share(hit_count, total_hits),
                      'tweet_count': message_count,
                      'tweet_share': _share(message_count, total_messages),
                      'map_count': tag_rows.map_id.nunique(),
                      'node_count': node_count,
                      'node_share': _share(node_count, total_nodes)})

    print('...Done!')
    # Passing `columns` keeps 'hit_value' present even when no rows were
    # collected, so set_index no longer raises KeyError on empty input.
    return pd.DataFrame(top_x, columns = columns).set_index('hit_value')


In [16]:
def summarize_hit(sum_df,res_df):
    """Prompt for a hit value, print its summary and return account metadata.

    Parameters
    ----------
    sum_df : pandas.DataFrame
        Per-hit summary indexed by hit value; the ``map_count``, ``hit_share``,
        ``tweet_share`` and ``node_share`` columns are read.
    res_df : pandas.DataFrame
        Raw hits indexed by hit value; the ``map_id``, ``node_id`` and
        ``map_id_count`` columns are read.

    Returns
    -------
    pandas.DataFrame or None
        ``None`` when the hit does not occur in ``res_df``. Otherwise one block
        of rows per distinct (node_id, map_id) pair as returned by
        ``get_node_metadata`` (indexed by ``name``), with ``influencer`` and
        ``map_id`` columns added, ``node_source_id`` renamed to ``node_id``
        and the name columns filled in by ``fill_nodes_data``.

    Notes
    -----
    Performs one HTTP request per (node_id, map_id) pair using the
    module-level ``username``/``pswd`` credentials; ``re`` is this notebook's
    alias for ``requests``.
    """
    hit = input('>> Enter a hit to get its summary: ')
    print('--------------------')
    print('{} SUMMARY'.format(hit))

    # All raw rows for this hit; reused by both the printed summary and the
    # metadata lookup instead of re-filtering res_df each time.
    hit_rows = res_df[res_df.index == hit]

    summary_row = sum_df[sum_df.index == hit]
    if summary_row.empty:
        print('**This is not a top hit**')
    else:
        print('{} appeared across {} live maps, and accounted for:\n-->{}% of all hits\n-->{}% of all posts\n-->and was shared by {}% of all accounts\n'.format( \
            hit,summary_row.map_count.values[0],summary_row.hit_share.values[0],summary_row.tweet_share.values[0],summary_row.node_share.values[0]))
        print('The maps {} appears in are:'.format(hit))
        # value_counts is computed once; items() yields (map_id, hit count)
        # in the same descending order the index had.
        for map_id, map_hits in hit_rows.map_id.value_counts().items():
            print('-->{} with \n---->{} hit(s)'.format(get_map_name(map_id),map_hits))
        print()

    if hit_rows.empty:
        print('**Hit not found**')
        return None

    # NOTE(review): the message below says "multiple maps" but the filter keeps
    # every row with map_id_count > 0 — confirm whether "> 1" was intended.
    id_list = hit_rows[hit_rows.map_id_count > 0][['node_id','map_id']].values.tolist()
    # dict.fromkeys removes duplicate pairs while keeping first-seen order, so
    # the output row order is reproducible (a set gave an arbitrary order).
    node_map_pairs = list(dict.fromkeys(map(tuple,id_list)))
    print('...Getting the metadata of accounts that appeared across multiple maps for this hit:')

    metadata_frames = []
    influencer_col = []
    maps = []
    for node_id, map_id in node_map_pairs:
        maps.append(map_id)
        url = 'https://api.graphika.com/nodes/{}'.format(node_id)
        node_info = re.get(url,auth = (username,pswd)).json()
        # An account counts as an influencer when the API lists at least one entry.
        influencer_col.append(len(node_info['influencers']) > 0)
        metadata_frames.append(get_node_metadata(node_id,map_id).set_index('name'))

    # Concatenate once: DataFrame.append was removed in pandas 2.0.
    influencer_df = pd.concat(metadata_frames) if metadata_frames else pd.DataFrame()
    # NOTE(review): these assignments assume get_node_metadata returns exactly
    # one row per pair; otherwise the lengths will not match — confirm.
    influencer_df['influencer'] = influencer_col
    influencer_df['map_id'] = maps
    influencer_df = influencer_df.rename(columns = {'node_source_id':'node_id'})
    return fill_nodes_data(influencer_df)