In [None]:
# %run db_connect.ipynb
# %run misc_functions.ipynb
# api_creds = graphika_api_login()
# username = api_creds["username"]
# pswd = api_creds["password"]
# %run connect_to_api.ipynb

In [None]:
# Shared notebook state. The report functions below read and overwrite these
# names through `global` statements.
map_id = None       # id of the map the cached frames were last fetched for
debug_opt = True    # when true, get_hits_data appends a LIMIT to its query

# Empty placeholder frames; every name is bound to its own DataFrame object.
map_tweets, map_hashtags, map_urls, map_retweets, map_feature_data = (
    pd.DataFrame() for _ in range(5)
)

# Per-feature cache of hits, keyed by the feature type the user can search.
map_features = {
    feature_type: pd.DataFrame()
    for feature_type in ('hashtags', 'urls', 'retweets')
}

In [None]:
def get_node_data(map_id, grouped = True):
    '''Fetch a map's nodes from the Graphika API and label them with cluster/group names.

    Calls the map's ``nodes``, ``clusters`` and ``groups`` endpoints with the
    notebook-level ``username``/``pswd`` credentials and joins the results.

    Parameters
    ----------
    map_id
        Map identifier; it is formatted into the endpoint URLs.
    grouped : bool, default True
        When true, group names are joined in and only the columns
        ``screen_name``, ``node_id``, ``cluster_name`` and ``group_name`` are
        returned. When false, the node/cluster join is returned with all of its
        columns (``screen_name``, ``node_id``, ``cluster_id``, ``cluster_name``,
        ``group_id``).

    Returns
    -------
    pandas.DataFrame
        One row per node that matched a cluster (and a group, when ``grouped``);
        ``node_id`` is cast to ``str``.

    Raises
    ------
    requests.HTTPError
        If any of the three endpoints answers with a 4xx/5xx status.
    '''
    # TODO: this data should also be saved
    base_url = "https://api.graphika.com/clustermaps/{}".format(map_id)
    payloads = {}
    for resource in ("nodes", "clusters", "groups"):
        response = requests.get("{}/{}".format(base_url, resource), auth=(username, pswd))
        # Fail loudly on an error status instead of parsing an error body as map data.
        response.raise_for_status()
        payloads[resource] = response.json()

    nodes = payloads["nodes"]
    clusters = payloads["clusters"]["clusters"]
    groups = payloads["groups"]

    df_nodes = pd.DataFrame({
        "screen_name": [node["screenname"] for node in nodes],
        "node_id": [node["service_user_id"] for node in nodes],
        "cluster_id": [node["attentive_cluster_id"] for node in nodes],
    })
    df_clusters = pd.DataFrame({
        "cluster_id": [cluster["id"] for cluster in clusters],
        "cluster_name": [cluster["name"] for cluster in clusters],
        "group_id": [cluster["group"] for cluster in clusters],
    })
    # Cluster ids are cast to int before being joined against the nodes' cluster ids.
    df_clusters["cluster_id"] = df_clusters["cluster_id"].astype("int")
    # The groups payload is a mapping: its keys become group_id, each value's "name" the label.
    df_group = pd.DataFrame({
        "group_id": list(groups),
        "group_name": [group["name"] for group in groups.values()],
    })

    labelled = pd.merge(df_nodes, df_clusters, on="cluster_id")

    if grouped:
        labelled = pd.merge(labelled, df_group, on="group_id")
        labelled = labelled[["screen_name", "node_id", "cluster_name", "group_name"]]

    # assign() builds a new frame, so the cast never writes into the column-selected
    # slice above (which is what raised SettingWithCopyWarning before).
    return labelled.assign(node_id=labelled["node_id"].astype("str"))


def get_screen_names(map_id):
    '''Return the screen names and service user ids of a map's nodes.

    Calls the map's ``nodes`` endpoint with the notebook-level
    ``username``/``pswd`` credentials.

    Parameters
    ----------
    map_id
        Map identifier; it is formatted into the endpoint URL.

    Returns
    -------
    pandas.DataFrame
        Columns ``screen_name`` and ``id``, one row per node in the response.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with a 4xx/5xx status.
    '''
    url = "https://api.graphika.com/clustermaps/{}/nodes".format(map_id)
    response = requests.get(url, auth=(username, pswd))
    # Fail loudly on an error status instead of iterating an error body as if it were nodes.
    response.raise_for_status()
    nodes = response.json()
    return pd.DataFrame({
        'screen_name': [node['screenname'] for node in nodes],
        'id': [node["service_user_id"] for node in nodes],
    })

In [None]:
def get_hits_data(feature, map_id, use_map_dates = True, date_from = False, date_to = False, case = "default"):
    '''Query the hits of one feature type for a single map within a time window.

    The rows come from ``hits_twitter_<feature>`` joined to ``map_nodes`` and
    are read through the notebook-level cursor ``cur``. When the notebook-level
    ``debug_opt`` flag is true the query is capped with ``LIMIT 1000``.

    Parameters
    ----------
    feature : str
        Suffix of the hits table to read; only letters, digits and underscores
        are accepted because it becomes part of a table name.
    map_id
        Map identifier; must be convertible to ``int``.
    use_map_dates : bool, default True
        When true, the window is taken from ``get_map_dates(map_id)`` and
        ``date_from``/``date_to`` are ignored.
    date_from, date_to
        Window bounds, required when ``use_map_dates`` is false.
    case : {"default", "standardize"}
        ``"standardize"`` lower-cases the hit value in SQL; ``"default"``
        returns it unchanged.

    Returns
    -------
    pandas.DataFrame
        Columns ``message_id``, ``node_id``, ``time``, ``hit_value``,
        ``map_id`` and ``hit_type``; ``message_id`` and ``node_id`` are cast to
        ``str`` and ``hit_type`` is set to ``feature``.

    Raises
    ------
    ValueError
        For an unsafe ``feature``, a non-integer ``map_id``, an unknown
        ``case``, or a missing date bound when ``use_map_dates`` is false.
    '''
    import re  # local import: only the identifier check below needs it

    # Table names cannot be bound as query parameters, so the feature is restricted
    # to identifier characters before it is spliced into the SQL text.
    if not isinstance(feature, str) or re.fullmatch(r"[A-Za-z0-9_]+", feature) is None:
        raise ValueError("feature must contain only letters, digits and underscores, got {!r}".format(feature))

    try:
        map_id_value = int(map_id)
    except (TypeError, ValueError):
        raise ValueError("map_id must be an integer, got {!r}".format(map_id))

    hit_expressions = {"default": "hit_value", "standardize": "lower(hit_value)"}
    if case not in hit_expressions:
        raise ValueError("case must be 'default' or 'standardize', got {!r}".format(case))

    if use_map_dates:
        date_from, date_to = get_map_dates(map_id)
    elif not (date_from and date_to):
        # The False defaults used to be formatted into the SQL as 'False'::TIMESTAMP.
        raise ValueError("date_from and date_to are required when use_map_dates is False")

    limit = ' LIMIT 1000' if debug_opt else ''
    table = "hits_twitter_{}".format(feature)

    # Values are bound by the driver instead of being formatted into the SQL text.
    # NOTE(review): assumes `cur` uses the DB-API "format" paramstyle (%s), as
    # psycopg2 does -- confirm against db_connect.ipynb.
    query = (
        "SELECT * FROM "
        "(SELECT message_id, {table}.node_id, hit_time, {hit} as hit, map_nodes.map_id "
        "FROM {table} "
        "join map_nodes on map_nodes.node_id = {table}.node_id) s "
        "where s.map_id = %s "
        "and s.hit_time BETWEEN %s::TIMESTAMP AND %s::TIMESTAMP{limit};"
    ).format(table=table, hit=hit_expressions[case], limit=limit)

    print('...Querying database')
    cur.execute(query, (map_id_value, date_from, date_to))
    hits = cur.fetchall()

    print('...Morphing dataframe')
    hits_df = pd.DataFrame(hits, columns=["message_id","node_id", "time", "hit_value", "map_id"])
    # Ids are kept as strings so they merge cleanly with the string node ids from get_node_data.
    hits_df["node_id"] = hits_df["node_id"].astype("str")
    hits_df["message_id"] = hits_df["message_id"].astype("str")
    hits_df["hit_type"] = feature

    return hits_df


def get_map_dates(map_id):
    '''Return the bounds of a map's date range as UTC timestamp strings.

    Calls the map's endpoint with the notebook-level ``username``/``pswd``
    credentials and reads the first two entries of its ``date_range`` field,
    which are interpreted as POSIX timestamps.

    Parameters
    ----------
    map_id
        Map identifier; it is formatted into the endpoint URL.

    Returns
    -------
    tuple of (str, str)
        ``(earlier_date, later_date)`` formatted as ``'%Y-%m-%d %H:%M:%S'``.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with a 4xx/5xx status.
    '''
    from datetime import timezone  # local import: only the UTC conversion below needs it

    url = "https://api.graphika.com/clustermaps/{}".format(map_id)
    response = requests.get(url, auth=(username, pswd))
    # Fail loudly on an error status instead of looking up "date_range" in an error body.
    response.raise_for_status()
    map_dates = response.json()["date_range"]

    def _format_utc(timestamp):
        # fromtimestamp(..., tz=UTC) replaces datetime.utcfromtimestamp, which is
        # deprecated since Python 3.12; the formatted string is the same.
        return datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

    later_date = _format_utc(map_dates[1])
    earlier_date = _format_utc(map_dates[0])
    return earlier_date, later_date

In [None]:
def get_map_hits(feature,map_id):
    '''Fetch a map's node data and its ``feature`` hits, and join the two frames.

    The join uses pandas' default merge, i.e. an inner join on whatever
    columns the two frames have in common. Progress is printed at each step.

    Parameters
    ----------
    feature
        Feature type handed to ``get_hits_data``.
    map_id
        Map identifier handed to both ``get_node_data`` and ``get_hits_data``.

    Returns
    -------
    pandas.DataFrame
        The merged node/hit rows.
    '''
    print('...Getting map nodes')
    node_frame = get_node_data(map_id)

    print('...Getting hits')
    hit_frame = get_hits_data(feature, map_id)

    print('...Merging nodes with hits')
    joined = node_frame.merge(hit_frame)

    print('...Done!')
    return joined

In [None]:
def run_map_activity_report(debug = False):
    '''Interactively report tweet counts for a map by group, cluster or account.

    Prompts for a map id, fetches the map's tweet hits when the cached
    ``map_tweets`` frame cannot be reused, asks ``sort_by_count`` for the counts
    table, displays it and offers to pass it to ``print_csv``.

    Parameters
    ----------
    debug : bool, default False
        Stored in the notebook-level ``debug_opt`` flag, which ``get_hits_data``
        reads to decide whether to cap its query.

    Returns
    -------
    None
        The function works through prints, ``display`` and the module-level
        ``map_id`` / ``map_tweets`` cache.
    '''
    global debug_opt
    global map_id
    global map_tweets
    debug_opt = debug

    input_map_id = input(">> Enter map id: ")

    # `map_id` is shared with the feature report, so matching ids alone do not prove
    # that `map_tweets` belongs to this map; the cached rows' own map_id column
    # (produced by get_hits_data) is checked as well.
    cache_is_current = (
        input_map_id == map_id
        and not map_tweets.empty
        and "map_id" in map_tweets.columns
        and set(map_tweets["map_id"].astype(str)) == {input_map_id}
    )

    if cache_is_current:
        print('...Map data found!')
    else:
        print('...Fetching map data')
        # Fetch first and publish second: if the fetch raises, the previous
        # map_id/map_tweets pair stays consistent.
        fetched_tweets = get_map_hits("tweets", input_map_id)
        map_tweets = fetched_tweets
        map_id = input_map_id

    result = sort_by_count(map_tweets)
    if result is None:
        # sort_by_count returns None after printing its own message for an invalid
        # level; there is nothing to display or save.
        return

    display(result)
    if input('>> Do you want to save this result to a CSV? (y/n) \n') == 'y':
        print_csv(result)

In [None]:
def sort_by_count(df):
    '''Prompt for a grouping level and count the messages in ``df`` per group, cluster or account.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to count. It needs a ``message_id`` column plus the column for the
        chosen level: ``group_name``, ``cluster_name`` or ``screen_name``.

    Returns
    -------
    pandas.DataFrame or None
        A single column named ``<level>_tweet_count``, indexed by the level's
        values and sorted in descending order. ``None`` (after printing a
        message) when the entered level is not one of the three choices or its
        column is missing from ``df``.
    '''
    count_by = input("count by group, cluster, or account: ")
    countby_choice = {'group':'group_name','cluster':'cluster_name','account':'screen_name'}

    level_column = countby_choice.get(count_by)
    # Explicit validation replaces a bare `except:` that hid every kind of error.
    if level_column is None or level_column not in df.columns:
        print ("**Not a valid level to count tweets by**")
        return None

    # Count on the frame that was passed in, not on the global map_tweets.
    activity_counts = (
        df.groupby(level_column)["message_id"]
        .count()
        .rename(count_by + "_tweet_count")
        .sort_values(ascending=False)
    )
    return pd.DataFrame(activity_counts)

In [None]:
def run_feature_activity_report(debug = False):
    '''Interactively search a map's hashtag, url or retweet hits and count matches per account.

    Prompts for a map id, a feature type, case sensitivity and a comma-separated
    list of search terms. Hits are fetched through ``get_map_hits`` when the
    cached frame for that feature cannot be reused.

    With search terms, the rows whose ``hit_value`` contains a term are
    collected, the per-``screen_name`` counts are displayed, and both tables can
    be passed to ``print_csv``. Terms are handed to ``Series.str.contains`` with
    its default settings, so they are interpreted as regular expressions, and a
    row that matches several terms appears once per matching term.

    With no search terms (just Enter), every hit of the chosen feature is
    offered for saving and returned.

    Parameters
    ----------
    debug : bool, default False
        Stored in the notebook-level ``debug_opt`` flag, which ``get_hits_data``
        reads to decide whether to cap its query.

    Returns
    -------
    pandas.DataFrame or None
        All hits of the feature when no search term was entered; ``None`` after
        a search or when the feature type is not recognised.
    '''
    global debug_opt
    global map_id
    global map_features
    global map_feature_data
    debug_opt = debug

    input_map_id = input(">> Enter map id: ")
    feature_type = input(">> Search for hashtags, urls, or retweets: ")
    # Anything other than 'n' keeps the search case sensitive.
    case_sensitive = input(">> Is this search case sensitive? (y/n) \n") != 'n'
    raw_terms = input (">> Comma separate search parameters, or hit enter for all: ")

    # "".split(",") yields [''], which is truthy; blank entries are dropped so that
    # pressing Enter really selects the "all hits" path.
    search_terms = [term.strip() for term in raw_terms.split(",") if term.strip()]

    valid_features = ('hashtags', 'urls', 'retweets')
    if feature_type not in valid_features:
        print ("**Not a valid feature type to search**")
        return None

    if input_map_id != map_id:
        print('...Fetching map data')
        map_id = input_map_id
        map_features = {name: pd.DataFrame() for name in valid_features}

    # `map_id` is shared with the map activity report, so matching ids alone do not
    # prove that the cached frame belongs to this map; the cached rows' own map_id
    # column (produced by get_hits_data) is checked as well.
    cached = map_features.get(feature_type)
    cache_is_current = (
        cached is not None
        and not cached.empty
        and "map_id" in cached.columns
        and set(cached["map_id"].astype(str)) == {input_map_id}
    )
    if not cache_is_current:
        print('...Fetching {} data'.format(feature_type))
        map_features[feature_type] = get_map_hits(feature_type,map_id)

    map_feature_data = map_features[feature_type]

    if not search_terms:
        feature_results = map_feature_data
        if input('>> Do you want to save the search results to a CSV? (y/n) \n') == 'y':
            print_csv(feature_results)
        return feature_results

    matches = []
    for searchterm in search_terms:
        print('...Searching {} for <{}>'.format(feature_type,searchterm))
        # na=False: rows with a missing hit_value never match (a NaN in the mask
        # cannot be used for boolean indexing).
        is_match = map_feature_data.hit_value.str.contains(searchterm, case=case_sensitive, na=False)
        matches.append(map_feature_data[is_match])
    # DataFrame.append was removed in pandas 2.0; one concat keeps the same rows and index.
    feature_results = pd.concat(matches)

    if feature_results.empty:
        print ("**No results for search term entered**")
        return None

    user_counts = (
        feature_results.groupby("screen_name")["hit_value"]
        .count()
        .rename("number_of_tweets")
        .to_frame()
        .sort_values(by="number_of_tweets", ascending=False)
    )

    if input('>> Do you want to save the search results to a CSV? (y/n) \n') == 'y':
        print_csv(feature_results)
    display(user_counts)
    if input('>> Do you want to save the above table to a CSV? (y/n) \n') == 'y':
        print_csv(user_counts)
    return None
        