### Instagram API

In [None]:
# Instagram application credentials; both are left blank in this notebook and
# must be filled in before the client below can be used.
client_id = ''
client_secret = ''

from instagram.client import InstagramAPI

# documentation here - https://github.com/Instagram/python-instagram
# `api` is the client object the tag lookup in the next cell calls.
api = InstagramAPI(client_id=client_id, client_secret=client_secret)

In [None]:
# look up the hashtag and report how many media items it has
used_tag = 'sanbernardino'
tag_details = api.tag(used_tag)
ans = tag_details.media_count
print(ans)

In [None]:
import pickle
path = '/class/itpmssd/datasets/'

# Load the pickled media list for `used_tag`.
# NOTE: unpickling can execute arbitrary code; only load dataset files that
# come from a trusted source.
# The `with` block closes the file handle as soon as loading finishes.
with open(path + '%s_ig.p' % used_tag, 'rb') as media_file:
    all_media = pickle.load(media_file)

### Building an Instagram co-tag graph

In [None]:
# pip install networkx

import networkx as nx

# functions that help us construct the graph
def graph_add_node(n, g):
    """Add node ``n`` to graph ``g``, or bump its weight if already present.

    A node seen for the first time gets ``label`` set to ``n`` and ``weight``
    set to 1; every later call for the same node adds one to ``weight``.

    Values that cannot be used as node keys (unhashable objects) are skipped
    silently. Any other error is allowed to propagate so that real problems
    are not hidden.
    """
    try:
        if g.has_node(n):
            g.node[n]['weight'] += 1
        else:
            g.add_node(n)
            g.node[n]['label'] = n
            g.node[n]['weight'] = 1
    except TypeError:
        # Unhashable node ids cannot be stored in the graph; keep the
        # best-effort behaviour of skipping them.
        return
            
def graph_add_edge(n1, n2, g):
    """Count one co-occurrence of ``n1`` and ``n2`` on their edge in ``g``.

    The first call for a pair creates the edge with ``weight`` 1; each
    further call adds one to that weight.
    """
    if not g.has_edge(n1, n2):
        g.add_edge(n1, n2)
        g[n1][n2]['weight'] = 1
        return
    g[n1][n2]['weight'] += 1

In [None]:
from itertools import combinations

g = nx.Graph()

# Build the co-tag graph: every tag is a node, and two tags are linked each
# time they appear together on the same media item.
for media in all_media:
    if hasattr(media, 'tags'):
        tag_names = [tag.name for tag in media.tags]

        # one occurrence per tag on this item
        for name in tag_names:
            graph_add_node(name, g)

        # one co-occurrence per unordered pair of tags on this item
        for first, second in combinations(tag_names, 2):
            graph_add_edge(first, second, g)

In [None]:
# report the size of the co-tag graph
print('num nodes: %s' % g.number_of_nodes())
print('num edges: %s' % g.number_of_edges())

In [None]:
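# pip install python-louvain
# (python-louvain is the package that provides the `community` module imported
#  below; the package named `community` on PyPI is an unrelated project)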

from community import *

In [None]:
# partition the graph using Louvain Modularity
# The result is used by the following cells as a dict (via .values() and
# .items()) that maps each node of g to its cluster id.

part = best_partition(g)

In [None]:
# display the node -> cluster assignment
part

In [None]:
from collections import Counter

# Count the members of every cluster in one pass over the partition instead
# of re-scanning all assignments once per cluster.
cluster_sizes = Counter(part.values())

# part_vals: the cluster ids in ascending order.
# part_hist: the member count of each cluster, in the same order as part_vals.
part_vals = sorted(cluster_sizes)
part_hist = [cluster_sizes[cluster_id] for cluster_id in part_vals]

In [None]:
%pylab inline

# Set the figure size before plotting: the rcParams value only applies to
# figures created after it is changed.
rcParams['figure.figsize'] = 16,8

plot(part_vals, part_hist, 'ro-')
title('membership to cluster group')
xlabel('cluster group #')
ylabel('membership size')

# Rank clusters by size. Each size stays paired with its own cluster id, so
# clusters that happen to have the same size are each listed once (looking a
# size up with list.index() would return the first match every time).
limit = 10
ranked = sorted(zip(part_vals, part_hist),
                key=lambda pair: pair[1], reverse=True)[:limit]

top_clusters = []
print('top %s clusters:' % limit)
for cluster_id, size in ranked:
    print('cluster %s has %s members' % (cluster_id, size))
    top_clusters.append(cluster_id)

In [None]:
# cl_cen: cluster id -> degree centrality of each node inside that cluster
cl_cen = {}

for c in top_clusters:
    # restrict the graph to the nodes assigned to this cluster
    cluster_nodes = [node for node, cluster in part.items() if cluster == c]
    g_sub = g.subgraph(g.nbunch_iter(cluster_nodes))

    degrees = nx.degree(g_sub).values()
    avg_deg = round(mean(degrees), 5)

    # calculate degree centrality per node
    cl_cen[c] = nx.degree_centrality(g_sub)

    # printed as: cluster) avg_deg:<value> [node count,edge count]
    print('%s) avg_deg:%s [%s,%s]' % (c, avg_deg, g_sub.order(), g_sub.size()))

In [None]:
# show top words from within clusters, sorted by node centrality
for c in top_clusters:

    # label -> rounded degree centrality for every node in cluster c
    members = {}
    for m_id, cluster in part.items():
        if cluster != c:
            continue
        cur_sn = g.node[m_id]['label']
        members[cur_sn] = round(cl_cen[c][cur_sn], 4)

    # the 30 most central members, highest centrality first
    ranked_members = sorted(members.items(),
                            key=lambda item: item[1], reverse=True)

    print('cluster %s' % c)
    print(ranked_members[:30])
    print('')


In [None]:
path = '/class/itpmssd/datasets/'

# export the co-tag graph as GEXF and echo where it was written
tags_gexf = path + '%s_insta_tags.gexf' % used_tag
nx.write_gexf(g, tags_gexf)
print(tags_gexf)

### Now let's add location

In [None]:
import networkx as nx

def graph_add_node(n, g, params=None):
    """Add node ``n`` to graph ``g`` with attributes, or bump its weight.

    A node seen for the first time gets ``weight`` 1 plus every key/value in
    ``params``. Later calls for the same node only add one to ``weight``;
    their ``params`` are ignored.

    ``params`` is optional, so the two-argument call form
    ``graph_add_node(n, g)`` also works.

    Values that cannot be used as node keys (unhashable objects) are skipped
    silently. Any other error is allowed to propagate so that real problems
    are not hidden.
    """
    try:
        if g.has_node(n):
            g.node[n]['weight'] += 1
        else:
            g.add_node(n)
            attrs = g.node[n]
            attrs['weight'] = 1
            # Applied after the default weight, so a 'weight' key in params
            # still takes precedence.
            attrs.update(params or {})
    except TypeError:
        # Unhashable node ids cannot be stored in the graph; keep the
        # best-effort behaviour of skipping them.
        return
            
def graph_add_edge(n1, n2, weight, g):
    """Connect ``n1`` and ``n2`` in ``g`` and set that edge's ``weight``.

    Any weight already stored on the edge is overwritten.
    """
    g.add_edge(n1, n2)
    edge_attrs = g[n1][n2]
    edge_attrs['weight'] = weight

In [None]:
# what should our edges be?
# Let's try to show users with overlapping tags

from collections import defaultdict

tag_info = defaultdict(set)        # tag name -> ids of the users who used it
users_to_tags = defaultdict(set)   # user id  -> names of the tags they used

for m in all_media:
    # Skip media items without a tags attribute, as the other media loops in
    # this notebook do, instead of failing on them.
    if not hasattr(m, 'tags'):
        continue

    for t in m.tags:
        tag_info[t.name].add(m.user.id)
        users_to_tags[m.user.id].add(t.name)

In [None]:
g = nx.DiGraph()

# add nodes only for those users with geo location
for m in all_media:

    if not hasattr(m, 'tags'):
        continue

    if not hasattr(m, 'location'):
        continue

    # Only the attribute reads are guarded: a media item whose user or
    # location data is incomplete (for example a location without a point)
    # is skipped, while any other kind of error is allowed to surface.
    try:
        user_id = m.user.id
        params = {'label':m.user.username,
                  'likes':m.like_count,
                  'comments':m.comment_count,
                  'location_name':m.location.name,
                  'lat':m.location.point.latitude,
                  'lng':m.location.point.longitude
                  }
    except AttributeError:
        continue

    # One node per user; repeat posts by the same user only raise its weight.
    graph_add_node(user_id, g, params)

In [None]:
# here we get all users who used a hashtag
# (.get is used because indexing a defaultdict would insert an empty entry
#  when the tag is absent)
tag_info.get('prayfor', set())

In [None]:
# tags used by one user; .get is used because indexing the defaultdict would
# add an empty entry for an unknown id, and every key of users_to_tags is
# later treated as a user when pairs are compared
users_to_tags.get('2130803277', set())

In [None]:
# now let's generate a list of edges -> users who have at least 2 tags in common

# (user, user) -> number of tags the two users share
edgelist = defaultdict(int)

for u1, u2 in combinations(users_to_tags, 2):
    shared = len(users_to_tags[u1] & users_to_tags[u2])

    # keep the pair only when the users have more than one tag in common
    if shared > 1:
        edgelist[(u1, u2)] = shared
    

In [None]:
# display the (user, user) pairs that qualified as edges
edgelist.keys()

In [None]:
# Connect every pair of graph nodes that shares tags. edgelist holds each
# user pair in a single orientation, and the node order here can differ from
# the order used when edgelist was built, so look the pair up both ways round.
for n1, n2 in combinations(g.nodes(), 2):
    ans = edgelist.get((n1, n2)) or edgelist.get((n2, n1))
    if ans:
        graph_add_edge(n1, n2, ans, g)

In [None]:
# size of the user graph: node count first, then edge count
print(nx.number_of_nodes(g))
print(nx.number_of_edges(g))

In [None]:
# export the user/location graph as GEXF and echo where it was written
location_gexf = path + '%s_insta_location.gexf' % used_tag
nx.write_gexf(g, location_gexf)
print(location_gexf)

<img src="http://i.imgur.com/06mvfnK.png"/>

<img src="http://i.imgur.com/lChkrbz.png"/>

<img src="http://i.imgur.com/tGpaULp.png"/>