### Instagram API

In [None]:
# Instagram application credentials. They are left blank here; fill in your
# own values before running the cells that call the API.
client_id = ''
client_secret = ''

from instagram.client import InstagramAPI

# documentation here - https://github.com/Instagram/python-instagram
# Client built from the application id/secret only (no access token).
api = InstagramAPI(client_id=client_id, client_secret=client_secret)

In [None]:
# Look up the hashtag through the API and show its media_count
used_tag = 'PopeInNYC'
ans = api.tag(used_tag).media_count
print(ans)

In [None]:
import pickle
path = '/class/itpmssd/datasets/'

# Load the pickled media collection for `used_tag` from the dataset directory.
# The `with` block closes the file as soon as loading finishes.
# NOTE: unpickling can execute arbitrary code; only load dataset files that
# come from a trusted source.
with open(path+'%s_ig.p' % used_tag, 'rb') as media_file:
    all_media = pickle.load(media_file)

### Building an Instagram co-tag graph

In [None]:
# pip install networkx

import networkx as nx

# functions that help us construct the graph
def graph_add_node(n, g):
    """Add node ``n`` to graph ``g``, or count one more occurrence of it.

    A node seen for the first time gets ``label`` set to ``n`` and ``weight``
    set to 1.  Every later call for the same node adds 1 to its ``weight``.

    An id the graph refuses to store (for example an unhashable value) is
    skipped silently; any other error is allowed to propagate so that real
    problems are not hidden.

    Parameters:
        n: node identifier (here: a tag name).
        g: graph object providing ``has_node``/``add_node`` and a per-node
           attribute mapping (``g.node`` or ``g.nodes``).
    """
    try:
        if not g.has_node(n):
            g.add_node(n)
    except (TypeError, ValueError):
        # The graph rejected this id; keep going with the remaining nodes.
        return

    # networkx releases before 2.4 expose the attribute dicts as ``g.node``;
    # later releases only provide ``g.nodes``.
    node_attrs = g.node if hasattr(g, 'node') else g.nodes
    attrs = node_attrs[n]
    attrs.setdefault('label', n)
    attrs['weight'] = attrs.get('weight', 0) + 1
            
def graph_add_edge(n1, n2, g):
    """Link ``n1`` and ``n2`` in ``g``, counting repeats in the edge ``weight``.

    The first call for a pair creates the edge with ``weight`` 1; each
    further call for the same pair raises ``weight`` by 1.
    """
    if not g.has_edge(n1, n2):
        # first co-occurrence of this pair
        g.add_edge(n1, n2)
        g[n1][n2]['weight'] = 1
        return

    g[n1][n2]['weight'] += 1

In [None]:
from itertools import combinations

# Undirected co-tag graph: one node per tag name, one edge per pair of tags
# that appear together on a media item (weights count the occurrences).
g = nx.Graph()

for media in all_media:
    if hasattr(media, 'tags'):
        tag_names = [tag.name for tag in media.tags]

        # register every tag of this media item ...
        for tag in media.tags:
            graph_add_node(tag.name, g)

        # ... then connect each unordered pair of its tags
        for first, second in combinations(tag_names, 2):
            graph_add_edge(first, second, g)

In [None]:
# Report the size of the graph built above
print('num nodes: %s' % nx.number_of_nodes(g))
print('num edges: %s' % nx.number_of_edges(g))

In [None]:
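# pip install python-louvain
# (python-louvain is the package that provides the `community` module imported
#  below; the PyPI package that is itself named `community` is a different project)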

from community import *

In [None]:
from collections import Counter

# Community detection: `part` maps every node of g to a community id
part = best_partition(g)
part_vals = sorted(set(part.values()))

# Tally the members of each community in a single pass.  Counter accepts any
# iterable, so this works whether dict.values() is a list or a view, and it
# avoids rescanning all nodes once per community.
cluster_sizes = Counter(part.values())
part_hist = [cluster_sizes[x] for x in part_vals]

In [None]:
%pylab inline

plot(part_vals, part_hist, 'ro-')
title('membership to cluster group')
xlabel('cluster group #')
ylabel('membership size')
rcParams['figure.figsize'] = 16,8

top_clusters = []
limit = 10
print 'top %s clusters:' % limit
for i in sorted(part_hist, reverse=True)[:limit]:
    print 'cluster %s has %s members' % (part_hist.index(i), i)
    top_clusters.append(part_hist.index(i))

In [None]:
# Per-cluster results, keyed by cluster id
gc_sub = {}      # subgraph of g restricted to the cluster's nodes
avg_cc_sub = {}  # average clustering coefficient of that subgraph
clo_sub = {}     # (not filled in by this cell)
cl_cen = {}      # degree centrality of every node in the subgraph

for c in top_clusters:
    cluster_nodes = [x for x, y in part.items() if y == c]
    gc_sub[c] = g.subgraph(g.nbunch_iter(cluster_nodes))
    avg_cc_sub[c] = round(nx.average_clustering(gc_sub[c]), 5)

    # dict(...) accepts both the plain dict returned by networkx 1.x and the
    # (node, degree) view returned by networkx 2.x; compute it only once.
    degrees = list(dict(nx.degree(gc_sub[c])).values())
    avg_deg = round(mean(degrees), 5)
    med_deg = round(median(degrees), 5)

    # nx.triangles reports, per node, the triangles that node belongs to, so
    # the sum over all nodes counts each triangle three times.
    num_triangles = sum(nx.triangles(gc_sub[c]).values()) // 3
    cl_cen[c] = nx.degree_centrality(gc_sub[c])

    print('%s) avg_cc:%s, avg_deg:%s, med_deg:%s, num_triangles:%s [%s,%s]' % (c, avg_cc_sub[c], avg_deg, med_deg, num_triangles, gc_sub[c].order() ,gc_sub[c].size()))

In [None]:
# show top words from within clusters, sorted by centrality

# Per-node attribute dicts: `g.node` on networkx releases before 2.4,
# `g.nodes` on later ones.
node_attrs = g.node if hasattr(g, 'node') else g.nodes

for c in top_clusters:

    members = {}
    for m_id, cluster in part.items():
        if cluster != c:
            continue
        cur_sn = node_attrs[m_id]['label']
        # The centrality dict is keyed by node id, so look it up with m_id;
        # indexing it by label only works while label and id are the same.
        members[cur_sn] = round(cl_cen[c][m_id], 4)

    print('cluster %s' % c)
    # 30 most central members, highest centrality first
    print(sorted(members.items(), key=lambda x: -x[1])[:30])
    print('')


In [None]:
# Write the graph to a GEXF file in the dataset directory and show the file name
path = '/class/itpmssd/datasets/'
nx.write_gexf(g, path + '%s_insta_tags.gexf' % used_tag)
print(path + '%s_insta_tags.gexf' % used_tag)

### Now let's add location

In [None]:
import networkx as nx

def graph_add_node(n, g, params):
    """Add node ``n`` with attributes ``params`` to ``g``, or count a repeat.

    A node seen for the first time gets ``weight`` 1 and then every key/value
    pair from ``params``.  For a node that already exists only ``weight`` is
    increased by 1; ``params`` is ignored.

    An id the graph refuses to store (for example an unhashable value) is
    skipped silently; any other error is allowed to propagate so that real
    problems are not hidden.

    Parameters:
        n: node identifier.
        g: graph object providing ``has_node``/``add_node`` and a per-node
           attribute mapping (``g.node`` or ``g.nodes``).
        params: mapping of attribute name to value for a new node; ``None``
            is treated as no attributes.
    """
    try:
        is_new = not g.has_node(n)
        if is_new:
            g.add_node(n)
    except (TypeError, ValueError):
        # The graph rejected this id; keep going with the remaining nodes.
        return

    # networkx releases before 2.4 expose the attribute dicts as ``g.node``;
    # later releases only provide ``g.nodes``.
    node_attrs = g.node if hasattr(g, 'node') else g.nodes
    attrs = node_attrs[n]
    if is_new:
        attrs['weight'] = 1
        attrs.update(params or {})
    else:
        attrs['weight'] = attrs.get('weight', 0) + 1
            
def graph_add_edge(n1, n2, g):
    """Create the edge ``n1``-``n2`` in ``g`` or bump its ``weight``.

    A new edge starts with ``weight`` 1; an edge that is already present has
    its ``weight`` increased by 1.
    """
    already_linked = g.has_edge(n1, n2)
    if not already_linked:
        g.add_edge(n1, n2)

    edge_attrs = g[n1][n2]
    edge_attrs['weight'] = edge_attrs['weight'] + 1 if already_linked else 1

In [None]:
# Directed graph with one node per posting user; a user who appears on several
# media items keeps the attributes of the first one and gains weight.
g = nx.DiGraph()

for m in all_media:
    if not hasattr(m, 'location'):
        continue

    # Only the attribute reads are guarded: an item with incomplete user or
    # location data is skipped, while any other error still surfaces.
    try:
        user_id = m.user.id
        params = {'label':m.user.username,
                  'likes':m.like_count,
                  'comments':m.comment_count,
                  'location_name':m.location.name,
                  'lat':m.location.point.latitude,
                  'lng':m.location.point.longitude
                  }
    except AttributeError:
        continue

    graph_add_node(user_id, g, params)

In [None]:
# Node count, then edge count, of the current graph
print(g.number_of_nodes())
print(g.number_of_edges())

In [None]:
# Write the graph to a GEXF file in the dataset directory and show the file name
nx.write_gexf(g, path + '%s_insta_location.gexf' % used_tag)
print(path + '%s_insta_location.gexf' % used_tag)

### Now add edges to this graph

<img src="http://i.imgur.com/06mvfnK.png"/>

<img src="http://i.imgur.com/lChkrbz.png"/>

<img src="http://i.imgur.com/tGpaULp.png"/>

In [None]:
# each IG media object comes with a list of a few ppl who liked it (unfortunately not all)

for u in all_media[5].likes:
    print u.username, u.id, u.full_name, u.profile_picture

In [None]:
# Other interesting user information may be available directly from the IG API,
# but user relationship information requires authenticated calls.
# Get your Instagram access_token here - http://www.pinceladasdaweb.com.br/instagram/access-token/
# and copy the string from the access token generator,
# or implement it yourself - https://github.com/Instagram/python-instagram/blob/master/get_access_token.py

# Placeholder value: replace it with your own token before running.
access_token = 'YOUR_ACCESS_TOKEN'
# Rebinds `api` to a client that is created with the access token.
api = InstagramAPI(access_token=access_token)

In [None]:
# Call user_follows for a fixed user id through the token-based `api` client
# (presumably returns the accounts that user follows - confirm in the
# python-instagram documentation).
ans = api.user_follows(user_id=6222601)
# Bare expression so the notebook shows the result as the cell output.
ans