POST vs. GET methods

- two HTTP request methods commonly used for a request-response between a client and a server
- GET: query string sent in the URL
- POST: query string sent in the HTTP message body

Other differences

- POST requests are never cached
- POST requests do not remain in the browser history
- POST requests cannot be bookmarked
- POST requests have no restrictions on data length

### Instagram API

Go here to register your application - http://instagram.com/developer/
We need to generate an OAuth token and use the client_id and client_secret when making calls to the IG API.

We'll be using a Python wrapper (it makes things much simpler), which you should install by running the following
command on your remote machine:


pip install python-instagram


Full documentation of the python library here: https://github.com/Instagram/python-instagram

In [None]:
# Fill in the credentials of your registered application and uncomment these
# two lines before running the cell; the InstagramAPI call below reads both names.
#client_id = ''
#client_secret = ''

from instagram.client import InstagramAPI

# Client object that every later cell uses to talk to the API
api = InstagramAPI(client_id=client_id, client_secret=client_secret)

### Instagram Tag Count

In [None]:
# Look up the hashtag and report how many IG posts carry it
used_tag = 'itpnyu'
tag_info = api.tag(used_tag)
num_media_tag = tag_info.media_count
print(num_media_tag)

In [None]:
# Look up the hashtag and report how many IG posts carry it
used_tag = 'GOPdebate'
tag_info = api.tag(used_tag)
num_media_tag = tag_info.media_count
print(num_media_tag)

In [None]:
# Look up the hashtag and report how many IG posts carry it
used_tag = 'IStandWithAhmed'
tag_info = api.tag(used_tag)
num_media_tag = tag_info.media_count
print(num_media_tag)

In [None]:
# Look up the hashtag and report how many IG posts carry it
used_tag = 'PrayForMina'
tag_info = api.tag(used_tag)
num_media_tag = tag_info.media_count
print(num_media_tag)

In [None]:
# Look up the hashtag and report how many IG posts carry it.
# `used_tag` keeps this value for the media cells that follow.
used_tag = 'PopeInNYC'
tag_info = api.tag(used_tag)
num_media_tag = tag_info.media_count
print(num_media_tag)

### Instagram Media

In [None]:
# Fetch one batch of recent media for the hashtag currently held in `used_tag`.
# The call returns a pair: the media items and a second value (`_next`) that
# later cells print and parse for pagination.

max_tag_id = 0
media_per_query = 33  # requested number of items for this query
media, _next = api.tag_recent_media(media_per_query, max_tag_id, used_tag)

In [None]:
# Dump the raw list of media objects returned by the query
print(media)

In [None]:
# How many items came back in this batch
print(len(media))

In [None]:
# Take the first media item; the bare `m` on the last line makes the notebook echo it
m = media[0]
m

In [None]:
# Show the class of the media object (an IG media type)

type(m)

In [None]:
# dir() lists the valid attribute names of the given object -- here the IG media
# object -- which is a quick way to discover what fields can be read from it

dir(m)

In [None]:
# A few of the fields available on a single media item
print(m.link)
print(m.tags)
print(m.likes)

In [None]:
# Let's save all media items into a list called 'all_media', printing a short
# summary of each item as we go.

all_media = []

# Iterate over `media`, the list fetched by the tag_recent_media call above
# (the previous name `ans_media` was never defined and raised a NameError).
for m in media:
    print(m.created_time)
    print(m.id)
    print(m.user)
    print(m.link)
    print('')  # blank line between items
    all_media.append(m)

In [None]:
# `_next` is what lets us paginate through the IG media results

print(_next)

In [None]:
# One way to parse data out of the `_next` pagination value

from urlparse import urlparse

# Use `_next`, the name bound by the tag_recent_media call above; `next_` is
# only assigned by a later cell, so referencing it here raised a NameError.
parsed = urlparse(_next)
print(parsed)
print('')
print(parsed.query)

In [None]:
# Split the query portion of the parsed URL on the '&' character,
# giving one 'name=value' string per parameter

parsed.query.split('&')

In [None]:
# Break every 'name=value' piece apart at the '=' character

[pair.split('=') for pair in parsed.query.split('&')]

In [None]:
# Turn the [name, value] pairs into a dictionary

print(dict(pair.split('=') for pair in parsed.query.split('&')))

In [None]:
# lets make a function out of this -> parse out the max_tag_id parameter

def extract_max_tag_id(txt):
    """Return the value of the ``max_tag_id`` query parameter of a URL.

    Args:
        txt: URL string whose query part contains a ``max_tag_id`` parameter.

    Returns:
        The parameter value as a string. If the parameter is repeated, the
        last occurrence wins.

    Raises:
        KeyError: if the URL has no ``max_tag_id`` parameter.
    """
    # Local import so the helper runs on both Python 2 and Python 3.
    try:
        from urlparse import parse_qs, urlparse
    except ImportError:
        from urllib.parse import parse_qs, urlparse

    # parse_qs copes with values that contain '=' and with parameters that
    # have no '=' at all; the manual split('=') raised ValueError on both.
    # keep_blank_values keeps 'max_tag_id=' mapping to an empty string.
    params = parse_qs(urlparse(txt).query, keep_blank_values=True)
    return params['max_tag_id'][-1]


In [None]:
# Collect media for `used_tag` page by page until there is no next page or
# we have gathered more than MAX_ITEMS items.
max_tag_id = 0
media_per_query = 33
MAX_ITEMS = 10000  # stop paginating once we hold more than this many items

# First page: returns the items plus the value used to request the next page
all_media, next_ = api.tag_recent_media(media_per_query, max_tag_id, used_tag)

# Keep going for as long as the API hands back a truthy `next_`
while next_:
    more_media, next_ = api.tag_recent_media(count=media_per_query, tag_name=used_tag, with_next_url=next_)
    all_media.extend(more_media)
        
    print len(all_media)  # progress indicator
    # The check runs after extending, so the final total can overshoot MAX_ITEMS
    if len(all_media)>MAX_ITEMS:
        break

In [None]:
# Total number of media items collected by the pagination loop
len(all_media)

In [None]:
# Save our data in pickled format (in case our notebook crashes __gasp__ --
# so that we don't have to grab it again)

import pickle
path = '/class/itpmssd/datasets/'

# The context manager closes (and flushes) the file even if dump() raises;
# the previous bare open() left the handle open.
with open(path+'%s_ig.p' % used_tag,'wb') as pickle_file:
    pickle.dump(all_media, pickle_file)

In [None]:
# Reload the media list saved for `used_tag` by the pickle.dump cell
import pickle
path = '/class/itpmssd/datasets/'

# NOTE: unpickling can execute arbitrary code -- only load files you wrote yourself.
# The context manager closes the file once loading finishes; the previous
# bare open() left the handle open.
with open(path+'%s_ig.p' % used_tag,'rb') as pickle_file:
    all_media = pickle.load(pickle_file)

### Let's look at our data

In [None]:
# How many distinct usernames posted the collected media

users = {post.user.username for post in all_media}
len(users)

In [None]:
# Show ten most liked IG posts

for m in sorted(all_media, key=lambda x: -x.like_count)[:10]:
    print m.like_count
    print m.caption
    print m.link, '\n'

In [None]:
# comments - 10 most commented on post

for m in sorted(all_media, key=lambda x: -x.comment_count)[:10]:
    print m.like_count, m.caption, m.link, '\n'

In [None]:
# comments - most commented on post

for m in sorted(all_media, key=lambda x: -x.like_count)[:20]:
    print m.like_count, m.comment_count, m.link

In [None]:
# Use pandas so we can plot activity over time: one row per media item

import pandas as pd

columns = {
    'times': [m.created_time for m in all_media],
    'users': [m.user.username for m in all_media],
    'posts': [m.link for m in all_media],
}
df = pd.DataFrame(columns)

In [None]:
def make_date(d):
    """Return *d* truncated to the start of its hour.

    Args:
        d: a datetime-like object supporting ``replace``.

    Returns:
        A copy of *d* with minute, second and microsecond set to zero.
    """
    # Zero the microseconds too; otherwise two timestamps within the same
    # hour would not compare equal and would land in different buckets.
    return d.replace(minute=0, second=0, microsecond=0)

# Bucket every post time with make_date. Applying it straight to the 'times'
# column replaces the row-wise DataFrame.apply + lambda, which built a Series
# per row only to read one value back out of it.
df['dt'] = df['times'].apply(make_date)

In [None]:
# NOTE: the result is not assigned back, so this cell only echoes the
# re-indexed frame; later cells still read the `dt` column from `df`.
df.set_index('dt')

In [None]:
%pylab inline

df.groupby(df.dt).size().plot(figsize=(20,6))
title('#%s - IG posts over time' % used_tag)

In [None]:
# Echo the last ten rows of the frame
df[-10:]

In [None]:
# Same chart, restricted to buckets from 2015-09-20 onwards
posts_per_bucket = df.groupby(df.dt).size()
recent_posts = posts_per_bucket['2015-09-20':]
recent_posts.plot(figsize=(20,6))
title('#%s - IG posts over time' % used_tag)

### Scatter Plot

In [None]:
%pylab inline

l = [m.like_count for m in all_media]
c = [m.comment_count for m in all_media]

# let's plot the relationship between these two variables

scatter(l,c,s=50,alpha=.1)
title('likes vs. comments')
ylabel('comment count')
xlabel('like count')


In [None]:
# Log-transformed counts. Use log(1 + n) rather than log(n): posts with zero
# likes or zero comments would otherwise produce log(0) = -inf.
l_log = [log(1 + m.like_count) for m in all_media]
c_log = [log(1 + m.comment_count) for m in all_media]

# Plot the relationship between the two transformed variables
rcParams['figure.figsize'] = 12,6
scatter(l_log,c_log,s=50,alpha=.1)
# Labels state the transformation so the axes are not mistaken for raw counts
title('likes vs. comments (log scale)')
ylabel('log(1 + comment count)')
xlabel('log(1 + like count)')


### tags

In [None]:
from collections import Counter

# NOTE: a one-line nested comprehension needs the `for m` clause before the
# `for t` clause (clauses run left to right, like nested loops):
#all_tags = [t.name for m in all_media if hasattr(m, 'tags') for t in m.tags]

In [None]:
# Flatten the tag names of every media item into one list
all_tags = []
for m in all_media:
    # Skip items that carry no `tags` attribute
    if not hasattr(m, 'tags'):
        continue
    all_tags.extend([t.name for t in m.tags])

In [None]:
# Tally how often each tag name occurs, then echo the 40 most frequent
counted_tags = Counter(all_tags)
counted_tags.most_common(40)

### location

In [None]:
# Distinct location names, taken only from media items that have a
# `location` attribute
location_names = {post.location.name for post in all_media if hasattr(post, 'location')}
len(location_names)  # not echoed: only the cell's last expression is shown
location_names

In [None]:
# Map username -> location for media items that have a `location` attribute.
# Keyed by username, so a user with several located posts keeps only the last one seen.
locations = {m.user.username:m.location for m in all_media if hasattr(m, 'location')}

In [None]:
for u,loc in locations.items()[:10]:
    print u,loc

In [None]:
# Inspect the location recorded for one specific username
user_loc = locations['laverony']

print(user_loc.id)
print(user_loc.name)
print(user_loc.point.latitude)
print(user_loc.point.longitude)

In [None]:
# pip install geopy
# Reverse geo-lookup -> turn the latitude/longitude into richer geographic information
import json
from geopy.geocoders import Nominatim

# Identify this application to the Nominatim service: its usage policy asks
# for a custom user agent, and current geopy releases refuse to build the
# geocoder without one.
geolocator = Nominatim(user_agent='itpmssd-instagram-notebook')

location = geolocator.reverse((user_loc.point.latitude, user_loc.point.longitude))
print(location.address)
print(json.dumps(location.raw, indent=1))
