POST vs. GET methods

- two HTTP request methods commonly used for a request-response between a client and a server
- GET: query string sent in the URL
- POST: query string sent in the HTTP message body

Other differences

- POST requests are never cached
- POST requests do not remain in the browser history
- POST requests cannot be bookmarked
- POST requests have no restrictions on data length

### Instagram API

Go here to register your application - http://instagram.com/developer/
We need to generate an OAuth token and use the client_id and client_secret when making calls to the IG API.

We'll be using a Python wrapper (makes things much simpler), which you should install by running the following
command on your remote machine:


pip install python-instagram


Full documentation of the python library here: https://github.com/Instagram/python-instagram

In [1]:
#client_id = ''
#client_secret = ''

client_id = ''
client_secret = ''

from instagram.client import InstagramAPI

api = InstagramAPI(client_id=client_id, client_secret=client_secret)

### Instagram Tag Count

In [2]:
# get ig tag info -> how many IG posts have the given hashtag
used_tag = 'sanbernardino'
num_media_tag = api.tag(used_tag).media_count
print num_media_tag

178247


In [None]:
# get ig tag info
used_tag = 'GOPdebate'
num_media_tag = api.tag(used_tag).media_count
print num_media_tag

In [None]:
# get ig tag info
used_tag = 'IStandWithAhmed'
num_media_tag = api.tag(used_tag).media_count
print num_media_tag

In [None]:
# get ig tag info
used_tag = 'MillionStudentMarch'
num_media_tag = api.tag(used_tag).media_count
print num_media_tag

In [None]:
# get ig tag info
used_tag = 'PopeInNYC'
num_media_tag = api.tag(used_tag).media_count
print num_media_tag

### Instagram Media

In [3]:
# get ig media based on tag

max_tag_id = 0
media_per_query = 33
media, _next = api.tag_recent_media(media_per_query, max_tag_id, used_tag)

In [None]:
print media

In [None]:
print len(media)

In [4]:
m = media[0]
m

Media: 1132263260831759555_272596678

In [None]:
# object of type: IG media

type(m)

In [None]:
# returns a list of valid attributes for the given object -> IG media object / class

dir(m)

In [None]:
print m.link
print m.tags
print m.likes

In [5]:
# let's save all media items into an array called 'all_media'

all_media = []

for m in media:
    print m.created_time
    print m.id
    print m.user
    print m.link
    print m.__dict__.get('location',None)
    print ''
    all_media.append(m)

2015-12-04 02:32:00
1132263260831759555_272596678
User: craft_licious
https://www.instagram.com/p/-2m2_OGjzD/
None

2015-12-04 02:31:47
1132263151183235982_2094245673
User: pentaxflexxink3
https://www.instagram.com/p/-2m1ZGiouO/
None

2015-12-04 02:31:40
1132263088104486906_1359974172
User: elsteveador_15
https://www.instagram.com/p/-2m0eWwTf6/
None

2015-12-04 02:31:12
1132262856452358446_2289528471
User: bearwithhat808
https://www.instagram.com/p/-2mxGnNkEu/
None

2015-12-04 02:30:39
1132262582086156586_2289528471
User: bearwithhat808
https://www.instagram.com/p/-2mtHFtkEq/
None

2015-12-04 02:30:38
1132262572524052610_180169359
User: aztecparrot
https://www.instagram.com/p/-2ms-LxCCC/
Location: 237337848 (Point: (34.097346696, -117.295646148))

2015-12-04 02:30:30
1132262506772748000_2260927996
User: renacimientoyouth
https://www.instagram.com/p/-2msA8rtbg/
None

2015-12-04 02:30:28
1132262488896464718_1482772840
User: catspayneuters
https://www.instagram.com/p/-2mrwTLGdO/
None

201

In [None]:
# this item helps us paginate through the IG media results

print _next

In [None]:
# one way to parse data from the '_next_' object

from urlparse import urlparse

parsed = urlparse(next_)
print parsed
print ''
print parsed.query

In [None]:
# splitting the query portion of the parsed URL by '&' character

parsed.query.split('&')

In [None]:
# now splitting by the '=' character

[x.split('=') for x in parsed.query.split('&')]

In [None]:
# creating a dictionary 

print {a:b for a,b in [x.split('=') for x in parsed.query.split('&')]}

In [6]:
# let's make a function out of this -> parse out the max_tag_id parameter

def extract_max_tag_id(txt):
    """Return the ``max_tag_id`` query parameter of a URL.

    Args:
        txt: URL string whose query string carries ``max_tag_id``.

    Returns:
        The (percent-decoded) parameter value as a string. If the
        parameter appears more than once, the last occurrence is returned.

    Raises:
        KeyError: if the URL has no ``max_tag_id`` parameter.
    """
    # Imported locally so the function works on both Python 2 and Python 3.
    try:
        from urllib.parse import urlparse, parse_qs
    except ImportError:
        from urlparse import urlparse, parse_qs

    # parse_qs handles inputs the manual split('&') / split('=') choked on:
    # values that contain '=', keys without a value, and empty query strings.
    params = parse_qs(urlparse(txt).query)
    return params['max_tag_id'][-1]


In [7]:
# collection settings: number of items requested per call and a soft cap on
# how many media items to gather in total
max_tag_id = 0
media_per_query = 33
MAX_ITEMS = 20000

# first request: gives back the first batch of media plus a "next" value
# that is fed into the follow-up requests below
all_media, next_ = api.tag_recent_media(media_per_query, max_tag_id, used_tag)

# keep requesting for as long as the API hands back a truthy next_ value
while next_:
    more_media, next_ = api.tag_recent_media(count=media_per_query, tag_name=used_tag, with_next_url=next_)
    all_media.extend(more_media)
        
    # progress output: running total of collected items
    print len(all_media)
    # the cap is only checked after a batch has been appended, so the final
    # total can end up somewhat above MAX_ITEMS
    if len(all_media)>MAX_ITEMS:
        break

66
99
132
165
198
231
264
297
330
363
396
429
462
495
528
561
594
627
660
693
726
759
792
825
858
891
924
957
990
1023
1056
1089
1122
1155
1188
1221
1254
1287
1320
1353
1386
1419
1452
1485
1518
1551
1584
1617
1650
1683
1716
1749
1782
1815
1848
1881
1914
1947
1980
2013
2046
2079
2112
2145
2178
2211
2244
2277
2310
2343
2376
2409
2442
2475
2508
2541
2574
2607
2640
2673
2706
2739
2772
2805
2838
2871
2904
2937
2970
3003
3036
3069
3102
3135
3168
3201
3234
3267
3300
3333
3366
3399
3432
3465
3498
3531
3564
3597
3630
3663
3696
3729
3762
3795
3828
3861
3894
3927
3960
3993
4026
4059
4092
4125
4158
4191
4224
4257
4290
4323
4356
4389
4422
4455
4488
4521
4554
4587
4620
4653
4686
4719
4752
4785
4818
4851
4884
4917
4950
4983
5016
5049
5082
5115
5148
5181
5214
5247
5280
5313
5346
5379
5412
5445
5478
5511
5544
5577
5610
5643
5676
5709
5742
5775
5808
5841
5874
5907
5940
5973
6006
6039
6072
6105
6138
6171
6204
6237
6270
6303
6336
6369
6402
6435
6468
6501
6534
6567
6600
6633
6666
6699
6732
6765
6798
6831
6

In [9]:
len(all_media)

20031

In [8]:
# save our data in pickled format (this is in case our notebook crashes __gasp__ - so that we don't have to grab it again)

import pickle
path = '/class/itpmssd/datasets/'

# the with-block closes the file even if pickling fails, so the data is
# flushed to disk and the file handle is not leaked
with open(path+'%s_ig.p' % used_tag,'wb') as f:
    pickle.dump(all_media, f)

In [None]:
import pickle
path = '/class/itpmssd/datasets/'

# reload the pickled media list; the with-block closes the file handle as
# soon as loading finishes
# NOTE: unpickling can execute arbitrary code - only load files you created yourself
with open(path+'%s_ig.p' % used_tag,'rb') as f:
    all_media = pickle.load(f)

### Let's look at our data

In [None]:
# Number of unique users

users = set([m.user.username for m in all_media])
len(users)

In [None]:
# Show ten most liked IG posts

for m in sorted(all_media, key=lambda x: -x.like_count)[:10]:
    print m.like_count
    print m.caption
    print m.link, '\n'

In [None]:
# comments - 10 most commented on post

for m in sorted(all_media, key=lambda x: -x.comment_count)[:10]:
    print m.like_count, m.caption, m.link, '\n'

In [None]:
# likes vs. comments - like and comment counts of the 20 most liked posts

for m in sorted(all_media, key=lambda x: -x.like_count)[:20]:
    print m.like_count, m.comment_count, m.link

In [None]:
# use pandas - plot over time

import pandas as pd

df = pd.DataFrame({'times':[m.created_time for m in all_media], 'users':[m.user.username for m in all_media], 'posts':[m.link for m in all_media]})

In [None]:
def make_date(d):
    """Truncate a datetime to the start of its hour.

    Args:
        d: object with a datetime-style ``replace`` method
            (e.g. ``datetime.datetime``).

    Returns:
        A copy of ``d`` with minute, second and microsecond set to zero.
    """
    # microsecond is cleared as well; otherwise two timestamps from the same
    # hour could still compare unequal and end up in different groups
    return d.replace(minute=0, second=0, microsecond=0)

df['dt']=df[['times']].apply(lambda x: make_date(x['times']), axis=1)

In [None]:
df.set_index('dt')

In [None]:
%pylab inline

df.groupby(df.dt).size().plot(figsize=(20,6))
title('#%s - IG posts over time' % used_tag)

In [None]:
df[-10:]

In [None]:
df.groupby(df.dt).size()['2015-09-20':].plot(figsize=(20,6))
title('#%s - IG posts over time' % used_tag)

### Scatter Plot

In [None]:
%pylab inline

l = [m.like_count for m in all_media]
c = [m.comment_count for m in all_media]

# let's plot the relationship between these two variables

scatter(l,c,s=50,alpha=.1)
title('likes vs. comments')
ylabel('comment count')
xlabel('like count')


In [None]:
# log1p(x) = log(1 + x): posts with zero likes or zero comments map to 0
# instead of -inf, so they still show up in the plot
# (log1p comes from the same pylab namespace that provided log)
l_log = [log1p(m.like_count) for m in all_media]
c_log = [log1p(m.comment_count) for m in all_media]

# let's plot the relationship between these two variables (log scale)
rcParams['figure.figsize'] = 12,6
scatter(l_log,c_log,s=50,alpha=.1)
title('likes vs. comments (log scale)')
# the axes show transformed values, so the labels say so
ylabel('log(1 + comment count)')
xlabel('log(1 + like count)')


### tags

In [None]:
from collections import Counter

#all_tags = [t.name for t in m.tags for m in all_media if hasattr(m, 'tags')]

In [None]:
# flatten the tag names of every media item into one list;
# items that carry no 'tags' attribute contribute nothing
all_tags = []
for m in all_media:
    all_tags.extend(t.name for t in getattr(m, 'tags', []))

In [None]:
counted_tags = Counter(all_tags)
counted_tags.most_common(40)

### location

In [None]:
# check if the object - m - has a location attribute
location_names = set([m.location.name for m in all_media if hasattr(m, 'location')])
len(location_names)
location_names

In [None]:
locations = {m.user.username:m.location for m in all_media if hasattr(m, 'location')}

In [None]:
for u,loc in locations.items()[:10]:
    print u,loc

In [None]:
user_loc = locations['laverony']

print user_loc.id
print user_loc.name
print user_loc.point.latitude
print user_loc.point.longitude

In [None]:
# pip install geopy
# reverse geo-lookup -> for more geographic information
import json
from geopy.geocoders import Nominatim
geolocator = Nominatim()

location = geolocator.reverse((user_loc.point.latitude, user_loc.point.longitude))
print location.address
print json.dumps(location.raw, indent=1)
