In [1]:
import os
from pymongo import MongoClient

In [2]:
from instagram.client import InstagramAPI

## Instagram credentials are read from environment variables so they never
## appear in the notebook; a missing variable raises KeyError right here.
INSTAGRAM_CLIENT_ID = os.environ['INSTAGRAM_DD_CLIENT_ID']
INSTAGRAM_CLIENT_SECRET = os.environ['INSTAGRAM_DD_CLIENT_SECRET']
INSTAGRAM_ACCESS_TOKEN = os.environ['INSTAGRAM_DD_ACCESS_TOKEN']
## NOTE(review): presumably the client IP reported to Instagram for signed
## requests -- confirm that loopback is what the API expects here.
client_ip = '127.0.0.1'
## Shared API client; the pull* helper functions in this notebook read this
## module-level `api` object directly.
api = InstagramAPI(client_id=INSTAGRAM_CLIENT_ID, 
                   client_secret=INSTAGRAM_CLIENT_SECRET, 
                   client_ips=client_ip,
                   access_token=INSTAGRAM_ACCESS_TOKEN)

In [3]:
def pullMediaByUser(user_id):
    """Collect every recent media item for one user, following pagination.

    The first request is keyed by ``user_id``; each later request is made
    with the pagination URL handed back by the previous response, until the
    API stops returning one.

    Args:
        user_id: identifier of the user whose media should be fetched.

    Returns:
        The list returned by the first request, extended in place with the
        items of every following page.
    """
    collected, cursor = api.user_recent_media(user_id=user_id)

    while True:
        ## a falsy cursor means the API reported no further page
        if not cursor:
            return collected

        page, cursor = api.user_recent_media(with_next_url=cursor)
        collected.extend(page)

In [4]:
def pullMediaByHashtag(tag_name, count=100):
    """Pull recent media for a hashtag until at least ``count`` items are held.

    Pages are requested through the module-level ``api`` client. Paging stops
    as soon as ``count`` items have been collected or the API returns no
    further pagination URL, whichever happens first. No request is made once
    the target is already met, so a first page that satisfies ``count`` costs
    exactly one API call.

    Args:
        tag_name: hashtag to search for (without the leading ``#``).
        count: minimum number of items wanted. Whole pages are kept, so the
            result may hold more than ``count`` items.

    Returns:
        The list returned by the first request, extended in place with the
        items of each following page.
    """
    progress_step = 1000

    media, next_ = api.tag_recent_media(tag_name=tag_name)
    ## first multiple of progress_step strictly above what is already held
    next_report = (len(media) // progress_step + 1) * progress_step

    while next_ and len(media) < count:
        more_media, next_ = api.tag_recent_media(with_next_url=next_, tag_name=tag_name)
        media.extend(more_media)

        ## Report whenever a 1000-item boundary is crossed (not only when the
        ## running total lands exactly on it), and stay quiet once the target
        ## has been reached.
        if next_report <= len(media) < count:
            print (len(media))
            next_report = (len(media) // progress_step + 1) * progress_step

    return media

In [20]:
def _mediaToDocument(media):
    """Flatten one Instagram media object into a plain dict for MongoDB."""
    item = {}

    ## user details
    item['username'] = media.user.username
    item['user_id'] = media.user.id

    ## media details
    item['id'] = media.id
    item['type'] = media.type
    item['filter'] = media.filter
    item['created_time'] = media.created_time

    ## caption is optional; store an empty string when there is none
    item['caption'] = media.caption.text if media.caption is not None else ''

    ## parse tags
    item['tags'] = [tag.name for tag in media.tags]

    ## parse comments
    item['comment_count'] = media.comment_count
    item['comments'] = [
        {
            'username': comment.user.username,
            'user_id': comment.user.id,
            'created_time': comment.created_at,
            'text': comment.text,
        }
        for comment in media.comments
    ]

    ## parse like information
    item['like_count'] = media.like_count
    ## adjust for liking own picture
    ## NOTE(review): user_has_liked presumably describes the account owning the
    ## access token rather than the media's author -- confirm this adjustment.
    if media.user_has_liked:
        item['like_count'] -= 1

    ## parse location if available; the attribute may be absent altogether,
    ## and either the location or its point may be None
    item['location'] = {}
    point = getattr(getattr(media, 'location', None), 'point', None)
    if point is not None and hasattr(point, 'latitude'):
        item['location']['latitude'] = point.latitude
        item['location']['longitude'] = point.longitude

    return item


def pullTagAndUploadIntoMongo(hashtag, numToPull, mongoUri='mongodb://localhost:27017/'):
    """Pull recent media for ``hashtag`` and insert it into MongoDB.

    Media is fetched with ``pullMediaByHashtag``, flattened into plain dicts
    and bulk-inserted into the collection ``<hashtag>-tags`` of the
    ``instagram`` database. The number of inserted documents is printed.

    Args:
        hashtag: hashtag to pull (without the leading ``#``); also used to
            build the collection name.
        numToPull: number of media items to request from pullMediaByHashtag.
        mongoUri: MongoDB connection URI; defaults to the local server.

    Returns:
        None.
    """
    ## use the function's own argument instead of a leftover notebook global
    recent_media = pullMediaByHashtag(hashtag, numToPull)
    print ("Pulled records")

    data = [_mediaToDocument(media) for media in recent_media]

    ## insert_many rejects an empty document list, so there is nothing to do
    if not data:
        print (0)
        return

    client = MongoClient(mongoUri)
    try:
        db = client['instagram']
        collectionName = hashtag + '-tags'
        collection = db[collectionName]

        result = collection.insert_many(data)
        print (len(result.inserted_ids))
    finally:
        ## always release the connection, even when the insert fails
        client.close()

In [21]:
## Pull recent #water posts (target: 10000) and store them in the
## 'water-tags' collection of the local 'instagram' MongoDB database.
pullTagAndUploadIntoMongo('water', 10000)

1000
2000
3000
4000
5000
6000
7000
8000
9000
Pulled records
10000


In [8]:
## see what's in the data
## NOTE(review): `recent_media` is not assigned at notebook level in any cell
## visible here, so this cell presumably relies on a variable left over from
## an earlier kernel session -- confirm, or pull a small sample first.
print (dir(recent_media[1]))

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__unicode__', '__weakref__', 'caption', 'comment_count', 'comments', 'created_time', 'filter', 'get_low_resolution_url', 'get_standard_resolution_url', 'get_thumbnail_url', 'id', 'images', 'like_count', 'likes', 'link', 'object_from_dictionary', 'tags', 'type', 'user', 'user_has_liked', 'users_in_photo']


In [10]:
## see what's in there
## NOTE(review): `data` is not assigned at notebook level in any cell visible
## here -- presumably left over from an earlier kernel session; confirm.

print(data[0])
print ('')
print(data[1])
print ('')
print(data[2])

{'location': {}, 'comment_count': 0, 'like_count': 34, 'caption': '#Repost @woowbike with @repostapp.\n・・・\nAveroy Island, Norway.\n--- #cycling #mtb #sports #ride #fitness #biking #activelife #outsideisfree #bike #biking #outdoor #mountain #bic #eurovelo #greenroutes #health #urbancycling #mountainbike #mountainbiking #field #bicycling #bicyclelife #bicycleride #bicycle #bikelife #bikephoto #bikeride #ridingbike #instabike', 'created_time': datetime.datetime(2015, 8, 20, 11, 17, 21), 'username': 'bisikletizm', 'filter': 'Normal', 'user_id': '1944314479', 'comments': [], 'id': '1055701448903753879_1944314479', 'tags': ['outdoor', 'bicycle', 'bicycleride', 'ridingbike', 'urbancycling', 'bikeride', 'mountainbike', 'bic', 'mtb', 'instabike', 'repost', 'mountain', 'cycling', 'mountainbiking', 'bicycling', 'greenroutes', 'ride', 'outsideisfree', 'sports', 'field', 'bicyclelife', 'bike', 'health', 'eurovelo', 'fitness', 'bikephoto', 'activelife', 'biking', 'bikelife'], 'type': 'image'}

{'lo

In [11]:
## Print created_time for the first ten documents in `data`.
for item in data[:10]:
    print(item['created_time'])

2015-08-20 11:17:21
2015-08-20 11:14:29
2015-08-20 11:06:52
2015-08-20 10:58:42
2015-08-20 10:46:07
2015-08-20 10:44:12
2015-08-20 10:23:47
2015-08-20 10:12:07
2015-08-20 09:18:44
2015-08-20 09:09:35


In [12]:
## Print created_time for the last ten documents in `data`.
for item in data[-10:]:
    print(item['created_time'])

2015-06-27 04:14:21
2015-06-27 04:10:47
2015-06-27 03:51:34
2015-06-27 03:45:57
2015-06-27 02:59:50
2015-06-27 02:52:24
2015-06-27 02:50:14
2015-06-27 02:41:21
2015-06-27 02:40:43
2015-06-27 02:39:00
