In [35]:
import json
import re
import requests

In [36]:
class InstagramUser:
    '''Plain record holding the details of one Instagram account.

    Every constructor argument is stored unchanged on the instance. Note that
    ``user_id`` is exposed as the ``id`` attribute; all other attributes share
    the name of the argument they come from.
    '''

    def __init__(self,
                 user_id,
                 username=None,
                 bio=None,
                 followers_count=None,
                 following_count=None,
                 is_private=False):
        # Identity of the account.
        self.id = user_id
        self.username = username
        self.is_private = is_private

        # Optional profile details; None when the caller did not supply them.
        self.bio = bio
        self.followers_count = followers_count
        self.following_count = following_count

In [37]:
class InstagramPost:
    '''Container for a single Instagram post plus small caption helpers.

    Constructor arguments are stored unchanged under attributes of the same
    name (``post_id``, ``code``, ``user``, ``caption``, ``display_src``,
    ``is_video``, ``created_at``).
    '''

    def __init__(self, post_id, code, user=None, caption="", display_src=None,
                 is_video=False, created_at=None):
        # Identifiers of the post.
        self.post_id = post_id
        self.code = code

        # Author and content.
        self.user = user
        self.caption = caption
        self.display_src = display_src
        self.is_video = is_video
        self.created_at = created_at

    def remove_newlines(self):
        '''Return the caption with newlines and carriage returns removed.

        Each ``\\n`` or ``\\r`` character is replaced by a single space, so a
        ``\\r\\n`` pair yields two spaces. A caption of None yields "".
        '''
        caption = self.caption
        if caption is None:
            return ""
        return re.sub('[\n\r]', ' ', caption)

    def extract_hashtags(self):
        '''Return the hashtags found in this post's caption, in order.

        A hashtag is ``#`` followed by one or more ASCII letters or digits;
        matching stops at any other character (including ``_``). A caption of
        None yields an empty list.
        '''
        if self.caption is None:
            return []
        # With no capture groups, findall already returns a fresh list of the
        # full matches, so no copy loop is needed.
        return re.findall("#[a-zA-Z0-9]+", self.caption)

In [54]:
class HashTagSearch:
    '''Performs searches on Instagram's hashtag search engine and extracts posts for a given hashtag.

    Does not extract all occurrences of the hashtag; instead, extracts the most recent uses of the tag.
    '''

    def __init__(self, request_timeout=10, error_timeout=10, request_retries=3):
        '''Store the settings and fetch the CSRF token and cookie string.

        Note that constructing an instance performs an HTTP HEAD request
        against ``instagram_root_url`` (see get_csrf_and_cookie_string).

        request_timeout -- passed as ``timeout`` to every ``requests`` call
                           made by this class.
        error_timeout   -- stored on the instance; not used by the code in
                           this class.
        request_retries -- stored on the instance; not used by the code in
                           this class.
        '''
        self.request_timeout = request_timeout
        self.error_timeout = error_timeout
        self.request_retries = request_retries
        self.instagram_root_url = "https://www.instagram.com"

        # Query Instagram for the CSRF token.
        self.csrf_token, self.cookie_string = self.get_csrf_and_cookie_string()

    def extract_recent_tag(self, tag):
        '''Extract recent Instagram posts for a given hashtag.

        Fetches the first page of results for ``tag``, hands the parsed posts
        to save_results, and keeps requesting further pages until a page comes
        back empty or the cursor stops advancing.
        '''
        url_string = self.instagram_root_url + "/explore/tags/%s/?__a=1" % tag
        response = requests.get(url_string, timeout=self.request_timeout).text
        result = json.loads(response)
        media = result["tag"]["media"]
        nodes = media["nodes"]
        cursor = media["page_info"]["end_cursor"]

        last_cursor = None
        while nodes and cursor != last_cursor:
            instagram_posts = self.extract_instagram_posts(nodes)
            self.save_results(instagram_posts)
            # Remember the cursor just consumed. If the next request returns
            # the same cursor (e.g. it failed or carried no page_info) the
            # loop stops instead of re-fetching the same page forever.
            last_cursor = cursor
            nodes, cursor = self.get_next_results(tag, cursor)

    def get_csrf_and_cookie_string(self):
        '''Connect to Instagram and return ``(csrf_token, cookie_string)``.

        The token is the ``csrftoken`` cookie and the cookie string is the raw
        ``set-cookie`` header of a HEAD request to ``instagram_root_url``.
        Raises KeyError if the response carries neither.
        '''
        resp = requests.head(self.instagram_root_url, timeout=self.request_timeout)
        return resp.cookies['csrftoken'], resp.headers['set-cookie']

    def get_next_results(self, tag, cursor):
        '''Get the next batch of results after ``cursor``.

        Returns ``(nodes, next_cursor)``. On any failure the error is printed
        and ``([], cursor)`` is returned (or whatever was parsed before the
        failure), so the caller sees an empty page and an unchanged cursor.
        '''
        nodes = []
        next_cursor = cursor
        post_data = self.get_query_param(tag, cursor)
        headers = self.get_headers(self.instagram_root_url + "/explore/tags/%s/" % tag)
        try:
            response = requests.post(self.instagram_root_url + "/query/", data=post_data,
                                     headers=headers, timeout=self.request_timeout)
            request_json = json.loads(response.text)

            if "media" in request_json and "nodes" in request_json["media"]:
                media = request_json["media"]
                nodes = media["nodes"]
                if "page_info" in media:
                    next_cursor = media["page_info"]["end_cursor"]
        except Exception as e:
            # Deliberately best-effort: report the problem and let the caller
            # stop paging rather than abort the whole extraction.
            print("Failed to fetch next results for tag %r: %s" % (tag, e))

        return nodes, next_cursor

    def extract_instagram_posts(self, nodes):
        '''Parse the given nodes from Instagram's JSON response into InstagramPost objects.

        Each node must provide ``owner``, ``id``, ``code``, ``display_src``,
        ``date`` and ``is_video``; ``caption`` is optional and defaults to None.
        '''
        posts = []
        for node in nodes:
            user = self.extract_owner_details(node["owner"])
            post = InstagramPost(node['id'], node['code'], user=user, caption=node.get("caption"),
                                 display_src=node["display_src"], created_at=node["date"],
                                 is_video=node["is_video"])
            posts.append(post)
        return posts

    @staticmethod
    def extract_owner_details(owner):
        '''Build an InstagramUser from an ``owner`` dict.

        ``id`` is required; ``username`` defaults to None and ``is_private``
        defaults to False when the key is absent.
        '''
        return InstagramUser(owner['id'],
                             username=owner.get("username"),
                             is_private=owner.get("is_private", False))

    def get_headers(self, referrer):
        '''Return the headers needed when querying Instagram.

        ``referrer`` is sent as the ``referer`` header; the cookie string and
        CSRF token obtained at construction time are included as well.
        '''
        return {
            "referer": referrer,
            "accept": "application/json, text/javascript, */*; q=0.01",
            "Accept-Language": "en-GB,en;q=0.8,en-US;q=0.6",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "cookie": self.cookie_string,
            "origin": "https://www.instagram.com",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/49.0.2623.87 Safari/537.36",
            "x-csrftoken": self.csrf_token,
            "x-instagram-ajax": "1",
            "X-Requested-With": "XMLHttpRequest"
        }

    @staticmethod
    def get_query_param(tag, end_cursor):
        '''Return the query params required to load the next page on Instagram.

        The result is a dict with the query text under ``q`` (requesting up to
        100 media nodes for ``tag`` after ``end_cursor``) and a fixed ``ref``.
        '''
        # Adjacent literals are joined at compile time; only the first line
        # contains format placeholders.
        query = (
            "ig_hashtag(%s) { media.after(%s, 100) {"
            "  count,"
            "  nodes {"
            "    caption,"
            "    code,"
            "    date,"
            "    dimensions {"
            "      height,"
            "      width"
            "    },"
            "    display_src,"
            "    id,"
            "    is_video,"
            "    likes {"
            "      count,"
            "      nodes {"
            "        user {"
            "          id,"
            "          username,"
            "          is_private"
            "        }"
            "      }"
            "    },"
            "    comments {"
            "      count"
            "    },"
            "    owner {"
            "      id,"
            "      username,"
            "      is_private"
            "    },"
            "    thumbnail_src"
            "  },"
            "  page_info"
            "}"
            " }"
        ) % (tag, end_cursor)
        return {
            'q': query,
            "ref": "tags::show"
        }

    def save_results(self, instagram_results):
        '''Print each post's caption on one line, followed by a blank line.'''
        for post in instagram_results:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(post.remove_newlines() + "\n")

In [56]:
# Demo run: constructing HashTagSearch contacts Instagram for a CSRF token,
# then the recent "christmas" captions are printed via save_results.
HashTagSearch().extract_recent_tag(tag="christmas")

#pizza #love #me #cute  #beautiful #girl  #picoftheday #instalike #uae #food #talk #pizza #life  #cat #insatgood #dubai  #Quotes #Inspiration #QuoteOfTheDay  #Quote  #selfie  #like4like #talk2me #photooftheday #fashion #model #tags4likes #christmas #fitness

🙌👀#photooftheday #holidayseason #amazing @sunraylabs #instagood #winter #season #ice #fun #beauty #cold #rain #snow #newyear #blizzard #nature #snowflakes #sunraylabs #staywarm #holidays #instawinter #celebrations #happy #wintertime #celebration #weekend #christmas #snowing #snowman #seasons #beautiful

#pizza #love #me #cute  #beautiful #girl  #picoftheday #instalike #uae #food #talk #pizza #life  #cat #insatgood #dubai  #Quotes #Inspiration #QuoteOfTheDay  #Quote  #selfie  #like4like #talk2me #photooftheday #fashion #model #tags4likes #christmas #fitness

#pizza #love #me #cute  #beautiful #girl  #picoftheday #instalike #uae #food #talk #pizza #life  #cat #insatgood #dubai  #Quotes #Inspiration #QuoteOfTheDay  #Quote  #selfie  #l