Skip to content

Commit

Permalink
Fix new hashtag endpoint for graphql
Browse files Browse the repository at this point in the history
Hashtags can still be mined (with enough patience; this partially solves #3). Location pages, however, unfortunately cannot.
  • Loading branch information
do-me committed Apr 23, 2022
1 parent 0d62971 commit 2c656b9
Showing 1 changed file with 6 additions and 8 deletions.
14 changes: 6 additions & 8 deletions app/fast-instagram-scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
import threading

# just in the beginning: define empty variables
# IMPORTANT: when pausing (=interrupting jupyter) and resuming do not execute this cell!
# just execute the main loop below as last_cursor and post_list will be in memory
post_list = []

# a cursor is an arbitrary hash to paginate through Instagram's posts
Expand All @@ -45,10 +43,10 @@
# returns right Instagram link
def ilink(cursor="", mode=None, obj=None):
    """Build the Instagram "explore" URL for one page of results.

    Parameters
    ----------
    cursor : str
        Pagination cursor (``max_id``) taken from the previous response;
        an empty string requests the first page.
    mode : str, optional
        Either ``"location"`` or ``"hashtag"``.  Defaults to the
        module-level ``location_or_hashtag`` setting.
    obj : optional
        Location id or hashtag string.  Defaults to the module-level
        ``object_id_or_string`` setting.

    Returns
    -------
    str
        The full request URL.

    Raises
    ------
    RuntimeError
        If the mode is neither ``"location"`` nor ``"hashtag"``.
    """
    if mode is None:
        mode = location_or_hashtag
    if obj is None:
        obj = object_id_or_string
    # NOTE: the old /graphql/query/?query_hash=... endpoint was replaced by the
    # ?__a=1 explore endpoints; the response payload is read via its "graphql"
    # key elsewhere in this file.
    if mode == "location":
        return 'https://www.instagram.com/explore/locations/' + str(obj) + '/?__a=1&max_id=' + cursor
    elif mode == "hashtag":
        return 'https://www.instagram.com/explore/tags/' + str(obj) + '/?__a=1&max_id=' + cursor
    else:
        raise RuntimeError('location_or_hashtag variable must be location or hashtag')
Expand Down Expand Up @@ -150,12 +148,12 @@ def torsession():
print("Tor end node blocked.")
return # go back to main loop and get next session

if idata["data"][location_or_hashtag] == None:
if idata["graphql"][location_or_hashtag] == None:
print("No posts available!")
return "no_more_page"

# access response json
edge_to_media = idata["data"][location_or_hashtag]["edge_{}_to_media".format(location_or_hashtag)]
edge_to_media = idata["graphql"][location_or_hashtag]["edge_{}_to_media".format(location_or_hashtag)]

# if while scraping new posts appear, they will be considered!
total_posts = edge_to_media["count"]
Expand All @@ -166,7 +164,7 @@ def torsession():

# append location information for location scraping
if location_or_hashtag == "location":
ploc = idata["data"][location_or_hashtag]
ploc = idata["graphql"][location_or_hashtag]
ipage = add_locations_data_to_cleaned_node(ipage)
else:
ipage = add_locations_data_to_cleaned_node(ipage, just_clean=True)
Expand Down Expand Up @@ -419,4 +417,4 @@ def scrape_subprocess(one_obj):
# try:
# scrape()
# except:
# print("Finished with error - see log. Continuing with next item.")
# print("Finished with error - see log. Continuing with next item.")

0 comments on commit 2c656b9

Please sign in to comment.