In [1]:
import csv
import http.client
import json
import os
import time
from urllib.parse import quote

In [2]:
def fetch_posts(conn, username, headers=None):
    """Fetch the account feed for ``username`` and return the decoded JSON.

    Args:
        conn: An open connection exposing ``request`` and ``getresponse``
            (e.g. ``http.client.HTTPSConnection``).
        username: Account name. It is percent-encoded before being placed in
            the query string, so unusual characters cannot corrupt the URL.
        headers: Request headers to send. When omitted, a module-level
            ``headers`` dict is used instead (backward compatibility with
            callers that only pass ``conn`` and ``username``).

    Returns:
        The parsed JSON payload of the response body.

    Raises:
        NameError: No ``headers`` argument was given and no module-level
            ``headers`` is defined.
        RuntimeError: The server answered with a non-2xx status.
    """
    if headers is None:
        # The parameter shadows the module-level name, so look it up explicitly.
        headers = globals().get("headers")
        if headers is None:
            raise NameError(
                "fetch_posts needs request headers: pass headers=... "
                "or define a module-level `headers` dict"
            )

    path = "/account-feed?username=" + quote(username, safe="")
    conn.request("GET", path, headers=headers)

    res = conn.getresponse()
    # Always drain the body, even on errors, so the connection stays usable.
    body = res.read().decode('utf-8')
    if not 200 <= res.status < 300:
        raise RuntimeError(
            f"GET {path} failed: HTTP {res.status} {res.reason}: {body[:200]}"
        )
    return json.loads(body)

In [3]:
# Keys copied from each raw post node by preprocess_posts().
fields_of_interest = [
    'shortcode',
    'owner',
    'is_video',
    'edge_media_to_caption',
    'edge_media_to_comment',
    'edge_liked_by',
    'taken_at_timestamp',
    'location',
    'display_url',
    'edge_sidecar_to_children',
]

In [4]:
# Extracts the fields of interest from each post
def preprocess_posts(posts, fields=None):
    """Reduce each raw post to the subset of keys we care about.

    Args:
        posts: Iterable of dicts, each holding the actual post under the
            ``"node"`` key.
        fields: Keys to copy from every node. Defaults to the module-level
            ``fields_of_interest`` list.

    Returns:
        A list with one dict per post containing only those requested keys
        that are present in the node; absent keys are skipped.

    Raises:
        KeyError: A post has no ``"node"`` entry.
    """
    if fields is None:
        fields = fields_of_interest

    return [
        {key: node[key] for key in fields if key in node}
        for node in (post["node"] for post in posts)
    ]
        

In [5]:
# Flattening the JSON
def flatten(posts):
    """Turn preprocessed posts into flat, CSV-ready dicts.

    Args:
        posts: Iterable of post dicts as produced by ``preprocess_posts``.

    Returns:
        A list with one flat dict per post. Every dict has the same keys in
        the same order, so the result can be written with a single CSV header.

    Raises:
        KeyError: A post lacks one of the fields read below.
    """
    flat_rows = []
    for post in posts:
        owner = post['owner']
        caption_edges = post['edge_media_to_caption']['edges']
        flat_rows.append({
            'owner_id': owner['id'],
            'owner_username': owner['username'],
            'shortcode': post['shortcode'],
            'is_video': post['is_video'],
            # Always emit the key (empty string when the post has no caption)
            # so that all rows share one set of columns.
            'caption': "".join(edge['node']['text'] for edge in caption_edges),
            'comments': post['edge_media_to_comment']['count'],
            'likes': post['edge_liked_by']['count'],
            'created_at': post['taken_at_timestamp'],
            'location': post['location'],
            'imageUrl': post['display_url'],
            # The sidecar key is only present on multi-image posts.
            'multiple_images': 'edge_sidecar_to_children' in post,
        })

    return flat_rows
            
        
        

In [6]:
def convert_to_csv(data, filename, fieldnames=None):
    """Write a list of dict rows to ``filename`` as UTF-8 encoded CSV.

    Args:
        data: Sequence of dicts, one per CSV row.
        filename: Destination path; an existing file is overwritten.
        fieldnames: Optional explicit column order. When omitted, the header
            is the union of the keys of all rows in first-seen order, so rows
            with differing keys are written with blanks instead of failing.

    An empty ``data`` with no ``fieldnames`` produces an empty file rather
    than raising.
    """
    if fieldnames is None:
        # dict.fromkeys de-duplicates while keeping first-seen order.
        fieldnames = list(dict.fromkeys(key for row in data for key in row))

    # Explicit UTF-8: captions may contain characters that the platform
    # default encoding cannot represent.
    with open(filename, 'w', newline='', encoding='utf-8') as file:
        if not fieldnames:
            return

        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

In [7]:
def scrape_account(username):
    """Download one account's feed and save it as ``./data/<username>.csv``.

    The API key is read from the ``RAPIDAPI_KEY`` environment variable and
    falls back to the ``"<API_KEY>"`` placeholder when that is unset.

    Args:
        username: Account name to scrape; also used as the CSV file name.
    """
    # fetch_posts() resolves `headers` at module level, so the dict has to be
    # bound there; a function-local variable would be invisible to it.
    global headers

    print(f"Started scraping: {username}")
    host = "instagram130.p.rapidapi.com"
    conn = http.client.HTTPSConnection(host)

    headers = {
        'X-RapidAPI-Key': os.environ.get("RAPIDAPI_KEY", "<API_KEY>"),
        'X-RapidAPI-Host': host
    }
    try:
        data = fetch_posts(conn, username)
    finally:
        # Release the socket whether or not the request succeeded.
        conn.close()

    posts = preprocess_posts(data)
    posts = flatten(posts)

    # Create the output directory on first use instead of failing on open().
    os.makedirs("./data", exist_ok=True)
    convert_to_csv(posts, "./data/" + username + ".csv")
    print(f"Done scraping: {username} :)")

In [8]:
# Account usernames to scrape; fill this list in before running the loop below.
accounts_list = []

In [12]:
# Scrape every configured account; a failure is reported and the batch continues.
for account in accounts_list:
    try:
        scrape_account(account)
    except Exception as e:
        # Include the account and exception type: str(e) alone is often
        # uninformative (a KeyError prints only the quoted key).
        print(f"Failed: {account}: {type(e).__name__}: {e}")

['1misssmeis', '3ala2o', '433', '6senseofficial', '7ikhals', '_foodstories_', '_hollyt', '_ingo_1', '_mariannejacobsen_', '_picolo', '_tinamaria', '_tuck4', 'a.sharif92', 'aaronsanimals', 'abanddoned', 'achievetheimpossible', 'adenorah', 'adesignersmind', 'adingattamimi_photography', 'adriancmurray', 'afnan_albatel', 'afofa', 'aguynamedpatrick', 'ahmet.erdem', 'aialahernando', 'aka.the.one', 'akusepp', 'aleksmusika', 'alenaakhmadullina', 'alessandro_carpentiero', 'alessioalbi', 'alexandrabring', 'alexandreagarza', 'alexcentomo', 'alexhonnold', 'alexiarayee', 'alexisren', 'alexmidler', 'alexstrohl', 'alice_gao', 'aliona_hilt', 'alissaviolet', 'alldaytravel', 'alliemtaylor', 'amandabisk', 'amandacerny', 'amiraa88', 'amivitale', 'ammish', 'amymarie', 'amytoensing', 'ana_lombardini', 'anadeliafitness', 'anddicted', 'andicsinger', 'andreabadendyck', 'andreadenver3', 'andreamelchiorre1', 'andreleonardooficial', 'andy_mann', 'andyheart', 'angela_mazzanti', 'angelicablick', 'anilarjandas', 'an