In [None]:
import requests
from datetime import datetime
import pandas
from time import sleep
import json

In [None]:
# Facebook channel ids of German parties (one id per line for easier diffing):
channel_ids_facebook = [
    "540404695989874",
    "47694585682",
    "21289227249",
    "47217143218",
    "78502295414",
    "47930567748",
]

# Instagram channel ids of German parties (one id per line for easier diffing):
channel_ids_instagram = [
    "2016981347",
    "1573431041",
    "1558377791",
    "211179",
    "2127438079",
    "537777060",
    "1484534097",
]

# format request URL
def params_to_query(param_dict):
    """Build a URL query string from a dict of request parameters.

    Entries whose value is ``None`` are left out. Keys and values are
    converted with ``str()`` and percent-encoded so that reserved characters
    (``&``, ``=``, spaces, ...) inside a value cannot corrupt the query.
    Commas and colons are kept literal, so comma-separated id lists and ISO
    timestamps are emitted unchanged.

    Args:
        param_dict: Mapping of parameter name to value (or ``None`` to skip).

    Returns:
        The ``key=value`` pairs joined with ``&``, without a leading ``?``.
        An empty mapping yields an empty string.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    return "&".join(
        f"{quote(str(key), safe='')}={quote(str(value), safe=',:')}"
        for key, value in param_dict.items()
        if value is not None
    )

In [None]:
# Crawl Facebook and Instagram posts.
facebook_token = "xxxxxxxxxxx" # placeholder: get your own token from the CrowdTangle dashboard for Facebook pages
instagram_token = "xxxxxxxxxxx" # placeholder: get your own token from the CrowdTangle dashboard for Instagram pages


def crawl_posts(token, channel_ids, start_date=datetime(2023, 1, 18), count=100):
    """Crawl posts of the given accounts and return them as a DataFrame.

    Builds a request URL for ``https://api.crowdtangle.com/posts``, follows
    the result pages via ``crawl_iter_posts`` and flattens every returned
    post with ``decode_post``.

    Args:
        token: API token sent as the ``token`` query parameter.
        channel_ids: Iterable of account id strings; sent comma-separated as
            the ``accounts`` parameter.
        start_date: ``datetime`` sent (ISO formatted) as ``startDate``.
            Defaults to 2023-01-18, the previously hard-coded value.
        count: Value sent as the ``count`` parameter (presumably the page
            size; verify against the CrowdTangle API docs). Defaults to 100.

    Returns:
        A ``pandas.DataFrame`` with one row per decoded post. It has no
        columns when the crawl yields no posts.
    """
    query = params_to_query({
        "token": token,
        "accounts": ",".join(channel_ids),
        "startDate": start_date.isoformat(),
        "sortBy": "date",
        # Stringified here so the value is safe for plain string joining.
        "count": str(count),
    })

    url = f"https://api.crowdtangle.com/posts?{query}"
    # Materialise the records explicitly instead of handing pandas a lazy map.
    records = [decode_post(entry) for entry in crawl_iter_posts(url)]
    return pandas.DataFrame(records)


# request data via request URL
def crawl_iter_posts(url):
    """Yield raw post entries from a paginated API response, page by page.

    Starting at ``url``, each page is fetched with ``requests.get``; the
    entries under ``result.posts`` are yielded and the crawl continues with
    ``result.pagination.nextPage`` until that key is missing or ``None``.
    A non-200 response prints the status code and ends the iteration, so the
    caller may receive only the pages fetched so far.

    Args:
        url: URL of the first page, or ``None`` to yield nothing.

    Yields:
        The post entries (dicts decoded from the JSON body), in response order.

    Raises:
        requests.exceptions.RequestException: On connection errors or when a
            request exceeds the timeout.
    """
    while url is not None:
        # NOTE(review): the printed URL contains the API token in its query.
        print(url)
        # Bound the wait so a stalled connection cannot hang the crawl forever.
        response = requests.get(url, timeout=30)

        if response.status_code != 200:
            print(response.status_code)
            break

        result = response.json()['result']
        yield from result['posts']

        # A missing key and an explicit null both end the pagination.
        url = result['pagination'].get('nextPage')


# store requested data in dictionary
def decode_post(entry):
    """Flatten one raw post entry into the flat record used for the output table.

    Args:
        entry: Dict describing a post. Reads ``platform``, ``platformId``,
            ``postUrl``, ``date`` (``"%Y-%m-%d %H:%M:%S"``) and ``type``, plus
            the optional keys ``message``/``link``/``media`` (Facebook) or
            ``description``/``expandedLinks`` (Instagram).

    Returns:
        Dict with the keys ``accountId``, ``postId``, ``content_url``,
        ``content_text``, ``content_date_created``, ``content_date_fetched``,
        ``content_type``, ``content_external_link`` and ``content_photo_link``.
        Both dates are strings formatted as ``"%Y-%m-%d %H:%M:%S"``;
        ``content_date_fetched`` is the local time of this call.

    Raises:
        ValueError: If ``platform`` is neither Facebook nor Instagram, or if
            ``date`` does not match the expected format.
    """
    date_format = "%Y-%m-%d %H:%M:%S"
    platform = entry['platform'].lower()
    content_url = entry['postUrl']

    if platform == 'facebook':
        # Facebook ids are unpacked as "<account>_<post>".
        account_id, post_id = entry['platformId'].split("_")
        content_text = entry.get('message', "")
        content_external_link = entry.get('link')
        # Only the first media item is considered, and only if it is a photo.
        # An empty media list is treated like a missing one.
        media = entry.get('media') or []
        if media and media[0].get('type') == 'photo':
            content_photo_link = media[0].get('url')
        else:
            content_photo_link = None
    elif platform == 'instagram':
        # Instagram ids are unpacked in the opposite order: "<post>_<account>".
        post_id, account_id = entry['platformId'].split("_")
        content_text = entry.get('description', "")
        # An empty expandedLinks list is treated like a missing one.
        expanded_links = entry.get('expandedLinks') or []
        content_external_link = expanded_links[0]['expanded'] if expanded_links else None
        # Appended verbatim, so the post URL is expected to end with "/".
        content_photo_link = content_url + "media/?size=l"
    else:
        # Fail with a clear message instead of an unbound-local error below.
        raise ValueError(f"unsupported platform: {entry['platform']!r}")

    # Parsing and re-formatting validates the incoming timestamp format.
    content_date_created = datetime.strptime(entry['date'], date_format)
    content_date_fetched = datetime.now()

    return {
        "accountId": account_id,
        "postId": post_id,
        "content_url": content_url,
        "content_text": content_text,
        "content_date_created": content_date_created.strftime(date_format),
        "content_date_fetched": content_date_fetched.strftime(date_format),
        "content_type": entry['type'],
        "content_external_link": content_external_link,
        "content_photo_link": content_photo_link,
    }


# download images
def download_images(df_posts, platform, limit=10):
    """Download the photos referenced by crawled posts into ``images/``.

    For each considered row with a photo link, the link is fetched and the
    response body is written to ``images/<name>.jpg``. ``<name>`` is taken
    from the ``content_url`` path: segment 5 of the "/"-split URL for
    Facebook, segment 4 otherwise. Failed downloads are reported on stdout.
    The function pauses 5 seconds after every download attempt.

    Args:
        df_posts: DataFrame with ``content_photo_link`` and ``content_url``
            columns.
        platform: ``"facebook"`` selects the Facebook file naming; any other
            value uses the Instagram naming.
        limit: Maximum number of leading rows to consider; ``None`` processes
            all rows. Defaults to 10, the previously hard-coded value.

    Returns:
        None.
    """
    # Local import keeps this block self-contained.
    import os

    image_dir = "images"
    # Index of the URL segment that serves as the file name.
    name_segment = 5 if platform == "facebook" else 4
    rows = df_posts if limit is None else df_posts.head(limit)

    for _, row in rows.iterrows():
        photo_link = row['content_photo_link']
        # Skips None as well as NaN (e.g. when the frame was re-read from CSV).
        if pandas.isna(photo_link):
            continue

        # Bound the wait so a stalled download cannot hang the whole run.
        response = requests.get(photo_link, timeout=30)
        if response.status_code == 200:
            file_name = row["content_url"].split("/")[name_segment] + ".jpg"
            # Create the target directory on demand instead of failing on open().
            os.makedirs(image_dir, exist_ok=True)
            with open(os.path.join(image_dir, file_name), "wb") as image_file:
                image_file.write(response.content)
        else:
            print(photo_link, " failed image download")
        # Throttle between requests.
        sleep(5)


# Run the pipeline: crawl both platforms, export the tables, then fetch images.
# The statements below perform network and file I/O at module level.
df_posts_facebook = crawl_posts(facebook_token, channel_ids_facebook)
df_posts_instagram = crawl_posts(instagram_token, channel_ids_instagram)

# Write each result table as a semicolon-separated CSV without the index column.
df_posts_facebook.to_csv("facebook_output.csv", sep=";", index=False)
df_posts_instagram.to_csv("instagram_output.csv", sep=";", index=False)

# Download the photos referenced by the crawled posts of each platform.
download_images(df_posts_facebook, "facebook")
download_images(df_posts_instagram, "instagram")