In [None]:
import json
import os
from datetime import datetime
from itertools import repeat
from urllib.parse import quote

import pandas
import requests

In [None]:
# Twitter account ids of the German parties whose timelines are crawled
channel_ids_twitter = [
    "26458162",
    "844081278",
    "20429858",
    "21107582",
    "14553288",
    "44101578",
    "39475170",
]

# build the query-string part of the request URL from a parameter dict
def params_to_query(param_dict):
    """Serialize a parameter dict into a URL query string.

    Entries whose value is ``None`` are left out. Keys and values are
    converted with ``str()`` and percent-encoded, so values containing
    characters such as ``&``, ``=`` or spaces cannot break the query.
    ``,`` and ``:`` are kept unescaped so comma-separated field lists and
    ISO timestamps stay readable in the resulting URL.

    Parameters
    ----------
    param_dict : dict
        Mapping of parameter name to value (or ``None`` to skip it).

    Returns
    -------
    str
        ``key=value`` pairs joined with ``&`` in dict order; an empty
        string when nothing is left to serialize.
    """
    return "&".join(
        f"{quote(str(key), safe='')}={quote(str(value), safe=',:')}"
        for key, value in param_dict.items()
        if value is not None
    )

In [None]:
# crawl tweets
def crawl_tweets(user_id, start_time="2023-01-20T00:00:00Z", max_results="100"):
    """Fetch the tweets of one user and return them as a DataFrame.

    Builds the ``/2/users/{user_id}/tweets`` request URL, pulls the raw
    entries through ``crawl_iter_tweets`` and converts each of them with
    ``decode_tweet``.

    Parameters
    ----------
    user_id : str
        Twitter user id whose timeline is requested.
    start_time : str or None, optional
        Value sent as the ``start_time`` query parameter. ``None`` leaves
        the parameter out of the request.
    max_results : str or int or None, optional
        Value sent as the ``max_results`` query parameter. ``None`` leaves
        the parameter out of the request.

    Returns
    -------
    pandas.DataFrame
        One row per decoded tweet; an empty frame if no entry was yielded.

    Notes
    -----
    The ``Authorization`` header is ``"Bearer <token>"`` when the
    environment variable ``TWITTER_BEARER_TOKEN`` is set to a non-empty
    value. Otherwise the original placeholder string is sent, which has to
    be replaced by a token from Twitter's developer platform.
    """
    query = params_to_query({
        "start_time": start_time,
        "max_results": None if max_results is None else str(max_results),
        "tweet.fields": ",".join([
            'id', 'text', 'created_at', 'lang', 'author_id',
            'entities', 'referenced_tweets', 'public_metrics',
        ]),
        "expansions": ",".join(['referenced_tweets.id']),
    })

    # Prefer a token from the environment so the secret never has to be
    # written into the notebook itself.
    token = os.environ.get("TWITTER_BEARER_TOKEN")
    headers = {
        "Authorization": f"Bearer {token}" if token else "xxxxxxxxxxxxxxxx"
    }

    url = f"https://api.twitter.com/2/users/{user_id}/tweets?{query}"

    # Materialize the records first: the DataFrame constructor takes a list
    # of dicts directly, one row per dict.
    records = [decode_tweet(entry, user_id) for entry in crawl_iter_tweets(url, headers)]
    return pandas.DataFrame(records)


# request data via request URL
def crawl_iter_tweets(url, headers):
    """Yield the raw tweet entries of a timeline request, page by page.

    Requests ``url`` and yields every element of the response's ``data``
    list. While the response carries ``meta.next_token``, the next page is
    requested by appending ``pagination_token=<token>`` to the original
    URL. Iteration stops at the first response without a next token or
    with a status code other than 200 (the status code is printed).

    Parameters
    ----------
    url : str or None
        Full request URL including its query string. ``None`` yields
        nothing.
    headers : dict
        HTTP headers sent with every request.

    Yields
    ------
    dict
        One decoded JSON object per tweet, exactly as returned by the API.
    """
    base_url = url
    while url is not None:
        print(url)
        # Bound the wait so a stalled connection cannot hang the notebook.
        r = requests.get(url, headers=headers, timeout=30)
        url = None  # set again below only if another page is advertised

        if r.status_code != 200:
            print(r.status_code)
            break

        body = r.json()

        # A page without tweets may carry no 'data' key at all, so do not
        # index it unconditionally.
        for entry in body.get('data', []):
            yield entry

        next_token = body.get('meta', {}).get('next_token')
        if next_token:
            separator = '&' if '?' in base_url else '?'
            url = f"{base_url}{separator}pagination_token={next_token}"


# store requested data in dictionary
def decode_tweet(entry, user_id):
    """Flatten one raw tweet entry into a record dict.

    Parameters
    ----------
    entry : dict
        Raw tweet object; must contain 'id', 'text', 'created_at' and
        'lang'. 'referenced_tweets' and 'entities' are optional.
    user_id : str
        Id placed into the path of the generated tweet URL.

    Returns
    -------
    dict
        Record with the keys content_3rdparty_id, content_text,
        content_url, content_date_fetched, content_date_created,
        content_lang, content_type and content_external_link.
    """
    tweet_id = entry['id']
    record = {
        "content_3rdparty_id": tweet_id,
        "content_text": entry['text'],
        "content_url": f"https://twitter.com/{user_id}/status/{tweet_id}",
        "content_date_fetched": datetime.now(),
        "content_date_created": datetime.strptime(entry['created_at'], '%Y-%m-%dT%H:%M:%S.%fZ'),
        "content_lang": entry['lang'],
    }

    # The type comes from the first referenced tweet; entries without any
    # reference are labelled "tweet".
    if 'referenced_tweets' in entry:
        record["content_type"] = entry['referenced_tweets'][0]['type']
    else:
        record["content_type"] = "tweet"

    # Only the first URL entity is kept as the external link.
    external_link = None
    if 'entities' in entry:
        entities = entry['entities']
        if 'urls' in entities:
            external_link = entities['urls'][0]['expanded_url']
    record["content_external_link"] = external_link

    return record


# run
# Crawl each channel once and combine the per-channel frames with a single
# concat; growing a DataFrame inside the loop copies all earlier rows again
# on every iteration.
frames = []
for user_id in channel_ids_twitter:
    frames.append(crawl_tweets(user_id))

# pandas.concat raises on an empty list, so fall back to an empty frame.
df_twitter = pandas.concat(frames) if frames else pandas.DataFrame()

# Semicolon-separated export; the row index is not written to the file.
df_twitter.to_csv("twitter_output.csv", sep=";", index=False)