In [1]:
import tweepy
import time
import pandas as pd
import numpy as np
import requests
import yaml
from pathlib import Path
import json

In [3]:

# Load runtime parameters from the YAML config file.
# NOTE(review): "paramters" looks like a misspelling of "parameters" — confirm the
# actual file name on disk before renaming anything.
with open("config/paramters.yml", "r") as file:
    config = yaml.safe_load(file)

# Credentials section of the config; only the bearer token is read from it here.
creds = config["twitter_credentials"]

bearer_token = creds["bearer_token"]

# Excel workbook that is read with pd.read_excel (it provides the X_ID column).
input_file = "data/politicians4.xlsx"
# JSON Lines file that the collection loop appends fetched posts to.
output_file = Path("data/target_posts.jsonl")
# Checkpoint file: one already-processed user ID per line, used to skip IDs on re-runs.
done_file = Path("data/completed_ids.txt")


In [4]:
# Shared Tweepy client used by fetch_user_posts; it is authenticated with the
# bearer token read from the config file only (no user-context credentials).
client = tweepy.Client(bearer_token=bearer_token)

In [15]:
def fetch_user_posts(user_id, max_results=100, tweet_fields=None):
    """
    Fetch recent tweets for a given user ID using the module-level Tweepy client.

    Rate limits are handled by sleeping and retrying in a loop; every other
    failure is reported with ``print`` and turned into a ``None`` result.

    Args:
        user_id: Numeric user ID. Anything ``int()`` accepts works (int, digit
            string, or a float such as the values pandas produces for a numeric
            column); it is sent to the API as a plain digit string.
        max_results: Passed through as ``max_results`` to ``get_users_tweets``.
        tweet_fields: Optional list of tweet fields to request. When omitted, a
            default list of fields is used that leaves out ``non_public_metrics``,
            ``organic_metrics`` and ``promoted_metrics``.

    Returns:
        Whatever ``client.get_users_tweets`` returns on success, or ``None`` when
        the ID is not a valid integer, the user is not found, access is
        unauthorized, or any other unexpected error occurs.
    """
    # Validate the ID inside the "graceful" path: int(nan) raises ValueError,
    # int(inf) raises OverflowError and int(None) raises TypeError.
    try:
        user_id = str(int(user_id))
    except (TypeError, ValueError, OverflowError):
        print(f"Invalid user ID {user_id!r}; skipping.")
        return None

    if tweet_fields is None:
        # NOTE(review): the private metric fields (non_public_metrics,
        # organic_metrics, promoted_metrics) are documented as requiring
        # user-context authentication, while `client` is built from a bearer
        # token only, so they are not requested by default. Pass `tweet_fields`
        # explicitly to override.
        tweet_fields = [
            'id', 'text', 'attachments', 'author_id', 'conversation_id', 'created_at',
            'in_reply_to_user_id', 'lang', 'possibly_sensitive', 'public_metrics',
            'referenced_tweets', 'reply_settings', 'source'
        ]

    # Retry in a loop rather than recursing, so repeated rate limiting cannot
    # grow the call stack.
    while True:
        try:
            return client.get_users_tweets(
                id=user_id,
                max_results=max_results,
                tweet_fields=tweet_fields,
            )

        except tweepy.TooManyRequests:
            print("Rate limit reached. Sleeping for 15 minutes...")
            time.sleep(15 * 60 + 15)  # 15 minutes plus a small safety margin, then retry

        except tweepy.NotFound:
            print(f"User with ID '{user_id}' not found.")
            return None

        except tweepy.Unauthorized:
            print(f"Unauthorized access to tweets for user ID '{user_id}'.")
            return None

        except Exception as e:
            print(f"Unexpected error fetching tweets for user ID '{user_id}': {e}")
            return None


In [5]:
# Read the input workbook into a DataFrame.
df = pd.read_excel(input_file)

In [10]:
# Spot-check the identifying columns of the row at position 5.
# Note that X_ID is displayed as a float (trailing ".0"), not as an integer or string.
df[['ID', 'VORNAME', 'NACHNAME','USERNAME', 'X_ID']].iloc[5]

ID                       11002735
VORNAME                 Friedrich
NACHNAME                     Merz
USERNAME            bundeskanzler
X_ID        1915405915085562112.0
Name: 5, dtype: object

In [None]:
# Trial fetch for the single account at row position 5.
tweets = fetch_user_posts(df['X_ID'].iloc[5])

Rate limit reached. Sleeping for 15 minutes...


In [None]:
def fetch_user_posts(user_id, bearer_token, max_results=100):
    """
    Fetch tweets for one user through the module-level Tweepy ``client``.

    Args:
        user_id: User ID passed as ``id`` to ``client.get_users_tweets``.
        bearer_token: Unused; the shared ``client`` already carries its own
            credentials. Kept so existing call sites keep working.
        max_results: Passed as ``max_results`` to ``client.get_users_tweets``.

    Returns:
        Whatever ``client.get_users_tweets`` returns. Exceptions raised by the
        client (including rate-limit errors) are not caught here.
    """
    # NOTE(review): non_public_metrics, organic_metrics and promoted_metrics are
    # documented as requiring user-context authentication, while `client` is built
    # from a bearer token only, so they are not requested. The duplicated
    # 'public_metrics' entry is also dropped.
    tweet_fields = [
        'id', 'text', 'attachments', 'author_id', 'conversation_id', 'created_at',
        'in_reply_to_user_id', 'lang', 'possibly_sensitive', 'public_metrics',
        'referenced_tweets', 'reply_settings', 'source',
    ]
    tweets = client.get_users_tweets(
        id=user_id,
        max_results=max_results,  # honor the argument instead of a hard-coded 100
        tweet_fields=tweet_fields,
    )

    return tweets

In [None]:
# Load X IDs.
# Rows without an X_ID (NaN) are dropped and the rest are rendered as plain digit
# strings. A bare astype(str) on the float column would produce values such as
# "1.915405915085562e+18" and "nan", which are not valid numeric user IDs.
# NOTE(review): IDs that were read as float64 may already have lost low-order
# digits — confirm against the workbook, or store X_ID as text there.
df = pd.read_excel(input_file)
user_ids = [str(int(raw_id)) for raw_id in df["X_ID"].dropna()]

# Load completed IDs (one per line; blank lines are ignored).
if done_file.exists():
    with done_file.open(encoding="utf-8") as f:
        completed_ids = {line.strip() for line in f if line.strip()}
else:
    completed_ids = set()

# Main loop: fetch each remaining account, append its posts as JSON Lines, then
# record the ID in the checkpoint file so a re-run resumes where this one stopped.
for uid in user_ids:
    if uid in completed_ids:
        continue

    try:
        posts = fetch_user_posts(uid, bearer_token, max_results=100)
        if posts is None:
            # Nothing was fetched; leave the ID unmarked so it is retried next run.
            print(f"No response for {uid}; skipping.")
            continue

        # The client returns a response object whose `.data` holds the tweets
        # (None when the account has none). Each tweet's raw JSON dict lives in
        # its own `.data` attribute, which is what gets serialized.
        lines = [
            json.dumps({"user_id": uid, **tweet.data}) + "\n"
            for tweet in (posts.data or [])
        ]

        # Serialize first, write second: a serialization error cannot leave a
        # half-written batch in the output file.
        with output_file.open("a", encoding="utf-8") as out:
            out.writelines(lines)

        with done_file.open("a", encoding="utf-8") as done:
            done.write(uid + "\n")
        completed_ids.add(uid)  # also guards against duplicate IDs in the sheet

    except Exception as e:
        # Stop at the first failure; already-completed IDs are skipped on re-run.
        print(f"Error for {uid}: {e}")
        break