In [None]:
import tweepy
import time
import pandas as pd
import numpy as np
import yaml


In [None]:

with open("config/config.yml", "r") as file:
    config = yaml.safe_load(file)

creds = config["twitter_credentials"]

bearer_token = creds["bearer_token"]

In [13]:
# Initialize the API client
client = tweepy.Client(bearer_token=bearer_token)

### 1. Single get_user Call

In [19]:
# Step 1: Get the user ID of @bundeskanzler
username = "_FriedrichMerz"
user = client.get_user(username=username)
user_id = user.data.id
print(f"User ID for @bundeskanzler: {user_id}")

TooManyRequests: 429 Too Many Requests
Too Many Requests

In [None]:
# Step 2: Get recent tweets (up to 100 per request)
tweets = client.get_users_tweets(
    id=user_id, 
    max_results=5,
    tweet_fields=["created_at", "lang", "public_metrics", "source"]
)

# Step 3: Print tweets
for tweet in tweets.data:
    print(f"- {tweet.text}\n")


In [None]:
columns=['tweet_id', 'text', 'attachments', 'author_id', 'conversation_id', 
         'created_at', 'in_reply_to_user_id', 'lang', 'non_public_metrics', 
         'organic_metrics', 'possibly_sensitive', 'promoted_metrics', 'public_metrics', 
         'referenced_tweets', 'reply_settings', 'source']
tweets_df = pd.DataFrame([[None]*len(columns)], columns=columns)

In [None]:
tweets_df

In [None]:
try:
    tweets = client.get_users_tweets(
        id=user_id,
        max_results=100,
        tweet_fields=['id', 'text', 'attachments', 'author_id', 'conversation_id', 'created_at', 'in_reply_to_user_id', 'lang', 'non_public_metrics', 'organic_metrics', 'possibly_sensitive', 'promoted_metrics', 'public_metrics', 'referenced_tweets', 'reply_settings', 'public_metrics', 'source']
    )


except tweepy.TooManyRequests as e:
    print("Rate limit exceeded. Please wait before trying again.")
    print(f"Retry after: {e.response.headers.get('x-rate-limit-reset')}")
    # You could wait programmatically if needed:
    time.sleep(15 * 60)


### 2. Load Pandas Dataframe

In [11]:
politicians_df = pd.read_excel("data/politicians3.xlsx", sheet_name="Sheet1", engine='openpyxl')

### 3. Add User ID to Politicians DF

In [20]:
def get_x_id(username):
    """
    Simulate getting the X (Twitter) ID from a username.
    Replace this with a real API call using Tweepy or requests.
    """
    try:
        user = client.get_user(username=username)
        return user.data.id
    except tweepy.TooManyRequests:
        print("Rate limit reached. Sleeping for 15 minutes...")
        time.sleep(15 * 60)  # 15 minutes
        return get_x_id(username)  # Retry after waiting
    except tweepy.NotFound:
        print(f"User '{username}' not found.")
        return np.nan  # User not found
    except tweepy.Unauthorized:
        print(f"Access denied for username '{username}'.")
        return np.nan  # Access denied
    except Exception as e:
        print(f"Unexpected error for username '{username}': {e}")
        return np.nan

In [21]:
def safe_get_x_id(username):
    if pd.isna(username) or username.lower() == "gelöscht":
        return np.nan
    return get_x_id(username)

In [22]:
test_df = politicians_df.head(10)

In [23]:
politicians_df["X_ID"] = politicians_df["USERNAME"].apply(safe_get_x_id)

Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit reached. Sleeping for 15 minutes...
Rate limit re

In [None]:
# prolematic usernames:
# MircoHanker
# linda_lobster
# HakanDemirNK
# 

In [24]:
politicians_df.to_excel('data/politicians4.xlsx', index=False)

In [27]:
politicians_df.columns

Index(['ID', 'GEBURTSDATUM', 'GEBURTSORT', 'GEBURTSLAND', 'STERBEDATUM',
       'GESCHLECHT', 'FAMILIENSTAND', 'RELIGION', 'BERUF', 'PARTEI_KURZ',
       'VITA_KURZ', 'VEROEFFENTLICHUNGSPFLICHTIGES', 'NACHNAME', 'VORNAME',
       'ORTSZUSATZ', 'ADEL', 'PRAEFIX', 'ANREDE_TITEL', 'AKAD_TITEL',
       'HISTORIE_VON', 'HISTORIE_BIS', 'WP_WP', 'WP_MDBWP_VON', 'WP_MDBWP_BIS',
       'WP_WKR_NUMMER', 'WP_WKR_NAME', 'WP_WKR_LAND', 'WP_LISTE',
       'WP_MANDATSART', 'WP_INSTITUTIONEN', 'USERNAME', 'X_ID'],
      dtype='object')

In [31]:
politicians_df[['ID', 'NACHNAME', 'VORNAME', 'USERNAME', 'X_ID']].tail(20)

Unnamed: 0,ID,NACHNAME,VORNAME,USERNAME,X_ID
612,11005603,Vollath,Sarah,,
613,11005604,Wagner,Sascha,,
614,11005605,Walch,Siegfried,SiegfriedWalch,56328380.0
615,11005606,Walter,Daniel,daniel_walter_,1.326838e+18
616,11005607,Weiser,Mathias,WeiserMathias,1.938687e+18
617,11005608,Weiss,Claudia,,
618,11005609,Wendorf,Sven,SvenWendorf,1.036697e+18
619,11005610,Wiegelmann,Johannes,JoWiegelmann,72551000.0
620,11005611,Willnat,Christin,,
621,11005612,Winkel,Johannes,johwinkel,473461900.0


In [18]:
politicians_df[['ID', 'NACHNAME', 'VORNAME', 'USERNAME', 'X_ID']].head(50)

Unnamed: 0,ID,NACHNAME,VORNAME,USERNAME,X_ID
0,11000756,Gysi,Gregor,GregorGysi,888289790.0
1,11002718,Laschet,Armin,ArminLaschet,835541664.0
2,11002720,Lemke,Steffi,,
3,11002733,Meister,Michael,meister_schafft,34214048.0
4,11002735,Merz,Friedrich,_FriedrichMerz,
5,11002735,Merz,Friedrich,bundeskanzler,
6,11002754,Rachel,Thomas,_ThomasRachel,
7,11002765,Röttgen,Norbert,n_roettgen,
8,11003034,Bartsch,Dietmar,DietmarBartsch,
9,11003132,Göring-Eckardt,Katrin Dagmar,GoeringEckardt,


In [None]:
import tweepy

# Initialize the client
client = tweepy.Client(bearer_token=bearer_token)

# Replace with the tweet ID you want to inspect
tweet_id = 1943616979077648534

# Request the tweet with public metrics
tweet = client.get_tweet(
    id=tweet_id,
    tweet_fields=["public_metrics", "created_at", "lang", "source"]
)

# Access the tweet data
data = tweet.data

print("Tweet Text:", data.text)
print("Likes:", data.public_metrics["like_count"])
print("Retweets:", data.public_metrics["retweet_count"])
print("Replies:", data.public_metrics["reply_count"])
print("Quotes:", data.public_metrics["quote_count"])
print("Created at:", data.created_at)
print("Language:", data.lang)
print("Source:", data.source)
