In [None]:
#Install required packages (run once per env)

!pip -q install tweepy python-dotenv pandas vaderSentiment matplotlib

In [None]:
import os
import re
import math
import time
import unicodedata
import datetime as dt
import pandas as pd
import matplotlib.pyplot as plt
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from dotenv import load_dotenv
import tweepy

# Read variables from a local .env file (if present) via python-dotenv.
load_dotenv()

# The bearer token is mandatory: stop right away with an actionable message
# when it is missing or empty instead of failing later inside an API call.
BEARER = os.environ.get("X_BEARER_TOKEN")
if BEARER is None or BEARER == "":
    raise RuntimeError("X_BEARER_TOKEN not found. Please create a .env file with X_BEARER_TOKEN.")

# wait_on_rate_limit=True asks tweepy to wait when a rate limit is reached.
client = tweepy.Client(wait_on_rate_limit=True, bearer_token=BEARER)

# Define search queries
We focus on Australian politics. Note that the Australian Labor Party is spelled **Labor** (not Labour). You can tweak `MAX_TWEETS_PER_QUERY`, the date window, and the keyword lists.

In [None]:
# Query config
MAX_TWEETS_PER_QUERY = 100  # Adjust based on your API tier/limits
# Look-back window in days. The recent-search endpoint only serves roughly the
# last 7 days; a start_time older than that is rejected by the API, so larger
# values are clamped below instead of failing the request.
DAYS_BACK = 7
LANG = "en"  # English only

# Stay slightly inside the 7-day limit so the timestamp is still valid when the
# fetch cell runs some time after this cell.
_RECENT_SEARCH_LIMIT = dt.timedelta(days=7) - dt.timedelta(hours=1)
_lookback = min(dt.timedelta(days=DAYS_BACK), _RECENT_SEARCH_LIMIT)

# Timezone-aware "now": datetime.utcnow() is deprecated since Python 3.12.
# The rendered string is the same UTC "YYYY-MM-DDTHH:MM:SSZ" format as before.
SINCE = (dt.datetime.now(dt.timezone.utc) - _lookback).strftime("%Y-%m-%dT%H:%M:%SZ")

# Hashtags and Aussie context terms

AUS_CONTEXT = ["auspol", "Australia", "Australian", "Canberra", "Parliament"]

# Party-focused keyword sets

LABOR_TERMS = [
    "Australian Labor Party", "ALP", "Labor Australia", "Anthony Albanese", "Albo", "@AlboMP", "@AustralianLabor"
]

LIBERAL_TERMS = [
    "Liberal Party of Australia", "Liberal Australia", "LNP", "Coalition Australia",
    "Sussan Ley",  # correct spelling: her first name has a double "s"
    "Susan Ley",   # common misspelling, kept so such posts still match
]

def build_query(terms, context=None, lang=None):
    """Build a recent-search query string for one party.

    The query has the shape
    ``(<term> OR <term> ...) (<context> OR ...) lang:<lang> -is:retweet``.
    Party terms and context terms are two separate groups joined by an
    implicit AND, so a post must mention the party AND an Australian context
    word. Previously both lists were merged into one OR group, which made
    every party query match any post containing e.g. "Australia".

    Parameters
    ----------
    terms : list of str
        Party-specific keywords, phrases or @handles. Must not be empty.
    context : list of str, optional
        Context keywords. Defaults to the module-level ``AUS_CONTEXT``.
        Pass an empty list to skip the context group entirely.
    lang : str, optional
        Language code for the ``lang:`` operator. Defaults to the
        module-level ``LANG``.

    Returns
    -------
    str
        The assembled query string.

    Raises
    ------
    ValueError
        If ``terms`` is empty (an empty ``()`` group is not a usable query).
    """
    if not terms:
        raise ValueError("build_query needs at least one search term")

    def _or_group(items):
        # Multi-word terms are quoted so they match as an exact phrase.
        return " OR ".join(f'"{item}"' if " " in item else item for item in items)

    context = AUS_CONTEXT if context is None else context
    lang = LANG if lang is None else lang

    query = f"({_or_group(terms)})"
    if context:
        query += f" ({_or_group(context)})"
    # Add -is:reply here as well to limit results to original posts.
    return f"{query} lang:{lang} -is:retweet"

# One ready-to-send search query per party label, built from its keyword list.
QUERIES = {
    party: build_query(party_terms)
    for party, party_terms in (("Labor", LABOR_TERMS), ("Liberal", LIBERAL_TERMS))
}

# Bare expression so the notebook displays the generated queries.
QUERIES

# Fetch recent posts with expansions and metrics

In [None]:
def _tweet_to_record(tweet, users_by_id):
    """Flatten one tweet object (plus its author, when known) into a plain dict."""
    record = {
        'id': tweet.id,
        'text': tweet.text,
        'created_at': tweet.created_at.isoformat() if tweet.created_at else None,
        'lang': tweet.lang,
        'public_metrics': tweet.public_metrics,
        'author_id': tweet.author_id,
    }
    # Author fields are only added when the author was returned in the expansion.
    author = users_by_id.get(tweet.author_id)
    if author is not None:
        record.update({
            'username': author.username,
            'name': author.name,
            'verified': author.verified,
        })
    return record


def fetch_recent_safe(query, max_results, since_iso, client):
    """Collect up to ``max_results`` recent posts matching ``query``.

    Pages through ``client.search_recent_tweets`` until the target count is
    reached, the API returns no data, or no ``next_token`` is provided.

    Parameters
    ----------
    query : str
        Search query passed to the API unchanged.
    max_results : int
        Total number of posts wanted. Values <= 0 return an empty list
        without calling the API.
    since_iso : str
        Value forwarded as ``start_time``.
    client : object
        Object exposing ``search_recent_tweets(...)`` (e.g. ``tweepy.Client``).

    Returns
    -------
    list of dict
        One dict per post with keys ``id``, ``text``, ``created_at`` (ISO
        string or None), ``lang``, ``public_metrics``, ``author_id`` and, when
        the author was included in the response, ``username``, ``name``,
        ``verified``. Never longer than ``max_results``.

    Notes
    -----
    * The endpoint only accepts a per-request ``max_results`` between 10 and
      100. The page size is clamped to that range and surplus rows are
      trimmed, so targets such as 5 or 105 no longer send an invalid request.
      A padded page may count more posts against the API quota than are kept.
    * On ``tweepy.TooManyRequests`` the function waits 60 s and retries.
      NOTE(review): a client created with ``wait_on_rate_limit=True`` is
      expected to wait internally, in which case this branch should not fire.
    * Ctrl+C returns whatever has been collected so far; other exceptions
      propagate to the caller.
    """
    api_min_page, api_max_page = 10, 100
    results = []
    next_token = None

    print(f"🔍 Fetching: '{query}' (target: {max_results} tweets)")

    while len(results) < max_results:
        try:
            remaining = max_results - len(results)
            # Keep the request inside the range the API accepts.
            page_size = max(api_min_page, min(api_max_page, remaining))

            print(f"⏳ Progress: {len(results)}/{max_results} tweets", end="\r")

            resp = client.search_recent_tweets(
                query=query,
                max_results=page_size,
                start_time=since_iso,
                tweet_fields=["id","text","lang","created_at","public_metrics","possibly_sensitive","source"],
                user_fields=["username","name","public_metrics","verified"],
                expansions=["author_id"],
                next_token=next_token
            )

            if not resp.data:
                break

            includes = resp.includes or {}
            users_by_id = {user.id: user for user in includes.get('users', [])}
            page = [_tweet_to_record(tweet, users_by_id) for tweet in resp.data]
            # Drop rows beyond the target when the page had to be padded up to 10.
            results.extend(page[:remaining])

            meta = getattr(resp, 'meta', None) or {}
            next_token = meta.get('next_token')
            if next_token is None:
                break

        except tweepy.TooManyRequests:
            print(f"\n⚠️  Rate limited! Waiting 60s... (Press Ctrl+C to interrupt)")
            try:
                # One-second sleeps keep the countdown interruptible.
                for i in range(60, 0, -1):
                    print(f"\r⏳ {i:2d}s remaining", end="", flush=True)
                    time.sleep(1)
                print(f"\r✅ Resuming...           ")
            except KeyboardInterrupt:
                print(f"\n🛑 Interrupted during rate limit wait. Returning {len(results)} tweets.")
                return results

        except KeyboardInterrupt:
            print(f"\n🛑 Fetch interrupted. Returning {len(results)} tweets collected so far.")
            return results

    print(f"\n✅ Complete: {len(results)} tweets collected")
    return results