In [17]:
import tweepy 
import os
from dotenv import load_dotenv
import pandas as pd
import json
from collections import Counter
import re
from datetime import datetime
import time

load_dotenv()

True

In [32]:
class X_tweets:
    """Small wrapper around ``tweepy.Client`` that spaces out API requests.

    The bearer token is read from the ``TWITTER_BEARER_TOKEN`` environment
    variable. When the token is missing the instance is still created, but
    ``client`` is ``None`` and every fetch method returns its "nothing found"
    value (``None`` / ``[]``) instead of raising.
    """

    # Seconds to wait after a ``tweepy.TooManyRequests`` error before retrying.
    RATE_LIMIT_WAIT_SECONDS = 900
    # Range accepted for ``max_results`` by the user-tweets endpoint
    # (see the tweepy ``Client.get_users_tweets`` documentation).
    MIN_PAGE_SIZE = 5
    MAX_PAGE_SIZE = 100

    def __init__(self):
        # Every attribute is assigned before the token check so that an
        # instance created without a token is still safe to call.
        self.client = None
        self.last_request_time = 0
        self.min_delay = 5  # 5 seconds between requests
        self.bearer_token = os.environ.get("TWITTER_BEARER_TOKEN")
        if not self.bearer_token:
            print("❌ Please add your Bearer Token to the .env file")
            return

        self.client = tweepy.Client(bearer_token=self.bearer_token)
        print("✅ Bearer token loaded with rate limiting")

    def _rate_limited_request(self):
        """Sleep as needed so consecutive requests are ``min_delay`` seconds apart."""
        elapsed = time.time() - self.last_request_time
        if elapsed < self.min_delay:
            time.sleep(self.min_delay - elapsed)
        self.last_request_time = time.time()

    def get_user_info(self, username, max_retries=3):
        """Fetch basic profile details for ``username``.

        Args:
            username: Handle to look up, without the leading ``@``.
            max_retries: How many times to wait and retry after a rate-limit
                error before giving up.

        Returns:
            The ``data`` attribute of the tweepy response, or ``None`` when no
            token is configured, the request fails, or the rate limit is still
            in effect after ``max_retries`` waits.
        """
        if self.client is None:
            print("❌ Error fetching user info: no Bearer Token configured")
            return None

        # A bounded loop replaces unbounded recursion on repeated rate limits.
        for attempt in range(max_retries + 1):
            self._rate_limited_request()
            try:
                user = self.client.get_user(
                    username=username,
                    user_fields=["public_metrics", "description", "created_at"]
                )
                return user.data
            except tweepy.TooManyRequests:
                if attempt >= max_retries:
                    print("❌ Error fetching user info: rate limit still in effect, giving up")
                    return None
                print("⏰ Rate limit hit! Waiting 15 minutes...")
                time.sleep(self.RATE_LIMIT_WAIT_SECONDS)
            except Exception as e:
                print(f"❌ Error fetching user info: {e}")
                return None
        return None

    def get_users_tweets(self, username, max_tweets=10, max_retries=3):
        """Fetch recent original tweets (no retweets or replies) for ``username``.

        Args:
            username: Handle to look up, without the leading ``@``.
            max_tweets: Maximum number of tweets to return. The value sent to
                the API is clamped to ``MIN_PAGE_SIZE``..``MAX_PAGE_SIZE`` and
                the result is trimmed back to ``max_tweets``.
            max_retries: How many times to wait and retry after a rate-limit
                error before giving up.

        Returns:
            A list of tweet objects; empty when no token is configured, the
            user is not found, nothing is returned, or the request fails.
        """
        if self.client is None:
            print("❌ Error fetching tweets: no Bearer Token configured")
            return []
        if max_tweets < 1:
            return []

        page_size = min(max(max_tweets, self.MIN_PAGE_SIZE), self.MAX_PAGE_SIZE)

        for attempt in range(max_retries + 1):
            try:
                # The endpoint needs the numeric user id, so resolve it first.
                self._rate_limited_request()
                user = self.client.get_user(username=username)
                if user.data is None:
                    print(f"❌ Error fetching tweets: user @{username} not found")
                    return []

                # Both API calls go through the same limiter.
                self._rate_limited_request()
                tweets = self.client.get_users_tweets(
                    user.data.id,
                    max_results=page_size,
                    tweet_fields=["created_at", "text", "public_metrics", "context_annotations"],
                    exclude=["retweets", "replies"]
                )
                return list(tweets.data[:max_tweets]) if tweets.data else []
            except tweepy.TooManyRequests:
                if attempt >= max_retries:
                    print("❌ Error fetching tweets: rate limit still in effect, giving up")
                    return []
                print("⏰ Rate limit hit on tweets! Waiting 15 minutes...")
                time.sleep(self.RATE_LIMIT_WAIT_SECONDS)
            except Exception as e:
                print(f"❌ Error fetching tweets: {e}")
                return []
        return []

# Module-level client instance used by the analysis cell below; constructing it
# prints whether the bearer token was found.
Tweets = X_tweets()

✅ Bearer token loaded with rate limiting


In [41]:
class X_profile_analytics:
    """Engagement analytics over a list of tweet objects.

    Each tweet is expected to expose ``text``, ``created_at`` (a datetime) and
    ``public_metrics`` (a mapping with ``like_count``, ``retweet_count``,
    ``reply_count`` and ``quote_count``). The tweets are flattened into
    ``self.df`` once, at construction; every analysis method reads that frame.
    An empty tweet list is accepted: the analysis methods then report that
    there is nothing to analyze instead of raising.
    """

    # Column order of ``self.df``; also lets an empty tweet list produce a
    # frame that still has every expected column.
    _COLUMNS = (
        'text', 'created_at', 'hour', 'day_of_week',
        'likes', 'retweets', 'replies', 'quotes', 'engagement',
        'word_count', 'has_hashtags', 'has_mentions', 'has_links',
        'is_thread', 'engagement_rate',
    )

    # Words of four or more letters that are too common to count as topics.
    _STOP_WORDS = frozenset({
        'this', 'that', 'with', 'have', 'will', 'from', 'they', 'been', 'said', 'each', 'which', 'their',
        'time', 'about', 'would', 'there', 'could', 'other', 'what', 'when', 'where', 'here',
        'come', 'came', 'some', 'them', 'then', 'than', 'were', 'like', 'just', 'know', 'take', 'into',
        'year', 'your', 'good', 'want', 'give', 'most', 'these', 'also', 'well', 'only', 'very', 'even',
        'back', 'make', 'much', 'work', 'life', 'people', 'think', 'going', 'still', 'after', 'first'
    })

    def __init__(self, tweets):
        self.tweets = tweets
        self.df = self.tweets_to_dataframe()
        self.username = ""  # Will be set from calling function

    def tweets_to_dataframe(self):
        """Flatten ``self.tweets`` into one row per tweet.

        Returns:
            A DataFrame with the columns listed in ``_COLUMNS``; it has zero
            rows (but all columns) when there are no tweets.
        """
        rows = []
        for tweet in self.tweets:
            metrics = tweet.public_metrics
            likes = metrics['like_count']
            retweets = metrics['retweet_count']
            replies = metrics['reply_count']
            quotes = metrics['quote_count']
            engagement = likes + retweets + replies + quotes

            text = tweet.text
            # NOTE(review): hour/day come straight from ``created_at``;
            # presumably UTC for API data - confirm before reading them as
            # local posting times.
            created_at = tweet.created_at

            rows.append({
                'text': text,
                'created_at': created_at,
                'hour': created_at.hour,
                'day_of_week': created_at.strftime('%A'),
                'likes': likes,
                'retweets': retweets,
                'replies': replies,
                'quotes': quotes,
                'engagement': engagement,
                'word_count': len(text.split()),
                'has_hashtags': '#' in text,
                'has_mentions': '@' in text,
                # 'http' also matches 'https', so one check covers both.
                'has_links': 'http' in text,
                # A "1/" marker within the first ten characters flags a thread opener.
                'is_thread': '1/' in text[:10],
                # Total engagement per like (likes floored at 1 to avoid dividing by zero).
                'engagement_rate': engagement / max(likes, 1)
            })

        df = pd.DataFrame(rows, columns=list(self._COLUMNS))
        df['created_at'] = pd.to_datetime(df['created_at'])
        return df

    def _mean_engagement(self, mask):
        """Mean engagement of the rows selected by ``mask``; 0 when none match."""
        selected = self.df.loc[mask, 'engagement']
        return selected.mean() if len(selected) > 0 else 0

    def get_top_tweets_detailed(self, n=3):
        """Print a report on the ``n`` tweets with the highest engagement.

        Returns:
            The DataFrame rows of those tweets, highest engagement first.
        """
        print("🔥 TOP PERFORMING TWEETS (Detailed Analysis)")
        print("=" * 50)

        if self.df.empty:
            print("No tweets to analyze")
            return self.df.head(0)

        top_tweets = self.df.nlargest(n, 'engagement')
        # The average does not change inside the loop, so compute it once.
        average = self.df['engagement'].mean()

        for rank, (_, tweet) in enumerate(top_tweets.iterrows(), 1):
            print(f"\n{rank}. 📈 ENGAGEMENT: {tweet['engagement']:,}")
            print(f"   👍 Likes: {tweet['likes']:,} | 🔄 RT: {tweet['retweets']:,} | 💬 Replies: {tweet['replies']:,}")
            print(f"   🕐 Posted: {tweet['created_at'].strftime('%Y-%m-%d %H:%M')}")
            print(f"   📝 Words: {tweet['word_count']} | Thread: {'Yes' if tweet['is_thread'] else 'No'}")

            # Show tweet text (truncated if too long)
            text = tweet['text']
            if len(text) > 200:
                print(f"   💭 \"{text[:200]}...\"")
            else:
                print(f"   💭 \"{text}\"")

            # Performance relative to the average of all analyzed tweets
            if tweet['engagement'] > average * 2:
                print("   ⭐ OUTSTANDING performance!")
            elif tweet['engagement'] > average:
                print("   ✅ Above average performance")

        return top_tweets

    def analyze_posting_patterns(self):
        """Print and return the hour, weekday and format with the best average engagement.

        Returns:
            A dict with ``best_hour`` (int), ``best_day`` (weekday name) and
            ``thread_advantage`` (bool). With no tweets, the first two are
            ``None`` and ``thread_advantage`` is ``False``.
        """
        print("\n⏰ POSTING PATTERN ANALYSIS")
        print("-" * 30)

        if self.df.empty:
            print("No tweets to analyze")
            return {'best_hour': None, 'best_day': None, 'thread_advantage': False}

        # Best hours
        hourly_engagement = self.df.groupby('hour')['engagement'].mean()
        best_hour = int(hourly_engagement.idxmax())
        print(f"🕐 Best posting hour: {best_hour}:00 ({hourly_engagement.max():.1f} avg engagement)")

        # Best days
        daily_engagement = self.df.groupby('day_of_week')['engagement'].mean()
        best_day = daily_engagement.idxmax()
        print(f"📅 Best posting day: {best_day} ({daily_engagement.max():.1f} avg engagement)")

        # Content type analysis. An empty group averages to 0 (same rule as the
        # hashtag/link comparison) rather than NaN.
        is_thread = self.df['is_thread']
        thread_engagement = self._mean_engagement(is_thread)
        single_engagement = self._mean_engagement(~is_thread)
        thread_advantage = bool(thread_engagement > single_engagement)

        print("🧵 Content Performance:")
        print(f"   Thread tweets: {thread_engagement:.1f} avg engagement")
        print(f"   Single tweets: {single_engagement:.1f} avg engagement")

        if thread_advantage:
            print("   💡 Threads perform better than single tweets")
        else:
            print("   💡 Single tweets perform better than threads")

        return {
            'best_hour': best_hour,
            'best_day': best_day,
            'thread_advantage': thread_advantage
        }

    def analyze_content_types(self):
        """Print and return whether hashtags and links go with higher average engagement.

        Returns:
            A dict with boolean ``hashtag_advantage`` and ``link_advantage``;
            both are ``False`` when there are no tweets.
        """
        print("\n📊 CONTENT TYPE ANALYSIS")
        print("-" * 25)

        if self.df.empty:
            print("No tweets to analyze")
            return {'hashtag_advantage': False, 'link_advantage': False}

        # Hashtag analysis
        has_hashtags = self.df['has_hashtags']
        hashtag_avg = self._mean_engagement(has_hashtags)
        no_hashtag_avg = self._mean_engagement(~has_hashtags)
        hashtag_advantage = bool(hashtag_avg > no_hashtag_avg)

        print("🏷️ Hashtag Performance:")
        print(f"   With hashtags: {hashtag_avg:.1f} avg engagement")
        print(f"   Without hashtags: {no_hashtag_avg:.1f} avg engagement")

        if hashtag_advantage:
            print("   💡 Tweets with hashtags perform better")
        else:
            print("   💡 Tweets without hashtags perform better")

        # Link analysis
        has_links = self.df['has_links']
        link_avg = self._mean_engagement(has_links)
        no_link_avg = self._mean_engagement(~has_links)
        link_advantage = bool(link_avg > no_link_avg)

        print("🔗 Link Performance:")
        print(f"   With links: {link_avg:.1f} avg engagement")
        print(f"   Without links: {no_link_avg:.1f} avg engagement")

        if link_advantage:
            print("   💡 Tweets with links perform better")
        else:
            print("   💡 Tweets without links perform better")

        return {
            'hashtag_advantage': hashtag_advantage,
            'link_advantage': link_advantage
        }

    def extract_hook_lines_advanced(self):
        """Print opening lines used more than once and return the five most common.

        The "hook" of a tweet is its text up to the first period when a period
        occurs within the first 100 characters, otherwise its first 50
        characters followed by ``...``.

        Returns:
            A list of ``(hook, count)`` pairs, most frequent first.
        """
        print("\n🎣 HOOK LINE ANALYSIS")
        print("-" * 20)

        hooks = []
        for text in self.df['text']:
            if '.' in text[:100]:
                hooks.append(text.split('.')[0] + '.')
            else:
                hooks.append(text[:50] + '...')

        top_hooks = Counter(hooks).most_common(5)

        print("Most used opening lines:")
        # most_common() sorts by count, so the repeated hooks come first and
        # the numbering stays contiguous.
        repeated = [(hook, count) for hook, count in top_hooks if count > 1]
        for i, (hook, count) in enumerate(repeated, 1):
            print(f"{i}. \"{hook}\" (used {count}x)")
        if not repeated:
            print("   (no repeated opening lines)")

        return top_hooks

    def analyze_topics_advanced(self):
        """Print and return the most frequent keywords and hashtags.

        URLs are removed before counting so that link fragments (``https``,
        shortener ids) are not reported as topics.

        Returns:
            A dict with ``keywords`` and ``hashtags``, each a list of up to ten
            ``(term, count)`` pairs, most frequent first.
        """
        print("\n🏷️ ADVANCED TOPIC ANALYSIS")
        print("-" * 25)

        all_text = ' '.join(self.df['text']).lower()
        all_text = re.sub(r'https?://\S+', ' ', all_text)

        # Words of four or more characters that are not stop words
        words = re.findall(r'\b\w{4,}\b', all_text)
        meaningful_words = [word for word in words if word not in self._STOP_WORDS]

        hashtags = re.findall(r'#\w+', all_text)

        keywords = Counter(meaningful_words).most_common(10)
        hashtag_counts = Counter(hashtags).most_common(10)

        if keywords:
            print("Top Keywords:")
            for keyword, count in keywords[:8]:
                print(f"  {keyword}: {count}")

        if hashtag_counts:
            print("\nTop Hashtags:")
            for hashtag, count in hashtag_counts[:5]:
                print(f"  {hashtag}: {count}")

        return {
            'keywords': keywords,
            'hashtags': hashtag_counts
        }

    def generate_recommendations(self, patterns=None, content=None):
        """Print and return recommendations derived from the other analyses.

        Args:
            patterns: Result of ``analyze_posting_patterns()``. When omitted,
                that analysis is run here (and prints its own report).
            content: Result of ``analyze_content_types()``. When omitted, that
                analysis is run here (and prints its own report).

        Returns:
            A list of recommendation strings; empty when there are no tweets.
        """
        print("\n💡 ACTIONABLE RECOMMENDATIONS")
        print("-" * 30)

        if self.df.empty:
            print("No tweets to analyze")
            return []

        if patterns is None:
            patterns = self.analyze_posting_patterns()
        if content is None:
            content = self.analyze_content_types()

        recommendations = []

        # Posting time recommendations
        if patterns['best_hour'] < 12:
            recommendations.append(f"📅 Post more during morning hours (around {patterns['best_hour']}:00)")
        else:
            recommendations.append(f"📅 Post more during evening hours (around {patterns['best_hour']}:00)")

        recommendations.append(f"📊 Focus on posting on {patterns['best_day']}s")

        # Content recommendations
        if patterns['thread_advantage']:
            recommendations.append("🧵 Create more threads - they perform better")
        else:
            recommendations.append("📝 Focus on single impactful tweets")

        if content['hashtag_advantage']:
            recommendations.append("🏷️ Use relevant hashtags in your tweets")
        else:
            recommendations.append("🚫 Avoid unnecessary hashtags")

        if content['link_advantage']:
            recommendations.append("🔗 Include links when sharing resources")

        # Engagement recommendations
        avg_engagement = self.df['engagement'].mean()
        if avg_engagement < 100:
            recommendations.append("📈 Work on increasing overall engagement")
        elif avg_engagement > 1000:
            recommendations.append("🚀 Your content strategy is working well!")

        for i, rec in enumerate(recommendations, 1):
            print(f"{i}. {rec}")

        return recommendations

    def engagement_stats(self):
        """Summarize engagement across all tweets.

        Returns:
            ``{'error': 'No tweets to analyze'}`` when there are no tweets.
            Otherwise a dict of totals, averages, median, 75th percentile,
            best value, and a ``consistency_score`` (standard deviation
            divided by mean) with a ``consistency_note`` describing it. The
            score is 0.0 when it cannot be computed (fewer than two tweets, or
            zero mean engagement).
        """
        if len(self.df) == 0:
            return {'error': 'No tweets to analyze'}

        engagement = self.df['engagement']
        mean_engagement = engagement.mean()

        # The sample standard deviation is undefined for a single tweet, and
        # dividing by a zero mean is undefined too; both used to yield NaN,
        # which was then labelled "highly variable".
        if len(self.df) < 2:
            consistency_score = 0.0
            consistency_note = "Not enough tweets to measure consistency"
        else:
            if mean_engagement == 0:
                consistency_score = 0.0
            else:
                consistency_score = round(float(engagement.std() / mean_engagement), 2)

            if consistency_score < 0.5:
                consistency_note = "Very consistent engagement"
            elif consistency_score < 1.0:
                consistency_note = "Moderately consistent engagement"
            else:
                consistency_note = "Highly variable engagement"

        return {
            'total_tweets': len(self.df),
            'avg_likes': round(float(self.df['likes'].mean()), 1),
            'avg_retweets': round(float(self.df['retweets'].mean()), 1),
            'avg_replies': round(float(self.df['replies'].mean()), 1),
            'avg_engagement': round(float(mean_engagement), 1),
            'median_engagement': round(float(engagement.median()), 1),
            'top_25_percentile': round(float(engagement.quantile(0.75)), 1),
            'best_performing': int(engagement.max()),
            'engagement_rate': f"{round(float(engagement.sum() / len(self.df)), 1)} per tweet",
            'consistency_score': consistency_score,
            'consistency_note': consistency_note
        }

# Printed once this cell has run, i.e. after the class above has been defined.
print("✅ Enhanced analytics functions ready!")

✅ Enhanced analytics functions ready!


In [43]:
# End-to-end run: look up the profile, fetch recent tweets, then print every
# analysis for that account.
username = "im_roy_lee"

print("🚀 COMPREHENSIVE TWITTER PROFILE ANALYSIS")
print("=" * 60)
print(f"📊 Analyzing: @{username}")
print("=" * 60)

# Step 1: Get user info
user_info = Tweets.get_user_info(username)

if user_info:
    print(f"👤 {user_info.name} (@{user_info.username})")
    print(f"📈 Followers: {user_info.public_metrics['followers_count']:,}")
    # Add an ellipsis only when the bio was actually cut; a missing bio prints as empty.
    bio = user_info.description or ""
    bio_preview = f"{bio[:120]}..." if len(bio) > 120 else bio
    print(f"📝 Bio: {bio_preview}")
    print()

    # Step 2: Wait before tweets (extra pause on top of the client's own request spacing)
    print("⏳ Preparing tweet analysis...")
    time.sleep(10)

    # Step 3: Get tweets
    print("📥 Fetching recent tweets...")
    tweets = Tweets.get_users_tweets(username, max_tweets=20)

    if tweets:
        print(f"✅ Successfully fetched {len(tweets)} tweets for analysis")
        print("_" * 60)

        # Step 4: Initialize enhanced analyzer
        tweet_analyzer = X_profile_analytics(tweets)
        tweet_analyzer.username = username

        # Step 5: Comprehensive Analysis

        # 1. Engagement Statistics
        print("📈 ENGAGEMENT OVERVIEW")
        print("-" * 25)
        stats = tweet_analyzer.engagement_stats()
        if 'error' in stats:
            # Surface the problem instead of printing an empty overview.
            print(f"❌ {stats['error']}")
        else:
            for key, value in stats.items():
                if key != 'consistency_note':
                    print(f"{key.replace('_', ' ').title()}: {value}")
            if 'consistency_note' in stats:
                print(f"Consistency: {stats['consistency_note']}")
        print()

        # 2. Top Tweets Analysis
        tweet_analyzer.get_top_tweets_detailed(3)

        # 3. Posting Patterns
        patterns = tweet_analyzer.analyze_posting_patterns()

        # 4. Content Analysis
        content = tweet_analyzer.analyze_content_types()

        # 5. Hook Analysis
        tweet_analyzer.extract_hook_lines_advanced()

        # 6. Topic Analysis
        tweet_analyzer.analyze_topics_advanced()

        # 7. Recommendations
        # NOTE: called without arguments, this runs the posting-pattern and
        # content analyses again, so their reports print a second time here.
        tweet_analyzer.generate_recommendations()

        print(f"\n🎉 Analysis complete for @{username}!")
        print("Use these insights to optimize your Twitter strategy! 🚀")

    else:
        print("❌ Could not fetch tweets")
        print("💡 This might be due to rate limiting. Wait 15 minutes and try again.")

else:
    print("❌ Could not get user information")
    print("💡 Check your Bearer Token or try a different username")

🚀 COMPREHENSIVE TWITTER PROFILE ANALYSIS
📊 Analyzing: @im_roy_lee
👤 Roy (@im_roy_lee)
📈 Followers: 152,357
📝 Bio: CEO at Cluely (@cluely) | Kicked out of Columbia and Harvard | i am the man who killed leetcode | @zfellows...

⏳ Preparing tweet analysis...
📥 Fetching recent tweets...
✅ Successfully fetched 20 tweets for analysis
____________________________________________________________
📈 ENGAGEMENT OVERVIEW
-------------------------
Total Tweets: 20
Avg Likes: 1971.2
Avg Retweets: 54.3
Avg Replies: 108.9
Avg Engagement: 2159.3
Median Engagement: 490.0
Top 25 Percentile: 1577.2
Best Performing: 20553
Engagement Rate: 2159.3 per tweet
Consistency Score: 2.15
Consistency: Highly variable engagement

🔥 TOP PERFORMING TWEETS (Detailed Analysis)

1. 📈 ENGAGEMENT: 20,553
   👍 Likes: 19,532 | 🔄 RT: 602 | 💬 Replies: 361
   🕐 Posted: 2025-08-09 19:48
   📝 Words: 7 | Thread: No
   💭 "we r live in times square https://t.co/YcdFYaucGe"
   ⭐ OUTSTANDING performance!

2. 📈 ENGAGEMENT: 7,193
   👍 Li