In [None]:
!pip install tweepy==4.10.0 --q
!pip install wget --q

In [2]:
import tweepy
import json
import numpy as np
import pandas as pd
from PIL import Image
import wget

In [3]:
# Show which Tweepy release is loaded in this kernel.
print(f'TweePy version is: {tweepy.__version__}')

TweePy version is: 4.10.0


In [4]:
def authenticate(creds_path):
    '''
    read the credentials and authenticate to Twitter
    :param creds_path: str, containing path to a JSON credentials file holding the keys
        'api_key', 'api_key_secret', 'access_token' and 'access_token_secret'
    :return: api, the tweepy.API object built from those credentials
    :raises FileNotFoundError: if creds_path does not exist
    :raises KeyError: if one or more of the required keys is missing from the file
    '''
    required_keys = ('api_key', 'api_key_secret', 'access_token', 'access_token_secret')

    # JSON is UTF-8 by specification; do not depend on the platform default encoding
    with open(creds_path, mode='r', encoding='utf-8') as f:
        creds = json.load(f)

    # fail early with a single message naming every missing key
    missing = [key for key in required_keys if key not in creds]
    if missing:
        raise KeyError(f'missing credential key(s) in {creds_path}: {missing}')

    # OAuth1UserHandler replaces the deprecated OAuthHandler alias and takes the
    # access token directly, so no separate set_access_token() call is needed
    auth = tweepy.OAuth1UserHandler(
        consumer_key=creds['api_key'],
        consumer_secret=creds['api_key_secret'],
        access_token=creds['access_token'],
        access_token_secret=creds['access_token_secret'],
    )
    return tweepy.API(auth=auth, wait_on_rate_limit=True)

In [5]:
# Screen name of the account whose timeline is requested.
USR_NAME = 'MariusCrypt0'

# Build the API client from the credentials file, then request the timeline.
api = authenticate(creds_path='/content/twitter_creds.json')
timeline = api.user_timeline(
    screen_name=USR_NAME,
    count=50,
    exclude_replies=True,
    include_rts=True,
    tweet_mode='extended',  # later cells read `full_text` from each status
)

In [6]:
# One row per status: text length, text, creation time and posting account.
timeline_df = pd.DataFrame(
    data=[
        [
            len(tweet.full_text),
            tweet.full_text,
            tweet.created_at,
            tweet.user.screen_name,
        ]
        for tweet in timeline
    ],
    columns=['Tweet_Length', 'Tweet_Text', 'Tweet_Date', 'UserName'],
)
timeline_df.head(10)

Unnamed: 0,Tweet_Length,Tweet_Text,Tweet_Date,UserName
0,140,RT @IncomeSharks: #Alts - Look so close to jus...,2023-03-31 16:40:45+00:00,MariusCrypt0
1,140,RT @hasantoxr: Canva has over 125 million user...,2023-03-31 09:40:09+00:00,MariusCrypt0
2,140,RT @DSBatten: The report is up\n\nBitcoin's ma...,2023-03-31 07:41:03+00:00,MariusCrypt0
3,48,RT @CryptoKaleo: What if https://t.co/gS7yUKqLsj,2023-03-31 07:40:56+00:00,MariusCrypt0
4,144,RT @RealLittleFishh: #tehBag is almost 3 month...,2023-03-30 21:07:13+00:00,MariusCrypt0
5,28,How much of a Degen are you?,2023-03-30 16:16:22+00:00,MariusCrypt0
6,117,RT @alpha_pls: Arbitrum shows no signs of slow...,2023-03-30 16:02:32+00:00,MariusCrypt0
7,140,RT @MariusCrypt0: $LOOP is at such a good pric...,2023-03-30 15:21:13+00:00,MariusCrypt0
8,100,Sniped myself some floor and some rare Dev NFT...,2023-03-30 12:49:18+00:00,MariusCrypt0
9,212,"No matter how often someone dumps, $OPTI #OPTI...",2023-03-30 12:11:15+00:00,MariusCrypt0


In [21]:
def extract_status_time(tweet):
    '''
    format the creation time of a tweet for display
    :param tweet: status object whose `created_at` supports strftime
    :return: str, e.g. 'Fri, 31/March/2023, 16:40 UTC'
    '''
    time_format = '%a, %d/%B/%Y, %H:%M %Z'
    posted_at = tweet.created_at
    return posted_at.strftime(time_format)


def extract_tags_and_symbols(tweet, is_retweet):
    '''
    collect the hashtags and symbols attached to a tweet
    :param tweet: status object; its `retweeted_status` is read when is_retweet is True
    :param is_retweet: bool, whether to take the entities from `tweet.retweeted_status`
    :return: (tags, symbols), two lists of str prefixed with '#' and '$' respectively
    '''
    # pick the status whose entities describe the displayed text
    source = tweet.retweeted_status if is_retweet else tweet
    raw_tags, raw_symbols = source.entities['hashtags'], source.entities['symbols']

    tags = []
    for entry in raw_tags:
        tags.append('#' + entry['text'])

    symbols = []
    for entry in raw_symbols:
        symbols.append('$' + entry['text'])

    return tags, symbols


def download_media(tweet, is_retweet):
    '''
    download every media file attached to a tweet
    each file is saved as '<tweet.id_str>_<index><extension of the media url>'
    :param tweet: status object; its `retweeted_status` is read when is_retweet is True
    :param is_retweet: bool, whether to take the media from `tweet.retweeted_status`
    :return: None
    '''
    source = tweet.retweeted_status if is_retweet else tweet

    # `entities` carries only the first attached photo; when several are attached the
    # complete list lives under `extended_entities`, so prefer it whenever it exists
    extended = getattr(source, 'extended_entities', None) or {}
    media_files = extended.get('media') or source.entities.get('media', [])

    for idx, media_file in enumerate(media_files):
        # prefer the https variant of the url, falling back to the plain one
        media_url = media_file.get('media_url_https') or media_file['media_url']
        media_name = tweet.id_str + '_' + str(idx) + media_url[media_url.rfind('.'):]
        wget.download(url=media_url, out=media_name)
        print(f'downloaded image no. {idx+1} successfully!')

    if not media_files:
        print('no downloadable images found!')
    return None


def print_author(tweet, is_retweet):
    '''
    print the name and handle of the account that wrote the tweet
    :param tweet: status object
    :param is_retweet: bool, when True the author of the retweeted tweet is printed
        instead of the account that posted `tweet`
    :return: None
    '''
    if is_retweet:
        # read the original author from the embedded status, like the other helpers
        # do; the first user mention is only a fallback when that status is absent
        original = getattr(tweet, 'retweeted_status', None)
        if original is not None:
            original_author_name = original.author.name
            original_author_handle = original.author.screen_name
        else:
            first_mention = tweet.entities.get('user_mentions')[0]
            original_author_name = first_mention.get('name')
            original_author_handle = first_mention.get('screen_name')
        print('RT:', original_author_name + ' @' + original_author_handle)
    else:
        print('Originally tweeted by:', tweet.author.name + ' @' + tweet.author.screen_name)
    return None


def extract_full_text(tweet, is_retweet):
    '''
    return the complete text of a tweet
    :param tweet: status object exposing `full_text`
    :param is_retweet: bool, when True the text of `tweet.retweeted_status` is returned
    :return: the `full_text` of the selected status
    '''
    if is_retweet:
        return tweet.retweeted_status.full_text
    return tweet.full_text

In [20]:
for status in timeline:
    print('--------------------------------------')
    # A retweet carries the original tweet in `retweeted_status`. Checking the text
    # for a leading 'RT' also matches ordinary tweets that merely start with those
    # letters, and the helpers below then fail when they read `retweeted_status`.
    is_retweet = hasattr(status, 'retweeted_status')
    status_time = extract_status_time(status)
    print('tweet posted at: ', status_time)
    tags, symbols = extract_tags_and_symbols(status, is_retweet=is_retweet)
    print('tweet has these tags:', tags)
    print('tweet has these symbols:', symbols)
    print_author(status, is_retweet=is_retweet)
    text = extract_full_text(status, is_retweet=is_retweet)
    print('full tweet text: \n', text)
    # download_media(status, is_retweet=is_retweet)
    print('--------------------------------------')
    # only the first status is displayed; remove the break to walk the whole timeline
    break

--------------------------------------
tweet posted at:  Fri, 31/March/2023, 16:40 UTC
tweet has these tags: ['#Alts']
tweet has these symbols: []
RT: IncomeSharks @IncomeSharks
full tweet text: 
 #Alts - Look so close to just going wild. I wouldn't be surprised if this is the 3rd time we've started an alt coin rally on April Fools Day (April 1st) https://t.co/6AHmvBqC3p
--------------------------------------
