# @robotodio

## Libraries

In [112]:
# Reading files with different formats
import json

# Data wrangling
import pandas as pd
import numpy as np

# Twitter
import tweepy

## Download data from twitter

In [125]:
# API Twitter credentials
# ------------------------------------------------------------------------------

# Open .json file containing credentials/tokens as a dictionary
with open("twitter_api_keys.json") as file:
    api_credentials = json.load(file)
    
# Assign each value of the dictionary to a new variable
consumer_key = api_credentials['consumer_key']
consumer_secret = api_credentials['consumer_secret']
access_token = api_credentials['access_token']
access_token_secret = api_credentials['access_token_secret']

In [126]:
# API set up
# ------------------------------------------------------------------------------

# Create a handler instance with key and secret consumer, and pass the tokens
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
    
# Instance the API authorization
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

# Check credentials
if(api.verify_credentials):
    print("Logged In Successfully")
else:
    print("Error -- Could not log in with your credentials")

Logged In Successfully


In [127]:
# Tweet extractor
# ------------------------------------------------------------------------------
# Introduce the target Twitter account and number of items to download
target = 'lexfridman'
n_items = 20

# Tweets list (iterator)
tweets = tweepy.Cursor(
    api.user_timeline,
    screen_name=target,
    tweet_mode='extended').items(n_items)

# Read through the iterator, and export the info to a Pandas DataFrame
all_columns = [np.array([
    tweet.full_text,
    tweet.user.screen_name,
    tweet.id,
    tweet.source,
    tweet.created_at,
    len(tweet.full_text),
    tweet.favorite_count,
    tweet.retweet_count,
    str(tweet.entities['hashtags'])
]) for tweet in tweets]

# Export the list of tweets to a dataframe
df = pd.DataFrame(
    data=all_columns,
    columns=['tweet', 'id', 'account', 'source', 'date', 'length', 'likes',
             'RTs', 'hashtags']
)

df

<tweepy.cursor.ItemIterator at 0x231f260ba00>

- https://tomasreneboldi.medium.com/c%C3%B3mo-obtener-datos-de-twitter-hacer-todo-esto-con-tweepy-3-5-21d60cd6e2c1
- http://rios.tecnm.mx/cdistribuido/recursos/MinDatScr/MineriaScribble.html

In [128]:
for k, tweet in enumerate(tweets):
    print(json.dumps(tweet._json, indent=4))
    if k == 1:
        break

{
    "created_at": "Mon Jan 11 11:00:46 +0000 2021",
    "id": 1348585639630041092,
    "id_str": "1348585639630041092",
    "full_text": "Here's my 2nd conversation with Dmitry Korkin (@DmKorkin) about protein folding, AlphaFold 2, evolution of viruses, origin of life on Earth, and the future of AI in computational biology. https://t.co/kyFDKBK5po https://t.co/HHKV1tzxWS",
    "truncated": false,
    "display_text_range": [
        0,
        211
    ],
    "entities": {
        "hashtags": [],
        "symbols": [],
        "user_mentions": [
            {
                "screen_name": "DmKorkin",
                "name": "Dmitry Korkin",
                "id": 3332196742,
                "id_str": "3332196742",
                "indices": [
                    47,
                    56
                ]
            }
        ],
        "urls": [
            {
                "url": "https://t.co/kyFDKBK5po",
                "expanded_url": "https://www.youtube.com/watch?v=I51DuprOb0