# Twint Overview
This notebook serves as a guide to using Twint to source Twitter data. See https://github.com/twintproject/twint for instructions on installing the library. We recommend installing with `pip3 install --user --upgrade git+https://github.com/twintproject/twint.git@origin/master#egg=twint` rather than a vanilla pip install. Twint is not included in Anaconda, so it must be installed separately.


Twint acts like a headless browser, querying the Twitter `/search` endpoint for the results of your search configuration. The utility functions listed below will help you get started.

Do note that Twitter limits scrolls while browsing the user timeline. This means that with `.Profile` or with `.Favorites` you will be able to get at most ~3200 tweets.

In [6]:
# Basic import statements
# nest_asyncio is necessary to allow Jupyter to run Twint in a notebook
import twint
import nest_asyncio
import json
nest_asyncio.apply()

Below are some utility functions that modify Twint's config for easy import into a pandas DataFrame.

In [2]:
# Initialization for the Twint config object
def set_config():
    """Create a fresh Twint configuration with console output suppressed.

    Returns:
        A new ``twint.Config`` instance whose ``Hide_output`` flag is set
        to True.
    """
    config = twint.Config()
    config.Hide_output = True
    return config

# Get a user's profile data (bio on Tw profile)
def get_user_profile(username):
    """Look up one Twitter user and return their profile as a dictionary.

    Args:
        username: The handle assigned to the Twint config's ``Username``.

    Returns:
        dict: The profile fields read from the last entry of
        ``twint.output.users_list`` (id, name, username, bio, location,
        url, join date/time, counters, privacy/verification flags and
        image URLs).

    Raises:
        IndexError: If the lookup stored no user object.
    """
    c = set_config()
    c.Username = username
    c.User_full = True
    c.Store_object = True
    # Start from empty result lists, as the other helpers in this notebook
    # do, so a failed lookup cannot pick up a user left by an earlier call.
    twint.output.clean_lists()
    try:
        twint.run.Lookup(c)
        if not twint.output.users_list:
            raise IndexError(f"Twint returned no profile for username {username!r}")
        user = twint.output.users_list[-1]
        return {"id": user.id,
                "name": user.name,
                "username": user.username,
                "bio": user.bio,
                "location": user.location,
                "url": user.url,
                "join_date": user.join_date,
                "join_time": user.join_time,
                "tweets": user.tweets,
                "following": user.following,
                "followers": user.followers,
                "likes": user.likes,
                "media": user.media_count,
                "private": user.is_private,
                "verified": user.is_verified,
                "avatar": user.avatar,
                "background_image": user.background_image}
    finally:
        # Always reset Twint's module-level lists, even when the lookup or
        # the field extraction raises.
        twint.output.clean_lists()

# Scan through a user's tweets (inclusive of QTs and RTs) for a search parameter
# and return matching results
def search_user_tweets(username, search_key):
    """Search one user's tweets for a term and return the matches.

    Args:
        username: The handle assigned to the Twint config's ``Username``.
        search_key: The query assigned to the Twint config's ``Search``.

    Returns:
        list[dict]: The stored tweets converted by ``format_tweets``.
    """
    c = set_config()
    # Empty Twint's module-level result lists so only this run's tweets
    # are formatted below.
    twint.output.clean_lists()
    c.Username = username
    c.Search = search_key
    c.Store_object = True
    twint.run.Search(c)
    return format_tweets(twint.output.tweets_list)
    
# Get a user's n-number of tweets historically. Usually caps out at 3200.
def get_user_tweets(username, limit=0):
    """Fetch tweets from a user's profile timeline.

    Args:
        username: The handle assigned to the Twint config's ``Username``.
        limit: Value assigned to the Twint config's ``Limit``.
            NOTE(review): the default 0 is passed to Twint unchanged;
            confirm how Twint interprets a limit of 0.

    Returns:
        list[dict]: The collected tweets converted by ``format_tweets``.
    """
    c = set_config()
    twint.output.clean_lists()
    c.Username = username
    c.Limit = limit
    c.Store_object = True
    # Collect results in a list owned by this call instead of reading
    # Twint's module-level tweets_list afterwards.
    tweets = []
    c.Store_object_tweets_list = tweets
    twint.run.Profile(c)
    return format_tweets(tweets)

# Get a user's media/photo tweets historically. Usually caps out at 3200.
def get_user_media_tweets(username, limit=0):
    """Fetch a user's tweets with the Twint ``Media`` filter enabled.

    Args:
        username: The handle assigned to the Twint config's ``Username``.
        limit: Value assigned to the Twint config's ``Limit``.
            NOTE(review): the default 0 is passed to Twint unchanged;
            confirm how Twint interprets a limit of 0.

    Returns:
        list[dict]: The stored tweets converted by ``format_tweets``.
    """
    config = set_config()
    # Reset Twint's module-level result lists before this run.
    twint.output.clean_lists()

    config.Username = username
    config.Media = True
    config.Limit = limit
    config.Store_object = True

    twint.run.Search(config)
    return format_tweets(twint.output.tweets_list)

# Get all tweets with public geolocation data included within a specified radius and centroid
def get_tweets_geofenced(lat, lon, radius, limit=0):
    """Search for tweets inside a circular area.

    Args:
        lat: Latitude of the centre point.
        lon: Longitude of the centre point.
        radius: Radius around the centre point; formatted with a ``km``
            suffix in the geo string.
        limit: Value assigned to the Twint config's ``Limit``.
            NOTE(review): the default 0 is passed to Twint unchanged;
            confirm how Twint interprets a limit of 0.

    Returns:
        list[dict]: The stored tweets converted by ``format_tweets``.
    """
    config = set_config()
    config.Geo = f"{lat},{lon},{radius}km"
    # Optional date window, left disabled:
    #     config.Since = "2021-05-23"
    #     config.Until = "2021-05-24"
    config.Limit = limit
    config.Store_object = True

    # Reset Twint's module-level result lists before this run.
    twint.output.clean_lists()
    twint.run.Search(config)
    return format_tweets(twint.output.tweets_list)

# Scan through Twitter and get n-number of tweets that match a string historically
def search_tweets(topic, limit=0):
    """Search Twitter for tweets matching a query string.

    Args:
        topic: The query assigned to the Twint config's ``Search``.
        limit: Value assigned to the Twint config's ``Limit``.
            NOTE(review): the default 0 is passed to Twint unchanged;
            confirm how Twint interprets a limit of 0.

    Returns:
        list[dict]: The stored tweets converted by ``format_tweets``.
    """
    config = set_config()
    # Reset Twint's module-level result lists before this run.
    twint.output.clean_lists()

    config.Search = topic
    config.Limit = limit
    config.Store_object = True

    twint.run.Search(config)
    return format_tweets(twint.output.tweets_list)

# Formatting code to convert Twint tweet objects to Python dictionaries for ease of use
def format_tweets(twint_object):
    """Convert an iterable of tweet objects into a list of dictionaries.

    Args:
        twint_object: Iterable of objects exposing the tweet attributes
            listed in the field table below.

    Returns:
        list[dict]: One dictionary per input object, in input order, with
        the output keys in the order given by the field table.
    """
    # (output key, source attribute) pairs; most keys mirror the attribute
    # name, a few are renamed for the output.
    field_map = (
        ("tweet_id", "id"),
        ("user_id", "user_id"),
        ("username", "username"),
        ("name", "name"),
        ("datestamp", "datestamp"),
        ("timestamp", "timestamp"),
        ("place", "place"),
        ("tweet", "tweet"),
        ("mentions", "mentions"),
        ("urls", "urls"),
        ("photos", "photos"),
        ("replies_count", "replies_count"),
        ("likes_count", "likes_count"),
        ("hashtags", "hashtags"),
        ("cashtags", "cashtags"),
        ("link", "link"),
        ("retweet", "retweet"),
        ("rt_count", "retweets_count"),
        ("quote_url", "quote_url"),
        ("video", "video"),
        ("user_rt_id", "user_rt_id"),
        ("near", "near"),
        ("geo", "geo"),
        ("source", "source"),
        ("rt_date", "retweet_date"),
    )
    return [
        {key: getattr(item, attr) for key, attr in field_map}
        for item in twint_object
    ]

In [3]:
# Run a keyword search for "bakuna", passing 500 as the limit argument
res = search_tweets("bakuna", 500)

In [4]:
import pandas as pd
# Build a DataFrame from the list of tweet dictionaries returned by the search
d = pd.DataFrame(res)

In [5]:
# Display only the date, place, tweet text and time columns
d[["datestamp", "place","tweet", "timestamp"]]

Unnamed: 0,datestamp,place,tweet,timestamp
0,2022-04-27,,@sisoykenia_ Hakuna matata ❤,09:20:31
1,2022-04-27,,@mun_hakuna @kitsune_sama626 接客されたい👈,09:20:15
2,2022-04-27,,@hakuna_yoruuu 背景設定して寝てた(´._.` )วｱﾘｶﾞｼｭﾝ,09:19:43
3,2022-04-27,,੯ꔷ̀͡ᓑ\🅣🅔🅡🅐 が今HAKUNAでライブ配信中！ぜひ見にきてね！#HAKUNA #ハク...,09:19:34
4,2022-04-27,,@hakuna_yoruuu おはよるるん！ 朝枠お疲れ様！ 今日もお仕事頑張ってて偉いね！...,09:19:32
...,...,...,...,...
495,2022-04-27,,@deadpool70hr @sabibi6199 @ColliderCraft @Riii...,07:11:00
496,2022-04-27,,@utau_tanaka それは、ひなも一緒に埋めつくしたいタイプ。だって枠主挟んで間接的に...,07:10:51
497,2022-04-27,,@sabibi6199 @ColliderCraft @FactionChosen #Cho...,07:10:42
498,2022-04-27,,朝配信だよぉ(*´ω｀*)おはょを待ってます*_ _) 👶🏻🤞🏻あおい が今HAKUNAでラ...,07:10:15
