# Get each user's tweets from a particular period of time, using the Twitter API

In [None]:
import time
from configparser import ConfigParser
from datetime import datetime
from typing import List

import pandas as pd
import tweepy

##############

# Spreadsheet listing the sampled accounts: file, sheet, and the column
# that holds each account's screen name.
accounts_file_name = "edang-firstvoters-info-samples.xlsx"
accounts_sheet_name = "samples"
screen_name_col = "screen_name"

# Twitter API credentials are kept out of the notebook, in the
# [TWITTER_AUTH] section of config.ini.
config_object = ConfigParser()
config_object.read("config.ini")
twitter_auth = config_object["TWITTER_AUTH"]

consumer_key, consumer_secret = (
    twitter_auth["CONSUMER_KEY"],
    twitter_auth["CONSUMER_SECRET"],
)
access_token, access_token_secret = (
    twitter_auth["ACCESS_TOKEN"],
    twitter_auth["ACCESS_TOKEN_SECRET"],
)

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# API client shared by the cells below.
api = tweepy.API(
    auth,
    wait_on_rate_limit=True,
    wait_on_rate_limit_notify=True,
    compression=True,
)

#############

# Timestamp layout used by the Zocial Eye Excel export,
# e.g. 2019-02-15 15:50:56
datetime_format = "%Y-%m-%d %H:%M:%S"

# The election day
election_date = datetime(year=2019, month=3, day=24)

# The day before and the day after the election day
start_date = datetime(year=2019, month=3, day=23)
end_date = datetime(year=2019, month=3, day=25)

# Date limit: tweets older than 1 Dec 2018 are not collected
_DATE_LIMIT = datetime(year=2018, month=12, day=1)

# Default value of get_tweets' tweets_limit parameter
_TWEETS_LIMIT = 1000

## Read sampled user list

In [None]:
# Load only the screen-name column from the sampled-accounts sheet.
screen_names_df = pd.read_excel(accounts_file_name, sheet_name=accounts_sheet_name, usecols=[screen_name_col])

# Index with screen_name_col (the same name passed to usecols) instead of a
# repeated literal, so the column name is configured in exactly one place.
# Empty cells are read as NaN; drop them because a NaN is not an account
# name and has no .lower() when the output file name is built later.
screen_names = screen_names_df[screen_name_col].dropna().drop_duplicates().values.tolist()

# Show the first and last names plus the total as a quick sanity check.
print(f"Screen names: {screen_names[:1]} .. {screen_names[-1:]}")
print(f"Total: {len(screen_names):,}")

## Get tweets of each user and write them to file

In [None]:
def limit_handled(cursor):
    """Yield items from a cursor iterator, pausing whenever the rate limit is hit.

    Args:
        cursor: An object exposing ``next()``; in this notebook it is the
            iterator returned by ``tweepy.Cursor(...).items()``.

    Yields:
        Each value returned by ``cursor.next()``, until it raises
        ``StopIteration``.
    """
    # Length of the pause after a rate-limit error, in seconds (15 minutes).
    wait_seconds = 15 * 60
    while True:
        try:
            yield cursor.next()
        except StopIteration:
            # The underlying iterator is exhausted: end this generator too.
            return
        except tweepy.RateLimitError:
            # The message now states the real wait time (the pause is
            # 15 minutes, not 15 seconds); the loop then retries next().
            print("Rate limit reached, wait for 15 minutes...")
            time.sleep(wait_seconds)


def get_tweets(username: str, filename: str, tweets_limit: int = _TWEETS_LIMIT, date_limit=_DATE_LIMIT) -> int:
    """Write a user's timeline tweets to a text file and return how many were written.

    Each tweet is written as three lines: its text, its creation time, and a
    ``----`` separator. Writing stops at the first tweet created before
    ``date_limit`` or once ``tweets_limit`` tweets have been written.

    Args:
        username: Screen name of the account whose timeline is read.
        filename: Path of the output file; it is created or overwritten.
        tweets_limit: Maximum number of tweets to write.
        date_limit: Datetime cutoff; the first tweet older than this ends
            the download.

    Returns:
        The number of tweets written to ``filename``.
    """
    tweets_num = 0
    # Explicit UTF-8 so non-ASCII tweet text is written the same way on every
    # platform instead of depending on the locale's default encoding.
    with open(filename, "w", encoding="utf-8") as f:
        try:
            timeline = tweepy.Cursor(api.user_timeline, screen_name=username).items()
            for status in limit_handled(timeline):
                # Enforce the cap that the tweets_limit parameter promises.
                if tweets_num >= tweets_limit:
                    print("Tweets limit reached.")
                    break
                # Stopping at the first old tweet assumes the timeline is
                # returned newest-first -- TODO confirm against the API docs.
                if status.created_at < date_limit:
                    print("Date limit reached.")
                    break
                f.writelines([
                    f"{status.text}\n",
                    f"{status.created_at}\n",
                    "----\n",
                ])
                tweets_num += 1
        except tweepy.TweepError as err:
            # Best effort: report the actual error and keep what was written,
            # so that one failing account does not abort a whole batch.
            print(f"TweepError: {err} (maybe protected/suspended, or bad connection)")

    return tweets_num

def get_tweets_from_users(screen_names):
    """Download the tweets of every account in ``screen_names``.

    For each account, get_tweets is called with an output file named after
    the lower-cased screen name plus ``.txt``, and progress is printed
    before and after the download.

    Args:
        screen_names: Iterable of account screen names (strings).
    """
    for position, account in enumerate(screen_names):
        print(f"{position}: {account} - get tweets...")
        output_path = account.lower() + ".txt"
        saved = get_tweets(account, output_path)
        print(f"Done: {saved} tweets")

In [None]:
# To try a single account first, run this instead:
# get_tweets_from_users(["bact"])

# Download tweets for every sampled account (one output file per account).
get_tweets_from_users(screen_names)