In [None]:
import os
import sys
import time

import matplotlib.pyplot as plt
import nltk
import pandas as pd
import tweepy

import preprocessing as prep

In [None]:
def login(key, secret, rate_limit_control):
    """Create a tweepy API client using application-only authentication.

    Args:
        key: Consumer key handed to ``tweepy.AppAuthHandler``.
        secret: Consumer secret handed to ``tweepy.AppAuthHandler``.
        rate_limit_control: Value forwarded to ``tweepy.API`` as both
            ``wait_on_rate_limit`` and ``wait_on_rate_limit_notify``.

    Returns:
        The ``tweepy.API`` object, or the string ``"Can't Authenticate"``
        when the constructed client evaluates as falsy.
    """
    handler = tweepy.AppAuthHandler(key, secret)
    client = tweepy.API(
        handler,
        wait_on_rate_limit=rate_limit_control,
        wait_on_rate_limit_notify=rate_limit_control,
    )
    if client:
        return client
    return "Can't Authenticate"

In [None]:
def limit_handler(m):
    """Pause for ``m`` minutes, reporting progress once per minute.

    Args:
        m: Whole number of minutes to wait. The wait is carried out as
            ``m`` consecutive 60-second sleeps.

    Returns:
        None. Progress messages are printed to stdout.
    """
    print("Rate limit reached. Sleeping for {0} minutes. Oh Jeez!".format(m))
    for elapsed in range(1, m + 1):
        time.sleep(60)
        print("It's been {0} minutes since darkness started.".format(elapsed))

In [None]:
def collect_tweets(user, page, api):
    """Download a user's timeline and save each tweet with an engagement score.

    Retweets and replies are excluded by the request parameters. The score of
    a tweet is its favorite count plus its retweet count.

    Args:
        user: Account identifier passed to ``api.user_timeline`` as ``id``;
            also used as the prefix of the output file name.
        page: Maximum number of result pages to fetch (200 tweets requested
            per page).
        api: Client exposing ``user_timeline``, as returned by ``login``.

    Returns:
        pandas.DataFrame with columns ``text`` (newlines and tabs removed) and
        ``score``. The same frame is written to ``<user>_tweets.csv``. The
        frame is empty, but still has both columns, when no tweets come back.
    """
    print("Collecting Tweets for User: {0} -----------------------------------------------------".format(user))
    tweets = []
    # Query the account named by `user` so the function works for any account,
    # not just one fixed screen name.
    timeline = tweepy.Cursor(
        api.user_timeline,
        id=user,
        count=200,
        tweet_mode="extended",
        include_rts="false",
        exclude_replies="true",
    )
    for batch in timeline.pages(page):
        tweets.extend(batch)
    rows = [
        [
            tweet._json["full_text"].replace("\n", "").replace("\t", ""),
            tweet.favorite_count + tweet.retweet_count,
        ]
        for tweet in tweets
    ]
    # Naming the columns in the constructor keeps an empty timeline from
    # raising a length-mismatch error on column assignment.
    user_df = pd.DataFrame(data=rows, columns=["text", "score"])
    user_df.to_csv(user + "_tweets.csv")
    print("DONE! {0} tweets collected from {1}".format(len(user_df), user))
    return user_df

In [None]:
def collect_followers(user, page, api):
    """Download the screen names of a user's followers and save them to CSV.

    Args:
        user: Account identifier passed to ``api.followers`` as ``id``; also
            used as the prefix of the output file name.
        page: Maximum number of result pages to fetch (200 followers
            requested per page).
        api: Client exposing ``followers``, as returned by ``login``.

    Returns:
        pandas.DataFrame with a single ``name`` column holding one screen name
        per follower. The same frame is written to ``<user>_followers.csv``.
        The frame is empty, but still has the column, when nothing comes back.
    """
    folls = []
    # Query the account named by `user` so the function works for any account,
    # not just one fixed screen name.
    cursor = tweepy.Cursor(api.followers, count=200, id=user)
    for batch in cursor.pages(page):
        folls.extend(batch)
    names = [fol.screen_name for fol in folls]
    # Naming the column in the constructor keeps an empty follower list from
    # raising a length-mismatch error on column assignment.
    foll_df = pd.DataFrame(data=names, columns=["name"])
    foll_df.to_csv(user + "_followers.csv")
    return foll_df

In [None]:
def collect_favs(user, api):
    """Collect the English-language favorited tweets of a user's followers.

    Reads the follower list from ``<user>_followers.csv`` (the file written by
    ``collect_followers``), requests up to 200 favorites per follower, keeps
    those whose ``lang`` is ``"en"`` and writes the cleaned text to
    ``<user>_folls_favs_extented3.csv``.

    Followers whose favorites cannot be read (protected accounts, other API
    errors) are reported on stdout and skipped. A rate-limit error triggers a
    15-minute wait, after which the same follower is retried.

    Args:
        user: Prefix shared by the input and output CSV file names.
        api: Client exposing ``favorites``, as returned by ``login``.

    Returns:
        pandas.DataFrame with a single ``text`` column (newlines and tabs
        removed). The frame is empty, but still has the column, when nothing
        was collected.

    Raises:
        SystemExit: With code 1 when the followers CSV does not exist.
    """
    try:
        users_df = pd.read_csv(user + "_followers.csv")
    except FileNotFoundError:
        # Only a missing file is reported this way; parse or permission errors
        # propagate with their real traceback instead of a misleading message.
        print(user + "_followers.csv can not be found!")
        sys.exit(1)
    # collect favorited tweets of followers of the user
    print("Total number of users: {0}".format(len(users_df)))
    favs = []
    for i, name in enumerate(users_df['name']):
        fetched = _fetch_user_favs(api, name)
        if fetched is None:
            continue
        favs.extend(fetched)
        print("Collecting user {0}".format(i))
    print("DONE! {0} favorited tweets in the pocket".format(len(favs)))
    text = [[fav.full_text.replace("\n", "").replace("\t", "")] for fav in favs if fav.lang == "en"]
    # Naming the column in the constructor keeps an empty result from raising
    # a length-mismatch error on column assignment.
    fav_df = pd.DataFrame(data=text, columns=["text"])
    print("We are left with {0} tweets after language filter.".format(len(fav_df)))
    fav_df.to_csv(user + "_folls_favs_extented3.csv")
    return fav_df


def _fetch_user_favs(api, name):
    """Return one follower's favorites, or None if they cannot be fetched."""
    while True:
        try:
            return api.favorites(name, count=200, tweet_mode="extended")
        except tweepy.RateLimitError:
            # Wait out the rate-limit window, then retry this same follower
            # so nobody is silently dropped from the collection.
            limit_handler(15)
        except tweepy.TweepError as e:
            if str(e) == "Not authorized.":
                print("XXXXX NOT AUTHORIZED XXXXX")
            else:
                print(str(e))
            return None
        except Exception as e:
            # Best effort: report the problem and move on to the next follower.
            print(e)
            return None

In [None]:
def df_tokenize(df):
  """Clean the text with ``clense`` and tokenize every entry.

  Args:
    df: Text container accepted by the ``prep`` helpers; the cleaned result
      must provide ``.apply`` (presumably a pandas Series of strings —
      confirm against the ``preprocessing`` module).

  Returns:
    The result of applying ``nltk.word_tokenize`` to each cleaned entry.
  """
  # Reuse the shared cleaning pipeline instead of repeating its steps here,
  # so tokenized and non-tokenized text can never drift apart.
  return clense(df).apply(nltk.word_tokenize)

In [None]:
def clense(df):
  """Run the text through the ``prep`` cleaning steps, in a fixed order.

  The steps are ``remove_url``, ``remove_mentions``, ``remove_num``,
  ``remove_punc``, ``remove_repeat`` and ``get_low``; each one receives the
  output of the previous step.

  Args:
    df: Text container accepted by the ``prep`` helpers.

  Returns:
    Whatever the final step, ``prep.get_low``, returns.
  """
  pipeline = (
    "remove_url",
    "remove_mentions",
    "remove_num",
    "remove_punc",
    "remove_repeat",
    "get_low",
  )
  cleaned = df
  for step_name in pipeline:
    # Resolve each helper only when its turn comes, one after another.
    cleaned = getattr(prep, step_name)(cleaned)
  return cleaned

In [None]:
# Credentials come from the environment so that no secret is stored in the
# notebook. Set TWITTER_CONSUMER_KEY and TWITTER_CONSUMER_SECRET before
# starting the kernel; a missing variable fails fast with a KeyError.
# Any key that was ever committed to a notebook should be regenerated.
api = login(os.environ["TWITTER_CONSUMER_KEY"], os.environ["TWITTER_CONSUMER_SECRET"], False)

In [None]:
# Fetch up to 10 result pages of followers; collect_followers also writes
# them to "<user>_followers.csv".
folowers_df = collect_followers(user="TheTweetOfGod", page=10, api=api)
folowers_df

In [None]:
# Reads "<user>_followers.csv" and gathers the English-language favorites of
# every account listed in it.
favs_df = collect_favs(user="TheTweetOfGod", api=api)
favs_df

In [None]:
# Fetch up to 15 result pages of the account's own tweets, scored by
# favorites plus retweets.
user_df = collect_tweets(user="TheTweetOfGod", page=15, api=api)
user_df