# Managing Twitter handles

In [1]:
import pandas as pd
import daiquiri
import logging
import sys
import os.path as path
from twitter_scraping.twitter_scraping import TweetsFilter, TwitterScraper

%load_ext autoreload

## Instantiate relevant objects

In [2]:
# Directory, relative to this notebook, that receives the log file.
LOGS_DIR = '../logs/'

# Send log records to two places: the notebook's stdout and a file under
# LOGS_DIR formatted with daiquiri's TEXT_FORMATTER.
stdout_output = daiquiri.output.Stream(sys.stdout)
file_output = daiquiri.output.File(
    path.join(LOGS_DIR, 'update_tweets.log'),
    formatter=daiquiri.formatter.TEXT_FORMATTER,
)
daiquiri.setup(level=logging.INFO, outputs=(stdout_output, file_output))

# Logger handed to the scraper in the next cell.
logger = daiquiri.getLogger(__name__)

In [3]:
# Build the scraper with the notebook's logger, then call `load_tweets()`.
# NOTE(review): presumably `load_tweets()` is what populates
# `twitter_scraper.tweets_df` — confirm in `twitter_scraping`.
twitter_scraper = TwitterScraper(logger)
twitter_scraper.load_tweets()

# Wrap the scraper's tweets dataframe in a TweetsFilter; the cells below read
# the data back through `tweets_filter.tweets_df`.
tweets_filter = TweetsFilter(twitter_scraper.tweets_df)

Get the number of tweets for each handle.

In [4]:
# Count rows per handle. `Series.reset_index()` already returns a DataFrame
# (with the unnamed count column labelled 0), so no explicit
# `pd.DataFrame(...)` wrapper is needed.
(
    tweets_filter.tweets_df
    .groupby(by='twitter_handle')
    .size()
    .reset_index()
)

Unnamed: 0,twitter_handle,0
0,888Holdings,132
1,AA,4
2,AdmiralGroup,684
3,Aggreko,3
4,Amigo,6
5,AngloAmerican,2590
6,Antofagasta,1532
7,Ascential,19
8,Assura,1
9,AstraZeneca,3212


Get the date of each handle's last tweet.

In [5]:
# Maximum `created_at` value within each `twitter_handle` group.
(
    tweets_filter.tweets_df
    .groupby(by='twitter_handle')['created_at']
    .max()
)

twitter_handle
888Holdings       2011-04-28 13:42:55
AA                2012-05-15 06:13:24
AdmiralGroup      2018-10-19 16:42:41
Aggreko           2014-04-23 07:16:38
Amigo             2016-12-20 01:42:41
AngloAmerican     2018-10-25 14:22:01
Antofagasta       2018-10-17 00:48:12
Ascential         2017-08-07 23:20:29
Assura            2015-06-20 20:15:47
AstraZeneca       2018-10-26 09:59:06
Avast             2018-03-17 15:54:10
Aviva             2018-02-19 00:24:45
BHP               2018-10-26 07:16:24
BP                2017-10-25 18:19:46
BT                2018-10-25 20:07:53
BTG               2018-10-19 15:39:03
BalfourBeatty     2018-10-25 15:48:22
BankofGeorgia     2018-10-25 18:17:16
Barclays          2018-10-25 14:12:38
BeazleyGroup      2018-10-18 12:26:23
BritishLand       2015-12-02 12:36:38
Britvic           2018-10-25 14:18:41
Burberry          2018-10-25 15:00:14
CLSHoldings       2018-09-28 09:58:44
CRH               2018-10-24 14:56:16
CairnEnergy       2018-10-25 11:37:

Generate a combined dataframe with each Twitter handle, its number of tweets, and the datetime of its latest tweet.

In [6]:
# Group once and derive both per-handle aggregates from the same grouping.
tweets_by_handle = tweets_filter.tweets_df.groupby(by='twitter_handle')

# Column-wise concat aligns the two Series on the `twitter_handle` index.
# The unnamed size Series arrives as column 0 and is renamed to `n_tweets`.
handles_summary_df = (
    pd.concat(
        [tweets_by_handle.size(), tweets_by_handle['created_at'].max()],
        axis=1,
    )
    .reset_index()
    .rename(columns={0: 'n_tweets'})
)

handles_summary_df

Unnamed: 0,twitter_handle,n_tweets,created_at
0,888Holdings,132,2011-04-28 13:42:55
1,AA,4,2012-05-15 06:13:24
2,AdmiralGroup,684,2018-10-19 16:42:41
3,Aggreko,3,2014-04-23 07:16:38
4,Amigo,6,2016-12-20 01:42:41
5,AngloAmerican,2590,2018-10-25 14:22:01
6,Antofagasta,1532,2018-10-17 00:48:12
7,Ascential,19,2017-08-07 23:20:29
8,Assura,1,2015-06-20 20:15:47
9,AstraZeneca,3212,2018-10-26 09:59:06


## Test `generate_handles_summary` from the `app_components` module

In [7]:
%autoreload 2

from app_components.app_components import generate_handles_summary

In [8]:
# Run the module's implementation on the same tweets dataframe used for
# `handles_summary_df`, so the two displayed tables can be compared by eye.
generate_handles_summary(tweets_filter.tweets_df)

Unnamed: 0,twitter_handle,n_tweets,created_at
0,888Holdings,132,2011-04-28 13:42:55
1,AA,4,2012-05-15 06:13:24
2,AdmiralGroup,684,2018-10-19 16:42:41
3,Aggreko,3,2014-04-23 07:16:38
4,Amigo,6,2016-12-20 01:42:41
5,AngloAmerican,2590,2018-10-25 14:22:01
6,Antofagasta,1532,2018-10-17 00:48:12
7,Ascential,19,2017-08-07 23:20:29
8,Assura,1,2015-06-20 20:15:47
9,AstraZeneca,3212,2018-10-26 09:59:06
