In [2]:
import twitter
import pandas as pd

In [3]:
#### https://github.com/bear/python-twitter/blob/master/get_access_token.py

from __future__ import print_function

from requests_oauthlib import OAuth1Session
import webbrowser

import sys

if sys.version_info.major < 3:
    input = raw_input

REQUEST_TOKEN_URL = 'https://api.twitter.com/oauth/request_token'
ACCESS_TOKEN_URL = 'https://api.twitter.com/oauth/access_token'
AUTHORIZATION_URL = 'https://api.twitter.com/oauth/authorize'
SIGNIN_URL = 'https://api.twitter.com/oauth/authenticate'


def get_access_token(consumer_key, consumer_secret):
    """Get an access token for a given consumer key and secret.
    Args:
        consumer_key (str):
            Your application consumer key.
        consumer_secret (str):
            Your application consumer secret.
    Returns:
        (None) Prints to command line.
    """
    oauth_client = OAuth1Session(consumer_key, client_secret=consumer_secret, callback_uri='oob')

    print('\nRequesting temp token from Twitter...\n')

    resp = oauth_client.fetch_request_token(REQUEST_TOKEN_URL)

    url = oauth_client.authorization_url(AUTHORIZATION_URL)

    print('I will try to start a browser to visit the following Twitter page '
          'if a browser will not start, copy the URL to your browser '
          'and retrieve the pincode to be used '
          'in the next step to obtaining an Authentication Token: \n'
          '\n\t{0}'.format(url))

    webbrowser.open(url)
    pincode = input('\nEnter your pincode? ')

    print('\nGenerating and signing request for an access token...\n')

    oauth_client = OAuth1Session(consumer_key, client_secret=consumer_secret,
                                 resource_owner_key=resp.get('oauth_token'),
                                 resource_owner_secret=resp.get('oauth_token_secret'),
                                 verifier=pincode)
    try:
        resp = oauth_client.fetch_access_token(ACCESS_TOKEN_URL)
    except ValueError as e:
        raise 'Invalid response from Twitter requesting temp token: {0}'.format(e)

    print('''Your tokens/keys are as follows:
        consumer_key         = {ck}
        consumer_secret      = {cs}
        access_token_key     = {atk}
        access_token_secret  = {ats}'''.format(
            ck=consumer_key,
            cs=consumer_secret,
            atk=resp.get('oauth_token'),
            ats=resp.get('oauth_token_secret')))


def main():
    """Run script to get access token and secret for given app."""
    consumer_key = input('Enter your consumer key: ')
    consumer_secret = input('Enter your consumer secret: ')
    get_access_token(consumer_key, consumer_secret)

In [None]:
main()

In [6]:
api = twitter.Api(consumer_key=consumer_key,
                              consumer_secret=consumer_secret,
                              access_token_key=access_token_key,
                              access_token_secret=access_token_secret)

In [7]:
print(api.VerifyCredentials())

{"created_at": "Mon Jun 15 22:05:34 +0000 2020", "default_profile": true, "description": "Cosmologists with interest in Earth", "favourites_count": 12, "followers_count": 1, "friends_count": 35, "id": 1272650783918448640, "id_str": "1272650783918448640", "name": "Vanessa Boehm", "profile_background_color": "F5F8FA", "profile_banner_url": "https://pbs.twimg.com/profile_banners/1272650783918448640/1592260389", "profile_image_url": "http://pbs.twimg.com/profile_images/1272658299679150081/hcWVZGJh_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1272658299679150081/hcWVZGJh_normal.jpg", "profile_link_color": "1DA1F2", "profile_sidebar_border_color": "C0DEED", "profile_sidebar_fill_color": "DDEEF6", "profile_text_color": "333333", "profile_use_background_image": true, "screen_name": "VanessaBoehm8", "status": {"created_at": "Tue Jun 16 21:56:41 +0000 2020", "id": 1273011640758231041, "id_str": "1273011640758231041", "lang": "en", "retweet_count": 2, "retweeted":

In [8]:
api.GetFollowers()

[User(ID=1271253492603080705, ScreenName=chmod_i)]

In [9]:
timeline = api.GetUserTimeline(screen_name='BernieSanders', count=1000)

In [31]:
timeline[0].

'“My son and so many other children, their lives matter… My daughter’s life matters. So until we have change in our… https://t.co/IolQoxWJLS'

In [117]:
def convert_twitter_output(timeline):
    output = []
    for tweet in timeline:
        tw ={'created':tweet.created_at,'text':tweet.text}
        output.append(tw)
    return output
    

In [118]:
tweets = convert_twitter_output(timeline)

In [119]:
df = pd.DataFrame(tweets)

In [120]:
df['text']

0      “My son and so many other children, their live...
1      This is what narcissism is about. Trump, in or...
2      We are going to take the fight for justice fro...
3      The people are crying out against police bruta...
4      Congratulations to Rick Krajewski, Nikil Saval...
                             ...                        
195    This is catastrophic. We need to keep every em...
196    The coronavirus crisis has had a devastating e...
197    RT @RealJusticePAC: TONIGHT: Jails are major C...
198    TONIGHT: Join us for a live town hall on the d...
199    The climate crisis is the greatest challenge f...
Name: text, Length: 200, dtype: object

In [125]:
import re

In [122]:
# cleaning 

def remove_url(txt):
    return re.sub(r"http\S+", "", txt)
    
def remove_rt(txt):
    return re.sub(r"RT", "", txt)
      
def remove_hashtag(txt):
    return re.sub(r'#\S+','', txt)

def remove_mentions(txt):
    return re.sub(r'@\S+','', txt)

def remove_controls(txt):
    return re.sub(r'[\n\r\t\f\v]','',txt)

def remove_whitespaces(txt):
    return re.sub(r'  +','',txt)

def remove_numbers(txt):
    return re.sub(r'\d','',txt)

def remove_special_characters(txt):
    return re.sub(r'[^\w ]+','',txt)

def remove_stopwords(txt):
    for w

function_list=[remove_url, remove_rt, remove_controls, remove_mentions, remove_hashtag, remove_whitespaces, remove_numbers, remove_special_characters]

In [123]:
for func in function_list:
    df['text'] = df['text'].apply(lambda x: func(x))

In [133]:
import nltk
nltk.download()

showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml


True

In [127]:
from nltk.tokenize import RegexpTokenizer

In [129]:
tokenizer = RegexpTokenizer(r'\w+')
    
df['text'] = df['text'].apply(lambda x: tokenizer.tokenize(x.lower()))

In [134]:
from nltk.corpus import stopwords

def remove_stopwords(x):
    return [ww for ww in x if ww not in stopwords.words('english')]

df['text'] = df['text'].apply(lambda x: remove_stopwords(x)) 

In [144]:
from nltk.stem import WordNetLemmatizer
lemmatizer= WordNetLemmatizer()

In [145]:
def word_lemmatizer(x):
    return [lemmatizer.lemmatize(ww) for ww in x]

df['text'] = df['text'].apply(lambda x: word_lemmatizer(x)) 

['narcissism',
 'trump',
 'order',
 'hear',
 'cheer',
 'adoring',
 'crowd',
 'defy',
 'science',
 'sacri']