# Docs

Tweepy Docs: http://docs.tweepy.org/en/latest/api.html — community library (supports the Standard API only)<br>
TwitterAPI Docs: http://geduldig.github.io/TwitterAPI/ — community library (supports the Premium API)<br>
Search Tweets API: https://twitterdev.github.io/search-tweets-python/ — official Python library from Twitter

# Import Libraries

In [186]:
import os
import sys
import glob
import math
import time
import json
import importlib
from datetime import datetime
import re

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import tweepy as tw
from TwitterAPI import TwitterAPI

In [3]:
# Workstation-specific notebook folder. Set the TWITTER_NOTEBOOK_DIR environment
# variable to point at your own checkout. When the folder does not exist the
# kernel's current directory is kept instead of raising FileNotFoundError
# (presumably the kernel was already started from the notebooks folder).
notebookFol = os.environ.get('TWITTER_NOTEBOOK_DIR', 'C:\\Users\\Craig D\\Code\\twitter\\notebooks')
if os.path.isdir(notebookFol):
    os.chdir(notebookFol)
else:
    print("Notebook folder not found, keeping current directory:\n", notebookFol)

In [4]:
# Derive the project layout from the current working directory: the project
# root is the parent of the cwd, with "models" and "data" folders beneath it.
projFol = os.path.dirname(os.getcwd())
modelsFol = os.path.join(projFol,"models")
dataFol = os.path.join(projFol,"data")

print("Current working directory:\n",os.getcwd())
print("\nProj folder:\n",projFol,"\nModels folder:\n",modelsFol,"\nData folder;\n",dataFol)

# Make modules stored under models/ importable. The membership check keeps the
# cell idempotent: re-running it no longer appends a duplicate entry each time.
if modelsFol not in sys.path:
    sys.path.append(modelsFol)
# print("\nSystem Path folders:")
# sys.path

# importlib.reload(clean)

Current working directory:
 C:\Users\Craig D\Code\twitter\notebooks

Proj folder:
 C:\Users\Craig D\Code\twitter 
Models folder:
 C:\Users\Craig D\Code\twitter\models 
Data folder;
 C:\Users\Craig D\Code\twitter\data


In [5]:
import config

# Authentication

In [6]:
# Copy the four OAuth credentials out of the local config module so the keys
# themselves never appear in the notebook.
consumer_key, consumer_secret = config.consumer_key, config.consumer_secret
access_token, access_token_secret = config.access_token, config.access_token_secret

In [7]:
# Build the tweepy OAuth handler from the app (consumer) credentials, then
# attach the account-level access token to it.
auth = tw.OAuthHandler(consumer_key=consumer_key, consumer_secret=consumer_secret)
auth.set_access_token(key=access_token, secret=access_token_secret)

In [8]:
# Two clients over the same credentials, one per library compared below.
# TwitterAPI takes the four credential strings directly.
oauth_credentials = (consumer_key, consumer_secret, access_token, access_token_secret)
twAPI = TwitterAPI(*oauth_credentials)
# tweepy reuses the handler built above; wait_on_rate_limit=True is tweepy's
# option for sleeping through rate limits instead of raising.
api = tw.API(auth, wait_on_rate_limit=True)

# Twitter Basics

Original tweets - not in response to a tweet

# Search Twitter for tweets

## define search terms

In [104]:
# Query settings shared by both client libraries below:
# the hashtag to look for and the date window handed to the tweepy search.
search_words = "#waterrights"
date_since, date_until = "2020-06-01", "2020-07-08"

In [105]:
# Optional variant of the query: the -filter:retweets operator is appended to
# the search words so retweets are excluded.
no_retweets = f"{search_words} -filter:retweets"
no_retweets

'#waterrights -filter:retweets'

## using tweepy

### search tweets > cursor.ItemIterator 

In [106]:
# Keyword arguments handed to tw.Cursor together with api.search.
# NOTE(review): `since` does not appear among the documented standard-search
# parameters (`until` does) — confirm it actually limits the results.
search_kwargs = {
    'q': search_words,
    'lang': "en",
    'result_type': 'recent',
    'since': date_since,
    'until': date_until,
}
# Cap the iterator at 100 tweets in total.
tweepyC = tw.Cursor(api.search, **search_kwargs).items(100)

In [107]:
type(tweepyC)     # confirm what kind of object Cursor(...).items() handed back

tweepy.cursor.ItemIterator

In [96]:
print(dir(tweepyC))    # list the attributes and methods available on the iterator

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__next__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'current_page', 'limit', 'next', 'num_tweets', 'page_index', 'page_iterator', 'prev']


In [59]:
# Bare expression: displays only the iterator's repr, not the tweets themselves.
tweepyC

<tweepy.cursor.ItemIterator at 0x1d3d65b1880>

### cursor.ItemIterator > models.Status

In [108]:
# Drain the iterator into a list so the tweets can be indexed and reused.
# (Presumably this is where the search requests are actually sent — the cursor
# above looks lazy; confirm against tweepy's Cursor docs.)
tweepyList = list(tweepyC)

In [109]:
type(tweepyList[0])    # each collected item is a tweepy Status model object

tweepy.models.Status

In [99]:
print(dir(tweepyList[0]))    # list the attributes and methods exposed on a Status object

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_api', '_json', 'author', 'contributors', 'coordinates', 'created_at', 'destroy', 'entities', 'favorite', 'favorite_count', 'favorited', 'geo', 'id', 'id_str', 'in_reply_to_screen_name', 'in_reply_to_status_id', 'in_reply_to_status_id_str', 'in_reply_to_user_id', 'in_reply_to_user_id_str', 'is_quote_status', 'lang', 'metadata', 'parse', 'parse_list', 'place', 'retweet', 'retweet_count', 'retweeted', 'retweeted_status', 'retweets', 'source', 'source_url', 'text', 'truncated', 'user']


### examine json of single tweet

In [63]:
# Top-level keys of the dict that the first Status object carries as `_json`.
tweepyList[0]._json.keys()

dict_keys(['created_at', 'id', 'id_str', 'text', 'truncated', 'entities', 'metadata', 'source', 'in_reply_to_status_id', 'in_reply_to_status_id_str', 'in_reply_to_user_id', 'in_reply_to_user_id_str', 'in_reply_to_screen_name', 'user', 'geo', 'coordinates', 'place', 'contributors', 'retweeted_status', 'is_quote_status', 'retweet_count', 'favorite_count', 'favorited', 'retweeted', 'possibly_sensitive', 'lang'])

### tweet jsons > List of Dicts 

In [110]:
# Keep only the `_json` dict of every Status object; the DataFrame below is
# built from these plain dicts.
tweepyDicts = [tweet_status._json for tweet_status in tweepyList]

In [111]:
# Top-level keys of the first tweet dict; these are the fields described below.
tweepyDicts[0].keys()

dict_keys(['created_at', 'id', 'id_str', 'text', 'truncated', 'entities', 'metadata', 'source', 'in_reply_to_status_id', 'in_reply_to_status_id_str', 'in_reply_to_user_id', 'in_reply_to_user_id_str', 'in_reply_to_screen_name', 'user', 'geo', 'coordinates', 'place', 'contributors', 'retweeted_status', 'is_quote_status', 'retweet_count', 'favorite_count', 'favorited', 'retweeted', 'lang'])

### examine other tweet params in a df

Of the keys shown above, let's start with:
- **created_at**,    UTC timestamp of when the tweet was posted    #date string
- **id**,            tweet id    #integer
- **text**,          tweet text    #string
- **truncated**,     whether tweet is truncated    #boolean
- **entities**,      dict of hashtags, symbols, user_mentions, urls
- **metadata**,     'iso_language_code' & 'result_type'    #dict
- **source**,       Tweetdeck/Android/iPhone/iPad/WebApp/Hootsuite Inc/TwitterforAdvertisers    #'string'
- **in_reply_to_status_id**, None (if original tweet) or Original Tweet id    # integer
- **in_reply_to_user_id**, None (if original tweet) or Original Handle's id    # integer
- **in_reply_to_screen_name**, None (if original tweet) or Original Handle's screen name    # string
- **user**,          dict of id,name,screen_name, location, description, url, entities {'url'},'protected','followers_count','friends_count','listed_count','created_at','favourites_count','utc_offset','time_zone','geo_enabled','verified','statuses_count','lang','contributors_enabled','is_translator','is_translation_enabled','following','follow_request_sent','notifications','translator_type'       # dict
- **geo**,          mostly appears as None
- **coordinates**,  mostly appears as None
- **place**,        mostly appears as None
- **contributors**, mostly appears as None
- **retweeted_status**, dict of all of the keys of a original tweet    #dict
- **is_quote_status**, boolean, whether tweet is a quote
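- **retweet_count**, number of times this tweet has been retweeted    #int
- **favorite_count**, number of times this tweet has been favorited    #int
- **favorited**,    whether the authenticated account has favorited this tweet (not whether anyone has; see favorite_count)    #boolean
- **retweeted**,    whether the authenticated account has retweeted this tweet (not whether anyone has; see retweet_count)    #boolean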
- **lang**,         language of tweet

In [199]:
def _hashtag_texts(tweet):
    """Return the hashtag strings listed under a tweet dict's entities."""
    return [tag['text'] for tag in tweet['entities']['hashtags']]


# One entry per DataFrame column: column name -> function that pulls the value
# out of a tweet dict. Keeping the name and its extractor side by side avoids
# the two long positional lists drifting out of step, and the loop variable no
# longer reuses the name of the imported `json` module.
TWEET_COLUMNS = {
    'date': lambda tweet: tweet['created_at'],
    'text': lambda tweet: tweet['text'],
    'hashtags': _hashtag_texts,
    'source': lambda tweet: tweet['source'],
    'in_reply_to': lambda tweet: tweet['in_reply_to_screen_name'],
    'user_screen_name': lambda tweet: tweet['user']['screen_name'],
    'user_location': lambda tweet: tweet['user']['location'],
    'user_followers': lambda tweet: tweet['user']['followers_count'],
    'user_created_at': lambda tweet: tweet['user']['created_at'],
    'user_tweets': lambda tweet: tweet['user']['statuses_count'],
    'place': lambda tweet: tweet['place'],
    'favorited': lambda tweet: tweet['favorited'],
    'retweeted': lambda tweet: tweet['retweeted'],
    'is_quote_status': lambda tweet: tweet['is_quote_status'],
    'retweet_count': lambda tweet: tweet['retweet_count'],
    'favorite_count': lambda tweet: tweet['favorite_count'],
    'lang': lambda tweet: tweet['lang'],
}

# `params` stays a list of rows (one list of values per tweet), in column order.
params = [[extract(tweet) for extract in TWEET_COLUMNS.values()] for tweet in tweepyDicts]
df = pd.DataFrame(params, columns=list(TWEET_COLUMNS))

In [219]:
# Timestamp layout of the raw created_at strings, e.g. "Tue Jul 07 00:08:03 +0000 2020".
TWITTER_TIME_FORMAT = "%a %b %d %H:%M:%S +0000 %Y"


def _client_name(source):
    """Return the client label found between '>' and '<' in a tweet's source value.

    Values without such markup (a label already extracted by an earlier run of
    this cell, an empty string, a missing value) are returned unchanged; the
    previous `re.findall(...)[0]` raised IndexError on them.
    """
    if not isinstance(source, str):
        return source
    found = re.search(r'\>(.+?)\<', source)
    return found.group(1) if found else source


df['date'] = pd.to_datetime(df['date'],format=TWITTER_TIME_FORMAT)
df['source'] = df['source'].apply(_client_name)
df['user_created_at'] = pd.to_datetime(df['user_created_at'],format=TWITTER_TIME_FORMAT)

In [238]:
# Preview the first ten rows of the cleaned frame.
df.head(10)

Unnamed: 0,date,text,hashtags,source,in_reply_to,user_screen_name,user_location,user_followers,user_created_at,user_tweets,place,favorited,retweeted,is_quote_status,retweet_count,favorite_count,lang
0,2020-07-07 00:08:03,RT @AALRR: Register Now! | Live Audio Webinar ...,[],TweetDeck,,vxjoe,Rialto CA 92376,472,2009-12-07 08:09:03,54657,,False,False,False,2,0,en
1,2020-07-07 00:07:36,RT @AALRR: Register Now! | Live Audio Webinar ...,[],TweetDeck,,RialtoBoard,"Rialto,CA",616,2010-11-30 06:01:19,61605,,False,False,False,2,0,en
2,2020-07-06 18:38:35,RT @ProtectHarvest: Help us protect your way o...,"[agriculture, farming, ranching, hunting]",Twitter for Android,,CHConservative,South-Central PA,66,2012-10-23 02:31:47,2999,,False,False,False,1,0,en
3,2020-07-06 18:28:00,Help us protect your way of life – consider do...,"[agriculture, farming, ranching]",Twitter for Advertisers,,ProtectHarvest,USA,7709,2011-10-07 21:22:15,23594,,False,False,False,1,4,en
4,2020-07-06 16:49:38,@jneposibo 🙂#Permaculture is so underrated. It...,[Permaculture],Twitter Web App,jneposibo,EarthSymbol,#Australia #Oceania,2551,2017-02-27 02:27:22,27671,,False,False,False,1,0,en
5,2020-07-06 11:35:03,Court upholds ban on sale of water rights by #...,"[farmers, WISA, WaterCrisis, SaveWater]",Hootsuite Inc.,,WaterInstSA,"16th Road Midrand, Gauteng",4483,2016-04-08 08:24:16,2668,,False,False,False,0,1,en
6,2020-07-06 09:19:38,Countdown to a bitter battle over the water of...,"[Africa, Ethiopia, Egypt]",Twitter Web App,,equaltimes,Worldwide,5687,2012-08-21 12:10:23,9376,,False,False,False,0,1,en
7,2020-07-05 18:02:47,"Rudolfo Anaya, a Father of Chicano Literature ...","[obitpix, rudolfoanaya, writer, chicano, newme...",Twitter for iPhone,,PeterLandau,Upper U.S.,975,2008-09-02 15:15:38,26859,,False,False,False,0,4,en
8,2020-07-04 22:29:00,Sincere support to protestors demonstrating ru...,"[naziRally, MtRushmore, RepublicansRWhiteSupre...",Twitter for Android,,L2H2a,,116,2015-12-28 19:44:39,18636,,False,False,False,0,1,en
9,2020-07-03 22:38:00,A complex #water rights situation pits #LasVeg...,"[water, LasVegas, Nevada]",Twitter for Advertisers,,ProtectHarvest,USA,7709,2011-10-07 21:22:15,23594,,False,False,False,0,0,en


In [193]:
# Scratch check of a regex against the first row's source value (column index 3).
# NOTE(review): the recorded match sits at characters 69-80, so this cell was run
# while df['source'] still held the raw markup; after the cleanup cell the value
# is just the label and this search returns None. Also, `\w+` cannot match
# labels containing spaces (e.g. "Twitter for Android").
re.search(r">(\w+)<",df.iloc[0,3])

<re.Match object; span=(69, 80), match='>TweetDeck<'>

## using Twitter API

In [95]:
# Same hashtag query sent through the TwitterAPI client; only `q` is passed,
# with no language or date arguments.
search_params = {'q': search_words}
twi = twAPI.request('search/tweets', search_params)

In [97]:
# Check what kind of object twAPI.request returned.
type(twi)

TwitterAPI.TwitterAPI.TwitterResponse

In [96]:
print(dir(twi))    # list the attributes and methods available on the response object

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'close', 'get_iterator', 'get_quota', 'headers', 'json', 'response', 'status_code', 'stream', 'text']


In [98]:
# Iterating the response yields the returned tweets one by one; collect them
# into a list so they can be indexed.
twiAPIList = list(twi)

In [101]:
type(twiAPIList[0])    # TwitterAPI yields plain dicts, with no wrapper model class

dict

In [102]:
print(dir(twiAPIList[0]))    # only the standard dict methods are available

['__class__', '__contains__', '__delattr__', '__delitem__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__setattr__', '__setitem__', '__sizeof__', '__str__', '__subclasshook__', 'clear', 'copy', 'fromkeys', 'get', 'items', 'keys', 'pop', 'popitem', 'setdefault', 'update', 'values']


In [108]:
# Top-level keys of the third returned tweet. The key set varies per tweet:
# `retweeted_status`, seen on a tweet earlier in the notebook, is absent here.
twiAPIList[2].keys()

dict_keys(['created_at', 'id', 'id_str', 'text', 'truncated', 'entities', 'metadata', 'source', 'in_reply_to_status_id', 'in_reply_to_status_id_str', 'in_reply_to_user_id', 'in_reply_to_user_id_str', 'in_reply_to_screen_name', 'user', 'geo', 'coordinates', 'place', 'contributors', 'is_quote_status', 'retweet_count', 'favorite_count', 'favorited', 'retweeted', 'lang'])

In [111]:
# Compare the top-level key sets of the first tweet from each library.
# NOTE(review): this checks a single pair of tweets only; since some keys are
# optional per tweet, True here does not show that every pair matches.
tweepyList[0]._json.keys() == twiAPIList[0].keys()

True

# Tweet

In [None]:
# Post a tweet from Python
# api.update_status("Hello world! I'm a twitter bot")
# Your tweet has been posted!