# import packages

In [None]:
import tweepy
import yaml
import json
import sqlite3
from datetime import datetime
import pandas as pd
from collections import Counter
import re
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime
from matplotlib.ticker import FuncFormatter

# Read Twitter authentication keys

In [None]:
# YAML file reader function
def read_yaml(file_path):
    """Load a YAML document from disk and return the parsed Python object.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` builds from the file content.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default text encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)

# yaml config file path (relative, so it is resolved against the current
# working directory)
file_path = "twitter_api_key_config.yaml"
# read from config file; the authentication cell looks up the keys
# "api_key", "api_secret_token", "access_token" and "access_token_secret"
api_credential = read_yaml(file_path)

# Create Twitter Authentication

In [None]:
# Authenticate against the Twitter API with the credentials from the config file
auth = tweepy.OAuthHandler(
    api_credential["api_key"],
    api_credential["api_secret_token"],
)
auth.set_access_token(
    api_credential["access_token"],
    api_credential["access_token_secret"],
)
# Client object used by the search cells; rate-limit waiting is switched on
api = tweepy.API(auth, wait_on_rate_limit=True)

# Create Database and Table

In [None]:
# Open (or create) the database file and make sure both tables exist
conn = sqlite3.connect('tweet_example_historic_v1.db')
cur = conn.cursor()

# IF NOT EXISTS keeps this cell re-runnable: a plain CREATE TABLE raises
# sqlite3.OperationalError as soon as the table is already there.
# quote_count is declared INT like the other counters (it was TEXT).
create_tweet_info_table = """CREATE TABLE IF NOT EXISTS tweet_info_historic_v1(
    tweet_id BIGINT PRIMARY KEY,
    user_id BIGINT,
    tweet_lang TEXT,
    tweet_time TEXT,
    source TEXT,
    tweet_text TEXT,
    quote_count INT,
    reply_count INT,
    retweet_count INT,
    tweet_favorite_count INT,
    hashtags TEXT,
    short_urls TEXT,
    expanded_urls TEXT,
    user_mentions TEXT,
    in_reply_to_user_id BIGINT,
    in_reply_to_user_name TEXT,
    in_reply_to_status_id BIGINT,
    retweet_id BIGINT,
    retweet_user_id BIGINT,
    retweet_user_name TEXT,
    quote_id BIGINT,
    quote_user_id BIGINT,
    quote_user_name TEXT);"""

create_user_info_table = """CREATE TABLE IF NOT EXISTS user_info_historic_v1(
    user_id BIGINT PRIMARY KEY,
    user_screen_name TEXT,
    user_name TEXT,
    user_language TEXT,
    location TEXT,
    profile_url TEXT,
    description TEXT,
    protected TEXT,
    verified TEXT,
    created_at TEXT,
    friends_count BIGINT,
    followers_count BIGINT,
    favorites_count BIGINT,
    statuses_count BIGINT);"""

# Close the connection even when one of the statements fails
try:
    cur.execute(create_tweet_info_table)
    cur.execute(create_user_info_table)
    conn.commit()
finally:
    conn.close()

# Collect historic Tweets

# Tweet explorer

In [None]:
class TweetExplorer:
    """Stateless helpers that pull text and entity strings out of tweet dicts."""

    def __init__(self):
        pass

    def add_quote_content(self, tweet_content, quote_content, add_string=";"):
        """Combine a tweet's value with the matching value of its quoted tweet.

        Args:
            tweet_content: Value taken from the tweet itself (may be empty/None).
            quote_content: Value taken from the quoted tweet (may be empty/None).
            add_string: Separator placed between the two when both are non-empty.

        Returns:
            Both values joined by ``add_string`` when both are truthy, the
            single truthy value when only one is, otherwise ``""``.
        """
        if tweet_content and quote_content:
            return tweet_content + add_string + quote_content
        # at most one side carries content from here on
        return quote_content or tweet_content or ""

    def get_text_entities(self, tweet_object):
        """Extract the text and the ';'-joined entity fields of one tweet dict.

        Args:
            tweet_object: Dict with a ``"text"`` or ``"full_text"`` key and an
                ``"entities"`` dict holding ``"hashtags"``, ``"user_mentions"``
                and ``"urls"`` lists.

        Returns:
            Tuple ``(text, hashtags, mention ids, mention screen names,
            short urls, expanded urls)``; every entity element is a single
            string joined with ``";"`` (empty string when the list is empty).
        """
        # "text" wins when both keys are present
        text = tweet_object["text"] if "text" in tweet_object else tweet_object["full_text"]

        def joined(group, field):
            # one entity list reduced to a single ';'-separated string
            return ";".join([entry[field] for entry in tweet_object["entities"][group]])

        return (
            text,
            joined("hashtags", "text"),
            joined("user_mentions", "id_str"),
            joined("user_mentions", "screen_name"),
            joined("urls", "url"),
            joined("urls", "expanded_url"),
        )

In [None]:
# Shared helper instance; databaseManager.insert_data calls it to extract
# the text and entity strings of each tweet
tweet_explorer = TweetExplorer()
class databaseManager:
    """Write collected tweets and their authors to the local SQLite database.

    Rows go to the ``tweet_info_historic_v1`` and ``user_info_historic_v1``
    tables of 'tweet_example_historic_v1.db'. Text and entity extraction is
    delegated to the module-level ``tweet_explorer`` helper.
    """

    def __init__(self):
        # user ids already written by this instance, so a repeat author does
        # not trigger another user-row insert
        self.unique_user_id_set = set()
        self.conn = sqlite3.connect('tweet_example_historic_v1.db')
        self.cur = self.conn.cursor()

    @staticmethod
    def _status_content(status):
        """Return the ``extended_tweet`` part of *status* if present, else *status*."""
        return status.get("extended_tweet", status)

    def insert_data(self, tweet_object):
        """Store one tweet and, once per instance, its author.

        Args:
            tweet_object: Tweet payload as a dict (the collection loop passes
                ``tweet._json``). A payload without an ``'id'`` key is skipped.

        Behaviour:
            * Retweet: text, hashtags and urls come from the retweeted status;
              user mentions always come from the collected tweet itself.
            * Quote tweet: the quoted status' text and entities are appended
              (text with a space, entity lists with ``";"``).
            * A row whose primary key already exists is ignored instead of
              raising ``sqlite3.IntegrityError``, so duplicate search results
              and re-runs against an existing database do not abort the
              collection.
            * The transaction is committed at the end of every call.
        """
        if 'id' not in tweet_object:
            return

        user = tweet_object['user']
        user_id = user['id']

        is_retweet = "retweeted_status" in tweet_object
        is_quote = "quoted_status" in tweet_object

        # entity tuples are (text, hashtags, mention ids, mention names,
        # short urls, expanded urls)
        native = tweet_explorer.get_text_entities(self._status_content(tweet_object))
        if is_retweet:
            retweeted = tweet_object["retweeted_status"]
            body = tweet_explorer.get_text_entities(self._status_content(retweeted))
        else:
            retweeted = None
            body = native
        tweet_text, hashtags, _, _, short_urls, expanded_urls = body
        # mentions are never taken from the retweeted status
        user_mentions_id = native[2]

        if is_quote:
            quoted = tweet_object["quoted_status"]
            (quote_text, quote_hashtags, quote_mentions_id, _,
             quote_short_urls, quote_expanded_urls) = \
                tweet_explorer.get_text_entities(self._status_content(quoted))
            merge = tweet_explorer.add_quote_content
            tweet_text = merge(tweet_text, quote_text, add_string=" ")
            hashtags = merge(hashtags, quote_hashtags)
            user_mentions_id = merge(user_mentions_id, quote_mentions_id)
            short_urls = merge(short_urls, quote_short_urls)
            expanded_urls = merge(expanded_urls, quote_expanded_urls)
        else:
            quoted = None

        if is_retweet:
            retweet_id = retweeted['id']
            retweet_user_id = retweeted['user']['id']
            retweet_user_name = retweeted['user']['screen_name']
        else:
            retweet_id = retweet_user_id = retweet_user_name = None

        if is_quote:
            quote_id = quoted['id']
            quote_user_id = quoted['user']['id']
            quote_user_name = quoted['user']['screen_name']
        else:
            quote_id = quote_user_id = quote_user_name = None

        # quote_count and reply_count are not read from the payload; zeros
        # are stored as placeholders
        quote_count = 0
        reply_count = 0

        # order must match the column order of tweet_info_historic_v1
        tweet_info = (
            tweet_object['id'],
            user_id,
            tweet_object['lang'],
            str(pd.to_datetime(tweet_object['created_at'])),
            tweet_object['source'],
            tweet_text,
            quote_count,
            reply_count,
            tweet_object['retweet_count'],
            tweet_object['favorite_count'],
            hashtags,
            short_urls,
            expanded_urls,
            user_mentions_id,
            tweet_object['in_reply_to_user_id'],
            tweet_object['in_reply_to_screen_name'],
            tweet_object['in_reply_to_status_id'],
            retweet_id,
            retweet_user_id,
            retweet_user_name,
            quote_id,
            quote_user_id,
            quote_user_name,
        )
        placeholders = ", ".join("?" * len(tweet_info))
        self.cur.execute(
            f"INSERT OR IGNORE INTO tweet_info_historic_v1 VALUES({placeholders});",
            tweet_info)

        # user profile information, written once per user id
        if user_id not in self.unique_user_id_set:
            # order must match the column order of user_info_historic_v1
            user_information = (
                user_id,
                user['screen_name'],
                user['name'],
                user['lang'],
                user['location'],
                user['url'],
                user['description'],
                user['protected'],
                user['verified'],
                str(pd.to_datetime(user['created_at'])),
                user['friends_count'],
                user['followers_count'],
                user['favourites_count'],
                user['statuses_count'],
            )
            placeholders = ", ".join("?" * len(user_information))
            self.cur.execute(
                f"INSERT OR IGNORE INTO user_info_historic_v1 VALUES({placeholders});",
                user_information)
            # remember the id only after the insert went through
            self.unique_user_id_set.add(user_id)

        self.conn.commit()

    def close_database_connection(self):
        """Close the SQLite connection opened by ``__init__``."""
        self.conn.close()

# Search Tweets
https://developer.twitter.com/en/docs/twitter-api/v1/tweets/search/overview

# Rules and Filtering
https://developer.twitter.com/en/docs/twitter-api/v1/rules-and-filtering/build-standard-queries

# Collect all available historic tweets from the last seven days

In [None]:
# Page through the search results for "covid" (100 per request) and keep
# every returned status object in memory
all_historic_tweets = []
covid_cursor = tweepy.Cursor(api.search, q="covid", count=100)
for historic_tweets in covid_cursor.items():
    all_historic_tweets.append(historic_tweets)

In [None]:
# Show how many tweets the collection loop gathered
len(all_historic_tweets)

In [None]:
# Store up to `upper_limit` geo-filtered search results in the database
historic_tweet_collection = []
upper_limit = 2000
dbm = databaseManager()
historic_tweet_count = 0
search_query = "covid"
# geocode filter passed to the search call: "latitude,longitude,radius"
location = "35.0853336,-106.6055534,100km"
try:
    for tweet in tweepy.Cursor(api.search,
                               q=search_query,
                               geocode=location,
                               count=100).items():
        dbm.insert_data(tweet._json)
        historic_tweet_count += 1
        print(f"historic_tweet_count: {historic_tweet_count}")
        # `>=` so that exactly `upper_limit` tweets are stored, not one more
        if historic_tweet_count >= upper_limit:
            break
finally:
    # release the database connection even if the search or an insert fails
    dbm.close_database_connection()

print(historic_tweet_count)

# Get historic tweets into a dataframe

In [None]:
# Reopen the database for the read-only analysis cells (the collection step
# closed its own connection).
# NOTE(review): this connection is not closed in any of the visible cells.
conn = sqlite3.connect('tweet_example_historic_v1.db')
cur  = conn.cursor()

In [None]:
def _load_table(cursor, query):
    """Run *query* and return (rows, column names, DataFrame) for its result."""
    cursor.execute(query)
    rows = cursor.fetchall()
    names = [column[0] for column in cursor.description]
    return rows, names, pd.DataFrame(rows, columns=names)


# All stored tweets
tweet_info_all_result, tweet_info_column_names, tweet_info_dataframe = _load_table(
    cur, "SELECT * FROM tweet_info_historic_v1")
print(f"No of tweet stored: {len(tweet_info_all_result)}, {tweet_info_dataframe.shape[0]}\n")

# All stored user profiles
user_info_all_result, user_info_column_names, user_info_dataframe = _load_table(
    cur, "SELECT * FROM user_info_historic_v1")
print(f"No of user info stored: {len(user_info_all_result)}, {user_info_dataframe.shape[0]}\n")

In [None]:
# Preview the first rows of the user table
user_info_dataframe.head()

In [None]:
# Add a parsed `datetime` column to each frame unless an earlier run of this
# cell already did: tweets use their posting time, users their creation time
for frame, source_column in ((tweet_info_dataframe, "tweet_time"),
                             (user_info_dataframe, "created_at")):
    if 'datetime' not in frame.columns:
        frame["datetime"] = pd.to_datetime(frame[source_column])

In [None]:
import warnings
from nltk.corpus import stopwords
from nltk.tokenize import TweetTokenizer
from nltk import FreqDist
from nltk.util import ngrams
import matplotlib.pyplot as plt
import re
import string

In [None]:
# Fetch only the text column; each row comes back as a 1-tuple, so unpack it
cur.execute("SELECT tweet_text FROM tweet_info_historic_v1")
all_tweet_text = cur.fetchall()
all_tweet_list = [text for (text,) in all_tweet_text]


# Suppress every warning from here on (affects the whole session)
warnings.filterwarnings("ignore")

# Tokens dropped by tweet_tokenizer: English stop words, single punctuation
# characters and a few Twitter-specific fillers
punctuation = list(string.punctuation)
stop_words = stopwords.words('english') + punctuation + ['rt', 'via', "i'm", "don't"]

pat1 = r'@[A-Za-z0-9_]+'    # @mentions
pat2 = r'https?://[^ ]+'    # http:// and https:// links
combined_pat = r'|'.join((pat1, pat2))
# bare "www." links; the dot is escaped so it matches only a literal "."
# (unescaped it also stripped words such as "wwwhatever")
www_pat = r'www\.[^ ]+'

tokenizer = TweetTokenizer()

def tweet_tokenizer(verbatim):
    """Reduce one tweet to a space-separated string of informative words.

    The text is stripped of everything matching ``combined_pat`` and
    ``www_pat``, lower-cased, every non-letter is replaced by a space, and the
    result is split with the module-level ``tokenizer``. Tokens shorter than
    two characters or listed in ``stop_words`` are dropped.

    Args:
        verbatim: Raw tweet text.

    Returns:
        The surviving tokens joined by single spaces, or ``''`` if an
        ``IndexError`` is raised while processing.
    """
    try:
        without_links = re.sub(www_pat, '', re.sub(combined_pat, '', verbatim))
        alphabetic = re.sub("[^a-zA-Z]", " ", without_links.lower())
        kept = [
            token
            for token in tokenizer.tokenize(alphabetic)
            # islower() rejects tokens without any cased letter
            if token.islower() and len(token) > 1 and token not in stop_words
        ]
        out_text = ' '.join(kept)
    except IndexError:
        out_text = ''
    return out_text
    

# Clean every tweet, pool the cleaned text and count word frequencies
test_bed = list(map(tweet_tokenizer, all_tweet_list))
all_concat_str = ' '.join(test_bed)
# bigram alternative: FreqDist(ngrams(all_concat_str.split(), 2))
freq_dist_count = FreqDist(all_concat_str.split())

# Disabled word-cloud rendering, kept as a bare string so it never executes.
# NOTE(review): WordCloud is not imported in the visible cells; it would have
# to be imported before this code is re-enabled.
"""
to_WC = freq_dist_count.most_common(100)
wordcloud_purged = WordCloud(max_font_size=120, max_words=100, background_color="white",\
                      width=800, height=600)\
                      .generate_from_frequencies(dict(to_WC))

plt.figure()
plt.imshow(wordcloud_purged, interpolation="bilinear")
plt.axis("off")
plt.show()

wordcloud_purged.to_file("WC_BIGRAM_LIB_NEG_T100.png")     
"""
# presumably only here so the cell does not echo the string above as its
# output — confirm before removing
a= 3

In [None]:
# Display the word-frequency distribution built from the cleaned tweets
freq_dist_count

In [None]:
# Preview the first rows of the tweet table
tweet_info_dataframe.head()

In [None]:
def get_median_frequency(df, key, frequency='W',
                         start_date='2009-01-01', end_date='2021-09-20'):
    """Median of column *key* per time bin of the ``datetime`` column.

    Duplicate ``(key, datetime)`` pairs are removed before aggregating.

    Args:
        df: Frame holding *key* and a ``datetime`` column.
        key: Name of the column to aggregate.
        frequency: pandas offset alias defining the bin width.
        start_date: Accepted but currently not used.
        end_date: Accepted but currently not used.

    Returns:
        DataFrame indexed by bin with the median of *key*.
    """
    unique_rows = df[[key, 'datetime']].drop_duplicates()
    time_bins = pd.Grouper(key='datetime', freq=frequency)
    return unique_rows.groupby(time_bins).median()

def get_mean_frequency(df, key, frequency='W',
                       start_date='2009-01-01', end_date='2021-09-20'):
    """Mean of column *key* per time bin of the ``datetime`` column.

    Duplicate ``(key, datetime)`` pairs are removed before aggregating.

    Args:
        df: Frame holding *key* and a ``datetime`` column.
        key: Name of the column to aggregate.
        frequency: pandas offset alias defining the bin width.
        start_date: Accepted but currently not used.
        end_date: Accepted but currently not used.

    Returns:
        DataFrame indexed by bin with the mean of *key*.
    """
    unique_rows = df[[key, 'datetime']].drop_duplicates()
    time_bins = pd.Grouper(key='datetime', freq=frequency)
    return unique_rows.groupby(time_bins).mean()

def get_count_frequency(df, key, frequency='M',
                        start_date='2009-01-01', end_date='2021-09-01'):
    """Count the non-null values of column *key* per time bin of ``datetime``.

    Unlike the median/mean helpers, duplicates are NOT removed first.

    Args:
        df: Frame holding *key* and a ``datetime`` column.
        key: Name of the column to count.
        frequency: pandas offset alias defining the bin width.
        start_date: Accepted but currently not used.
        end_date: Accepted but currently not used.

    Returns:
        DataFrame indexed by bin with the count of *key*.
    """
    # NOTE(review): recent pandas releases deprecate the 'M' alias in favour
    # of 'ME' — confirm against the installed version.
    time_bins = pd.Grouper(key='datetime', freq=frequency)
    selected = df[[key, 'datetime']]
    return selected.groupby(time_bins).count()

In [None]:
# Number of collected user accounts per month of account creation
# (user_info_dataframe["datetime"] is parsed from created_at)
created_per_unit = get_count_frequency(
    user_info_dataframe, key='user_id', frequency='M'
)



In [None]:
# Display the last status object bound by the geo-filtered collection loop
tweet

In [None]:

# Notebook-style install command (not plain Python): adds networkx to the
# environment used by the cells below
pip install networkx

In [None]:
import networkx as nx

In [None]:
# Pins the decorator package to 5.0.7 — presumably to work around an
# incompatibility with the installed networkx; confirm before changing
pip install decorator==5.0.7

In [None]:
# Create an empty graph (the next cell rebuilds G from scratch)
G = nx.Graph()

In [None]:
# Minimal networkx smoke test: three nodes, node 1 linked to 2 and 3
G = nx.Graph()
G.add_nodes_from(range(1, 4))
G.add_edges_from(((1, 2), (1, 3)))
nx.draw(G)