<a href="https://colab.research.google.com/github/jon-chun/SentimentAnalysis/blob/master/twitter_tweepy_2021214_class.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Sentiment Analysis of Tweets**

Jon Chun
14 Oct 2021

# **Install Libraries**

In [None]:
# Library to clean text

!pip install texthero

# Other text cleaning libraries

# !pip install clean-text (better)

# !pip install tweet-preprocessor

In [None]:
# Library to automate twitter OAuth/scraping

!pip install tweepy

# twint (better when working, no API limitations)

In [None]:
# NLP cleaning

!pip install contractions

In [None]:
# Translate emojis and emoticons to text

!pip install emot

# !pip install emoji

# !pip install demoji

In [None]:
!pip install vaderSentiment

## Spell Correction

In [None]:
# !sudo apt-get update -y

In [None]:
# !sudo apt-get install -y swig3.0

In [None]:
# !pip install jamspell

# **Import Libraries**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tweepy
import time
import warnings
warnings.filterwarnings('ignore')

In [None]:
import re
# import string

import texthero as hero
from texthero import preprocessing

import contractions

In [None]:
"""
from cleantext import clean

clean("some input",
    fix_unicode=True,               # fix various unicode errors
    to_ascii=True,                  # transliterate to closest ASCII representation
    lower=True,                     # lowercase text
    no_line_breaks=False,           # fully strip line breaks as opposed to only normalizing them
    no_urls=False,                  # replace all URLs with a special token
    no_emails=False,                # replace all email addresses with a special token
    no_phone_numbers=False,         # replace all phone numbers with a special token
    no_numbers=False,               # replace all numbers with a special token
    no_digits=False,                # replace all digits with a special token
    no_currency_symbols=False,      # replace all currency symbols with a special token
    no_punct=False,                 # remove punctuations
    replace_with_punct="",          # instead of removing punctuations you may replace them
    replace_with_url="<URL>",
    replace_with_email="<EMAIL>",
    replace_with_phone_number="<PHONE>",
    replace_with_number="<NUMBER>",
    replace_with_digit="0",
    replace_with_currency_symbol="<CUR>",
    lang="en"                       # set to 'de' for German special handling
)
""";

In [None]:
import emot 
emot_obj = emot.core.emot() 

In [None]:
# Test emot

text = "I love python ☮ 🙂 ❤ :-) :-( :-)))" 
emot_obj.emoticons(text)

In [None]:
from emot.emo_unicode import UNICODE_EMOJI, EMOTICONS_EMO

# import emoji
# import demoji
# demoji.download_codes()

In [None]:
# import preprocessor as tweetproc

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
vader_sa = SentimentIntensityAnalyzer()

In [None]:
from google.colab import files

# **Configuration**


In [None]:
#collapse_show
# Show the full tweet text in DataFrame displays instead of truncating it.
# `None` means "no limit"; the legacy value -1 was deprecated in pandas 1.0
# and is rejected by newer pandas releases.
pd.set_option('display.max_colwidth', None)

In [None]:
# Enlarge matplotlib plots

plt.rcParams["figure.figsize"] = (20,10)

# **Globals**

In [None]:
# Dictionary of DataFrames (key=search_term) 
tweets_searchterm_dt = {}

# Dictionary of DataFrames (key=username)
tweets_user_dt = {}

# **Authenticate**

## **Google gDrive**

In [None]:
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive

In [None]:
!ls

In [None]:
%cd ./MyDrive/courses/senior_projects/fall2021/nlp_twitter_covid/

## **Twitter OAuth**

In [None]:
#collapse_hide
# SECURITY: API credentials must never be hard-coded in a notebook. The keys
# that used to be embedded in this cell were plain text (NOT encrypted) and must
# be treated as compromised: revoke/regenerate them in the Twitter developer portal.
#
# Credentials are read from environment variables; when a variable is unset or
# empty the user is prompted for the value (input is not echoed).
import os
from getpass import getpass

def _read_twitter_secret(env_name):
  '''
  Given the name of an environment variable
  Return its value, or prompt for the value if the variable is unset/empty
  '''
  return os.environ.get(env_name) or getpass(f'Enter {env_name}: ')

consumer_key = _read_twitter_secret('TWITTER_CONSUMER_KEY')
consumer_secret = _read_twitter_secret('TWITTER_CONSUMER_SECRET')
access_token = _read_twitter_secret('TWITTER_ACCESS_TOKEN')
access_token_secret = _read_twitter_secret('TWITTER_ACCESS_TOKEN_SECRET')

In [None]:
#collapse_show
# Accessing twitter API
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)

# set_access_token is a method and must be CALLED. The previous code assigned a
# tuple to the attribute (`auth.set_access_token = (...)`), which replaced the
# method and left the handler without any access token.
auth.set_access_token(access_token, access_token_secret)

# wait_on_rate_limit=True: wait for the rate limit to replenish instead of failing
api = tweepy.API(auth, wait_on_rate_limit=True)

In [None]:
# Test tweepy OAuth: fetch a few recent tweets from one account

# `screen_name` is accepted by both tweepy 3.x and 4.x
# (4.x no longer accepts the ambiguous `id` keyword)
tweets = tweepy.Cursor(api.user_timeline, screen_name='joebiden', tweet_mode="extended").items(5)

tweet_ls = list(tweets)

tweet_ls[0].id

In [None]:
# dir(tweet_ls[0])

tweet_ls[0].full_text

# **Utility Functions**

## Clean Text

In [None]:
type(EMOTICONS_EMO)

In [None]:
# UNICODE_EMOJI

In [None]:
def emojis2text(atext):
  '''
  Given a text string with embedded unicode emojis
  Return the string with every emoji found in UNICODE_EMOJI replaced by its
  text description, underscores turned into spaces and all colons removed
  '''
  for emoji_char, description in UNICODE_EMOJI.items():
    # Drop commas and collapse runs of whitespace inside the description
    spoken = ' '.join(description.replace(",", "").split())
    atext = atext.replace(emoji_char, spoken)

  # NOTE(review): descriptions presumably look like ':some_name:' - verify
  # against emot. Either way, every '_' and ':' in the WHOLE string is
  # affected, not only those that came from a description.
  without_underscores = atext.replace('_', ' ')
  return without_underscores.replace(':', '')

# Test
test_str = "Hilarious 😂. The feeling of making a sale 😎, The feeling of actually ;) fulfilling orders 😒"
test_str = emojis2text(test_str)
print(f'test_str: [{test_str}]')

In [None]:
def emoticons2text(atext):
  '''
  Given a text string with embedded ASCII emoticons
  Return the string with every emoticon found in EMOTICONS_EMO replaced by its
  text description (commas removed, whitespace collapsed)
  '''
  result = atext
  for emoticon in EMOTICONS_EMO:
    description_words = EMOTICONS_EMO[emoticon].replace(",", "").split()
    result = result.replace(emoticon, ' '.join(description_words))
  return result

# Test
test_str = "Hilarious :o. The feeling of making a sale :( , The feeling of actually ;) fulfilling orders 😒"
test_str = emoticons2text(test_str)
print(f'test_str: [{test_str}]')

In [None]:
def all_emos2text(atext):
  '''
  Given a text string with embedded emojis and/or emoticons
  Return an expanded text string with all emojis/emoticons translated into text

  Emoticons are translated before emojis. Each translation is inserted with a
  leading space; afterwards ':name:' wrappers lose their colons and every
  underscore becomes a space.
  '''
  # Order matters: emoticons first, then emojis
  for lookup in (EMOTICONS_EMO, UNICODE_EMOJI):
    for symbol, description in lookup.items():
      # Commas become spaces, then runs of whitespace are collapsed
      spoken = ' '.join(description.replace(",", " ").split())
      atext = atext.replace(symbol, ' ' + spoken)

  # ':some_name:' -> 'some_name'
  unwrapped = re.sub(r':([A-Za-z_]*):', r'\1', atext)

  # 'some_name' -> 'some name'
  return unwrapped.replace('_', ' ')

# Test
test_str = "Hilarious 😂. The feeling :o of making a sale 😎, The feeling :( of actually ;) fulfilling orders 😒"
all_emos2text(test_str)

In [None]:
hero.preprocessing.get_default_pipeline()

In [None]:
from texthero import preprocessing

hero_pre_pipeline =  [preprocessing.remove_urls,
                      preprocessing.remove_html_tags]

hero_post_pipeline = [preprocessing.fillna,
                      preprocessing.lowercase,
                      preprocessing.remove_digits,
                      preprocessing.remove_punctuation,
                      preprocessing.remove_diacritics,
                      preprocessing.remove_stopwords,
                      preprocessing.remove_whitespace]

# df['clean_text'] = hero.clean(df['text'], hero_pipeline)
# or
# df['clean_text'] = df['clean_text'].pipe(hero.clean, custom_pipeline)

### [Optional] Slang Translation

In [None]:
# Abbreviation / Slang
# https://www.kaggle.com/nmaguette/up-to-date-list-of-slangs-for-text-preprocessing/notebook

slang = {
    "$" : " dollar ",
    "€" : " euro ",
    "4ao" : "for adults only",
    "a.m" : "before midday",
    "a3" : "anytime anywhere anyplace",
    "aamof" : "as a matter of fact",
    "acct" : "account",
    "adih" : "another day in hell",
    "afaic" : "as far as i am concerned",
    "afaict" : "as far as i can tell",
    "afaik" : "as far as i know",
    "afair" : "as far as i remember",
    "afk" : "away from keyboard",
    "app" : "application",
    "approx" : "approximately",
    "apps" : "applications",
    "asap" : "as soon as possible",
    "asl" : "age, sex, location",
    "atk" : "at the keyboard",
    "ave." : "avenue",
    "aymm" : "are you my mother",
    "ayor" : "at your own risk", 
    "b&b" : "bed and breakfast",
    "b+b" : "bed and breakfast",
    "b.c" : "before christ",
    "b2b" : "business to business",
    "b2c" : "business to customer",
    "b4" : "before",
    "b4n" : "bye for now",
    "b@u" : "back at you",
    "bae" : "before anyone else",
    "bak" : "back at keyboard",
    "bbbg" : "bye bye be good",
    "bbc" : "british broadcasting corporation",
    "bbias" : "be back in a second",
    "bbl" : "be back later",
    "bbs" : "be back soon",
    "be4" : "before",
    "bfn" : "bye for now",
    "blvd" : "boulevard",
    "bout" : "about",
    "brb" : "be right back",
    "bros" : "brothers",
    "brt" : "be right there",
    "bsaaw" : "big smile and a wink",
    "btw" : "by the way",
    "bwl" : "bursting with laughter",
    "c/o" : "care of",
    "cet" : "central european time",
    "cf" : "compare",
    "cia" : "central intelligence agency",
    "csl" : "can not stop laughing",
    "cu" : "see you",
    "cul8r" : "see you later",
    "cv" : "curriculum vitae",
    "cwot" : "complete waste of time",
    "cya" : "see you",
    "cyt" : "see you tomorrow",
    "dae" : "does anyone else",
    "dbmib" : "do not bother me i am busy",
    "diy" : "do it yourself",
    "dm" : "direct message",
    "dwh" : "during work hours",
    "e123" : "easy as one two three",
    "eet" : "eastern european time",
    "eg" : "example",
    "embm" : "early morning business meeting",
    "encl" : "enclosed",
    "encl." : "enclosed",
    "etc" : "and so on",
    "faq" : "frequently asked questions",
    "fawc" : "for anyone who cares",
    "fb" : "facebook",
    "fc" : "fingers crossed",
    "fig" : "figure",
    "fimh" : "forever in my heart", 
    "ft." : "feet",
    "ft" : "featuring",
    "ftl" : "for the loss",
    "ftw" : "for the win",
    "fwiw" : "for what it is worth",
    "fyi" : "for your information",
    "g9" : "genius",
    "gahoy" : "get a hold of yourself",
    "gal" : "get a life",
    "gcse" : "general certificate of secondary education",
    "gfn" : "gone for now",
    "gg" : "good game",
    "gl" : "good luck",
    "glhf" : "good luck have fun",
    "gmt" : "greenwich mean time",
    "gmta" : "great minds think alike",
    "gn" : "good night",
    "g.o.a.t" : "greatest of all time",
    "goat" : "greatest of all time",
    "goi" : "get over it",
    "gps" : "global positioning system",
    "gr8" : "great",
    "gratz" : "congratulations",
    "gyal" : "girl",
    "h&c" : "hot and cold",
    "hp" : "horsepower",
    "hr" : "hour",
    "hrh" : "his royal highness",
    "ht" : "height",
    "ibrb" : "i will be right back",
    "ic" : "i see",
    "icq" : "i seek you",
    "icymi" : "in case you missed it",
    "idc" : "i do not care",
    "idgadf" : "i do not give a damn fuck",
    "idgaf" : "i do not give a fuck",
    "idk" : "i do not know",
    "ie" : "that is",
    "i.e" : "that is",
    "ifyp" : "i feel your pain",
    "IG" : "instagram",
    "iirc" : "if i remember correctly",
    "ilu" : "i love you",
    "ily" : "i love you",
    "imho" : "in my humble opinion",
    "imo" : "in my opinion",
    "imu" : "i miss you",
    "iow" : "in other words",
    "irl" : "in real life",
    "j4f" : "just for fun",
    "jic" : "just in case",
    "jk" : "just kidding",
    "jsyk" : "just so you know",
    "l8r" : "later",
    "lb" : "pound",
    "lbs" : "pounds",
    "ldr" : "long distance relationship",
    "lmao" : "laugh my ass off",
    "lmfao" : "laugh my fucking ass off",
    "lol" : "laughing out loud",
    "ltd" : "limited",
    "ltns" : "long time no see",
    "m8" : "mate",
    "mf" : "motherfucker",
    "mfs" : "motherfuckers",
    "mfw" : "my face when",
    "mofo" : "motherfucker",
    "mph" : "miles per hour",
    "mr" : "mister",
    "mrw" : "my reaction when",
    "ms" : "miss",
    "mte" : "my thoughts exactly",
    "nagi" : "not a good idea",
    "nbc" : "national broadcasting company",
    "nbd" : "not big deal",
    "nfs" : "not for sale",
    "ngl" : "not going to lie",
    "nhs" : "national health service",
    "nrn" : "no reply necessary",
    "nsfl" : "not safe for life",
    "nsfw" : "not safe for work",
    "nth" : "nice to have",
    "nvr" : "never",
    "nyc" : "new york city",
    "oc" : "original content",
    "og" : "original",
    "ohp" : "overhead projector",
    "oic" : "oh i see",
    "omdb" : "over my dead body",
    "omg" : "oh my god",
    "omw" : "on my way",
    "p.a" : "per annum",
    "p.m" : "after midday",
    "pm" : "prime minister",
    "poc" : "people of color",
    "pov" : "point of view",
    "pp" : "pages",
    "ppl" : "people",
    "prw" : "parents are watching",
    "ps" : "postscript",
    "pt" : "point",
    "ptb" : "please text back",
    "pto" : "please turn over",
    "qpsa" : "what happens", #"que pasa",
    "ratchet" : "rude",
    "rbtl" : "read between the lines",
    "rlrt" : "real life retweet", 
    "rofl" : "rolling on the floor laughing",
    "roflol" : "rolling on the floor laughing out loud",
    "rotflmao" : "rolling on the floor laughing my ass off",
    "rt" : "retweet",
    "ruok" : "are you ok",
    "sfw" : "safe for work",
    "sk8" : "skate",
    "smh" : "shake my head",
    "sq" : "square",
    "srsly" : "seriously", 
    "ssdd" : "same stuff different day",
    "tbh" : "to be honest",
    "tbs" : "tablespooful",
    "tbsp" : "tablespooful",
    "tfw" : "that feeling when",
    "thks" : "thank you",
    "tho" : "though",
    "thx" : "thank you",
    "tia" : "thanks in advance",
    "til" : "today i learned",
    "tl;dr" : "too long i did not read",
    "tldr" : "too long i did not read",
    "tmb" : "tweet me back",
    "tntl" : "trying not to laugh",
    "ttyl" : "talk to you later",
    "u" : "you",
    "u2" : "you too",
    "u4e" : "yours for ever",
    "utc" : "coordinated universal time",
    "w/" : "with",
    "w/o" : "without",
    "w8" : "wait",
    "wassup" : "what is up",
    "wb" : "welcome back",
    "wtf" : "what the fuck",
    "wtg" : "way to go",
    "wtpa" : "where the party at",
    "wuf" : "where are you from",
    "wuzup" : "what is up",
    "wywh" : "wish you were here",
    "yd" : "yard",
    "ygtr" : "you got that right",
    "ynk" : "you never know",
    "zzz" : "sleeping bored and tired"
}

In [None]:
def expand_slang(astring):
  '''
  Given a text string
  Return a lowercased string in which every whitespace-delimited token that
  matches a key of the global `slang` dictionary is replaced by its expansion

  Matching is case-insensitive on both sides: tokens are lowercased and so are
  the dictionary keys (previously a mixed-case key such as "IG" could never
  match a lowercased token). Tokens are matched whole, so slang with
  punctuation attached (e.g. "lol!") is left unexpanded.
  '''
  # Normalise the keys on each call; the table is small, so this is cheap
  slang_lower = {key.lower(): expansion for key, expansion in slang.items()}

  words_expanded_ls = []
  for aword in astring.split():
    token = aword.lower()
    # Unknown tokens pass through unchanged (but lowercased)
    words_expanded_ls.append(slang_lower.get(token, token))

  return ' '.join(words_expanded_ls)

# Test

expand_slang('idk LOL you suck!')

### [Optional] Spell Correction

In [None]:
# import jamspell

In [None]:
# !wget https://github.com/bakwc/JamSpell-models/raw/master/en.tar.gz

In [None]:
# !ls

In [None]:
# !gunzip en.tar.gz

In [None]:
# !tar -xvf en.tar

In [None]:
# corrector = jamspell.TSpellCorrector()
# corrector.LoadLangModel('en.bin')

In [None]:
# corrector.FixFragment('I am the begt spell cherken!')

### Main: clean_tweet()

In [None]:
from texthero import preprocessing

In [None]:
hero.preprocessing.get_default_pipeline()

In [None]:
# Customize TextHero pipeline

# Create a custom cleaning pipeline
custom_pipeline = [preprocessing.fillna
                   , preprocessing.lowercase
                   , preprocessing.remove_digits
                   , preprocessing.remove_punctuation
                   , preprocessing.remove_diacritics
                   , preprocessing.remove_stopwords
                   , preprocessing.remove_whitespace
                   , preprocessing.stem]
                   
# Test: pass the custom_pipeline to the pipeline argument
# df['clean_title'] = hero.clean(df['title'], pipeline = custom_pipeline)
# df.head()

In [None]:
def clean_tweet(tweet_df, col_text_raw):
  '''
  Given a DataFrame of Tweets and Column with raw text
  Return a Series of Cleaned Tweet texts

  Cleaning steps, in order:
    1. missing text -> '' (so the string-only steps below cannot fail on NaN)
    2. remove URLs
    3. emoticons, then emojis, to text (all_emos2text)
    4. expand slang/abbreviations (expand_slang)
    5. expand contractions
    6. TextHero clean using the notebook's custom_pipeline
  '''

  # Rows without text (e.g. empty cells read back from a saved *.csv) arrive as
  # NaN floats; normalise them to '' before applying the str-only helpers
  text_clean_ser = tweet_df[col_text_raw].fillna('').astype(str)

  # Remove URLs
  text_clean_ser = hero.remove_urls(text_clean_ser)

  # Emoticons and then Emojis to Text
  text_clean_ser = text_clean_ser.apply(all_emos2text)

  # Expand Slang/Abbr
  text_clean_ser = text_clean_ser.apply(expand_slang)

  # Expand Contractions
  text_clean_ser = text_clean_ser.apply(contractions.fix)

  # Clean text: lowercase, remove punctuation/numbers, etc
  text_clean_ser = hero.clean(text_clean_ser, pipeline = custom_pipeline)

  # Alternative slang/contraction resources:
  # https://www.kaggle.com/rizdelhi/socialmediaabbrevations
  # https://www.kaggle.com/longtng/nlp-preprocessing-feature-extraction-methods-a-z
  # https://www.kaggle.com/nmaguette/up-to-date-list-of-slangs-for-text-preprocessing
  # https://github.com/poddarswakhar/Twitter-Analysis-Abbreviation-Slang-Replacement/blob/master/finalSlang.csv

  # [Optional, not enabled] Correct Spelling
  # https://github.com/bakwc/JamSpell
  # https://github.com/filyp/autocorrect
  # https://www.kaggle.com/longtng/nlp-preprocessing-feature-extraction-methods-a-z (tf)
  # text_clean_ser = text_clean_ser.apply(lambda x : corrector.FixFragment(x))

  return text_clean_ser

# Test

# clean_tweet(tweets_user_df.iloc[:5], 'text')


In [None]:
# tweets_user_df.info()

## Get Tweets

In [None]:
# Get the most recent tweets from one user's timeline

def get_user_tweets(username, count):
  '''
  Given a username and count
  Return a DataFrame with up to {count} tweets from the {username} Twitter account

  One row per tweet. For retweets, `text` holds the full text of the original
  (retweeted) status. If the request fails with an ordinary error, a message is
  printed and the rows gathered so far (possibly none) are returned.
  KeyboardInterrupt/SystemExit are NOT swallowed, so a long scrape can be
  interrupted.
  '''

  # NOTE: 'follwers' is misspelled, but it is the column name used by the saved
  # *.csv datafiles, so it is kept for compatibility
  columns = ['tweet_created_dt','tweet_id','user','acct_desc','loc','following','follwers','total',
             'user_created_dt','retweet_ct','text','hashtags']
  rows = []

  try:
    # `screen_name` is accepted by both tweepy 3.x and 4.x (4.x dropped `id`)
    tweets = tweepy.Cursor(api.user_timeline, screen_name=username, tweet_mode="extended").items(count)

    for tweet in tweets:
      try:
        text = tweet.retweeted_status.full_text
      except AttributeError:     # Not a Retweet
        text = tweet.full_text

      rows.append({
          'tweet_created_dt': tweet.created_at,
          'tweet_id': tweet.id,
          'user': tweet.user.screen_name,
          'acct_desc': tweet.user.description,
          'loc': tweet.user.location,
          'following': tweet.user.friends_count,
          'follwers': tweet.user.followers_count,
          'total': tweet.user.statuses_count,
          'user_created_dt': tweet.user.created_at,
          'retweet_ct': tweet.retweet_count,
          'text': text,
          'hashtags': tweet.entities['hashtags'],
      })

  except Exception as e:   # not BaseException: let Ctrl-C / kernel interrupt through
    print('failed on_status', str(e))
    time.sleep(3)

  # Build the DataFrame once instead of growing it row by row
  return pd.DataFrame(rows, columns=columns)

# Test

get_user_tweets('joebiden',5)

In [None]:
def get_matching_tweets(text_query, max_no):
  '''
  Given a search string and max_no of results
  Return a DataFrame of matching results

  One row per English-language tweet. For retweets, `text` holds the full text
  of the original (retweeted) status. If the request fails with an ordinary
  error, a message is printed and the rows gathered so far (possibly none) are
  returned. KeyboardInterrupt/SystemExit are NOT swallowed, so a long scrape
  can be interrupted.
  '''

  # NOTE: 'follwers' is misspelled, but it is the column name used by the saved
  # *.csv datafiles, so it is kept for compatibility
  columns = ['tweet_created_dt','tweet_id','user','acct_desc','loc','following','follwers','total',
             'user_created_dt','retweet_ct','text','hashtags']
  rows = []

  try:
    # tweepy 4.x renamed API.search -> API.search_tweets; support both
    search_fn = getattr(api, 'search_tweets', None) or api.search
    tweets = tweepy.Cursor(search_fn, q=text_query, tweet_mode="extended", lang='en').items(max_no)

    for tweet in tweets:
      try:
        text = tweet.retweeted_status.full_text
      except AttributeError:     # Not a Retweet
        text = tweet.full_text

      rows.append({
          'tweet_created_dt': tweet.created_at,
          'tweet_id': tweet.id,
          'user': tweet.user.screen_name,
          'acct_desc': tweet.user.description,
          'loc': tweet.user.location,
          'following': tweet.user.friends_count,
          'follwers': tweet.user.followers_count,
          'total': tweet.user.statuses_count,
          'user_created_dt': tweet.user.created_at,
          'retweet_ct': tweet.retweet_count,
          'text': text,
          'hashtags': tweet.entities['hashtags'],
      })

  except Exception as e:   # not BaseException: let Ctrl-C / kernel interrupt through
    print('failed_on_status', str(e))
    time.sleep(3)

  # Build the DataFrame once instead of growing it row by row
  return pd.DataFrame(rows, columns=columns)

# Test

get_matching_tweets('nuggets', 5)

# **Get Tweets with Tweepy (Pick only ONE Option: (a) or (b))**

## **Option (a): Read from Saved File**

### **Read Datafile: Query by User**

In [None]:
!pwd

In [None]:
%cd ../../../2021f_iphs200_programming_humanity/code

In [None]:
!ls *.csv

In [None]:
# CUSTOMIZE: set to name of users *.csv datafile in directory listed above

# Set your tweets.csv datafile name to this variable 
tweets_user_datafile = 'iphs200_tweets_user.csv'

tweets_user_df = pd.read_csv(tweets_user_datafile) # , index_col=[0])
tweets_user_df.head()

In [None]:
tweets_user_df.shape

In [None]:
tweets_user_df['tweet_created_dt'] = pd.to_datetime(tweets_user_df['tweet_created_dt'])
tweets_user_df['user_created_dt'] = pd.to_datetime(tweets_user_df['user_created_dt'])

tweets_user_df.info()

In [None]:
# Populate users_ls from the 'user' column of the loaded DataFrame

users_ls = tweets_user_df['user'].unique()

# These are usernames, so label them 'User' (the old 'Search Term' label was a
# copy-paste slip from the search-term section)
for i, auser in enumerate(users_ls):
  print(f'User #{i}: {auser}')


### **Read Datafile: Query by Search Term**

In [None]:
!ls *.csv

In [None]:
# CUSTOMIZE: set to name of searchterm *.csv datafile in directory listed above
tweets_searchterms_datafile = 'iphs200_tweets_query.csv'

tweets_searchterm_df = pd.read_csv(tweets_searchterms_datafile) # , index_col=[0])
tweets_searchterm_df.head()

In [None]:
tweets_searchterm_df.shape

In [None]:
tweets_searchterm_df['tweet_created_dt'] = pd.to_datetime(tweets_searchterm_df['tweet_created_dt'])
tweets_searchterm_df['user_created_dt'] = pd.to_datetime(tweets_searchterm_df['user_created_dt'])

tweets_searchterm_df.info()

In [None]:
# Populate searchterms_ls from the 'term' column of the loaded DataFrame

searchterms_ls = tweets_searchterm_df['term'].unique()

for i, asearchterm in enumerate(searchterms_ls):
  print(f'Search Term #{i}: {asearchterm}')


> **NOTE:** If the tweet text is NOT already cleaned (the DataFrame has no `text_clean` column):
>
> * (a) continue with the 'Clean Tweets' sections below;
>
> otherwise:
>
> * (b) skip ahead to the 'Word Frequency' section below.

### **Read Datafile: Query by Trend**

In [None]:
!ls

In [None]:
"""
twitter_user_datafile = 'tweets_by_trend_20211106-155252.csv'

tweets_by_trend_ = pd.read_csv(twitter_user_datafile)
tweets_by_trend_.head()
""";

## **Option (b): Scrape from Twitter with Tweepy**

* https://docs.tweepy.org/en/latest/

### **Twitter Queries**

#### **Query by User**

**Customize Twitter (a) username list and (b) max_no of tweets to retrieve for each username**

In [None]:
# CUSTOMIZE: add Twitter usernames in the following list

users_ls = ['joebiden', 'CDCgov', 'WHO']

max_no = 15

In [None]:
%%time

# NOTE: 33s

for i, auser in enumerate(users_ls):
  print(f'User #{i}: {auser}')
  tweets_user_dt[auser] = get_user_tweets(auser, max_no)

In [None]:
# Dictionary of DataFrames, one for each user

# View the first few tweets for the first user #0
tweets_user_dt[users_ls[0]].head()

In [None]:
# Merge all User DataFrames into One Master DataFrame: tweets_user_df

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# per-user frames are tagged first and concatenated once. `assign` returns a
# copy, so the frames stored in tweets_user_dt are no longer modified here.
user_frames = [ausertweets_df.assign(user=auser)
               for auser, ausertweets_df in tweets_user_dt.items()]

# Sort by Date (ascending)
tweets_user_df = pd.concat(user_frames, ignore_index=True).sort_values(by=['tweet_created_dt'])
tweets_user_df.head()

# tweets_user_df.reindex()
# tweets_user_df['Date'] = tweets_user_df['Date'].to_datetime()
# tweets_user_df = tweets_user_df.set_index('Date')
# tweets_user_df.head()

In [None]:
# Check for duplicated Tweet_ids

tweets_user_df['tweet_id'].duplicated().any()

In [None]:
tweets_user_df.info()

In [None]:
# https://stackoverflow.com/questions/43855462/pandas-drop-duplicates-method-not-working-on-dataframe-containing-lists

# tweets_user_df.drop_duplicates()
tweets_user_df = tweets_user_df.loc[tweets_user_df.astype(str).drop_duplicates().index]
tweets_user_df.shape

#### **Query by Search Term**

TODO: Fix this to mirror the 'Query by User' section above

In [None]:
# CUSTOMIZE: add Twitter search terms in the following list

searchterms_ls = ['vaccines', 'masks', 'mandates', 'travel', 'covid', 'reopening', 'ban']

max_no = 15


In [None]:
%%time

for i, aterm in enumerate(searchterms_ls):
  print(f'Term #{i}: {aterm}')
  tweets_searchterm_dt[aterm] = get_matching_tweets(aterm, max_no)

In [None]:
# View the first few tweets for the first searchterm #0

tweets_searchterm_dt[searchterms_ls[0]].head(2)


In [None]:
# Merge all Search Term DataFrames into One Master DataFrame: tweets_searchterm_df

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# per-term frames are tagged first and concatenated once. `assign` returns a
# copy, so the frames stored in tweets_searchterm_dt are no longer modified here.
term_frames = [atermtweets_df.assign(term=aterm)
               for aterm, atermtweets_df in tweets_searchterm_dt.items()]

# Sort by Date (ascending)
tweets_searchterm_df = pd.concat(term_frames, ignore_index=True).sort_values(by=['tweet_created_dt'])

tweets_searchterm_df.head(2)


In [None]:
# Check for duplicated Tweet_ids

tweets_searchterm_df['tweet_id'].duplicated().sum()

In [None]:
tweets_searchterm_df.info()

In [None]:
# https://stackoverflow.com/questions/43855462/pandas-drop-duplicates-method-not-working-on-dataframe-containing-lists

# tweets_searchterm_df.drop_duplicates()
tweets_searchterm_df = tweets_searchterm_df.loc[~tweets_searchterm_df['tweet_id'].duplicated(keep='last')]
# tweets_searchterm_df = tweets_searchterm_df.loc[tweets_searchterm_df.astype(str).drop_duplicates().index]
tweets_searchterm_df.shape

In [None]:
tweets_searchterm_df.info()

#### **Query by Trend**

In [None]:
# Trends: list the locations Twitter has trending-topic information for

# tweepy 4.x renamed API.trends_available -> API.available_trends; support both
list_trend_locations = getattr(api, 'available_trends', None) or api.trends_available
tweet_query_trend_df = pd.DataFrame(list_trend_locations())
# str(trend['country'])
# trend['country']="United States of America"
tweet_query_trend_df.head()

### **Clean Tweets by User**

In [None]:
tweets_user_df['text_clean'] = clean_tweet(tweets_user_df, 'text')
tweets_user_df.head(2)

### Clean Tweets by Search Term

In [None]:
tweets_searchterm_df['text_clean'] = clean_tweet(tweets_searchterm_df, 'text')
tweets_searchterm_df.head(2)

# Word Frequency

## Query by User

In [None]:
# Create pd.Series of most freq top_n words
 
top_n = 10

tw_user = hero.visualization.top_words(tweets_user_df['text_clean']).head(top_n)

tw_user_df = pd.DataFrame(tw_user)
tw_user_df.sort_values('text_clean',inplace=True)
tw_user_df.head()

In [None]:
# Plot word freq

# legend=False hides the legend; the string 'None' used before is truthy and
# therefore still drew it
tw_user_df.plot.barh(y='text_clean', legend=False)
plt.grid(True, alpha=0.3)
plt.title('Word Frequency in Tweets Found by User', fontsize=20)
plt.xlabel('Word Count', fontsize=12)
plt.ylabel('Word Lemma Root', fontsize=12)
plt.show();

## Query by Search Term

In [None]:
# Create pd.Series of most freq top_n words

top_n = 10

# Use top_n (previously a hard-coded 10) so that changing it above takes effect
tw_searchterm = hero.visualization.top_words(tweets_searchterm_df['text_clean']).head(top_n)

tw_searchterm_df = pd.DataFrame(tw_searchterm)
tw_searchterm_df.sort_values('text_clean',inplace=True)
tw_searchterm_df.head()

In [None]:
# Plot word freq

# legend=False hides the legend; the string 'None' used before is truthy and
# therefore still drew it
tw_searchterm_df.plot.barh(y='text_clean', legend=False)
plt.grid(True, alpha=0.3)
plt.title('Word Frequency in Tweets Found by Search Term', fontsize=20)
plt.xlabel('Word Count', fontsize=12)
plt.ylabel('Word Lemma Root', fontsize=12)
# plt.show() renders the figure (matches the 'by User' plot); plt.plot() with no
# arguments only added an empty line artist
plt.show();

In [None]:
type(tw_user_df['text_clean'])

In [None]:
# TODO: Debug

# hero.visualization.top_words(tw_user_df['text_clean'])

# Word Cloud

In [None]:
!pip install wordcloud

In [None]:
from wordcloud import WordCloud

## Query by User

In [None]:
# Break every cleaned user tweet into whitespace-separated tokens and
# collect them all in one flat list

tweets_user_tokens_ls = [
    atoken
    for atweet in tweets_user_df['text_clean']
    for atoken in atweet.split()
]

print(tweets_user_tokens_ls)

In [None]:
# Drop unwanted tokens before building the Word Cloud

# CUSTOMIZE: Enter words/tokens to be removed from Word Cloud
token_black_ls = ['amp', 'http', 'https']

# Keep only the tokens that are not black-listed
temp_ls = [
    tok
    for tok in tweets_user_tokens_ls
    if tok not in token_black_ls
]
tweets_user_tokens_ls = temp_ls

tweets_user_tokens_ls[:10]

In [None]:
# Generate a word cloud image

# Create one big string of tokens
tweets_user_str = ' '.join(tweets_user_tokens_ls)

# Generate wordcloud
wordcloud = WordCloud().generate(tweets_user_str)

In [None]:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show();

## Query by Search Term

In [None]:
# Break every cleaned search-term tweet into whitespace-separated tokens and
# collect them all in one flat list

tweets_searchterm_tokens_ls = [
    atoken
    for atweet in tweets_searchterm_df['text_clean']
    for atoken in atweet.split()
]

print(tweets_searchterm_tokens_ls)

In [None]:
# Drop unwanted tokens before building the Word Cloud

# CUSTOMIZE: Enter words/tokens to be removed from Word Cloud
token_black_ls = ['"', 'http', 'https']

# Keep only the tokens that are not black-listed
temp_ls = [
    tok
    for tok in tweets_searchterm_tokens_ls
    if tok not in token_black_ls
]
tweets_searchterm_tokens_ls = temp_ls

tweets_searchterm_tokens_ls[:10]

In [None]:
# Generate a word cloud image

# Create one big string of tokens
tweets_searchterm_str = ' '.join(tweets_searchterm_tokens_ls)

# Generate wordcloud
wordcloud = WordCloud().generate(tweets_searchterm_str)

In [None]:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show();

# **Search Retrieved Tweets for any Phrase**

## Query by User

In [None]:
tweets_user_df.head(2)

In [None]:
#@markdown Enter a Text Phrase to Search for in all Retrieved User Tweets:
Search_Phrase = "A Union" #@param {type:"string"}

# regex=False: treat the typed phrase literally, so characters such as ( ? + . cannot
#   raise a regex error or match unintended text
# na=False: tweets with missing cleaned text count as non-matches instead of
#   putting NaN into the boolean mask
found_user_df = tweets_user_df.loc[tweets_user_df['text_clean'].str.contains(Search_Phrase, case=False, regex=False, na=False)]

print(f'{found_user_df.shape[0]} Tweets matched your Search_Phrase: {Search_Phrase}')


In [None]:
# Show Matching Tweets

found_user_df

## Query by Search Term

In [None]:
tweets_searchterm_df.head()

In [None]:
#@markdown Enter a Text Phrase to Search for in all Retrieved Search Term Tweets:
Search_Phrase = "A Union" #@param {type:"string"}

# regex=False: treat the typed phrase literally, so characters such as ( ? + . cannot
#   raise a regex error or match unintended text
# na=False: tweets with missing cleaned text count as non-matches instead of
#   putting NaN into the boolean mask
found_searchterm_df = tweets_searchterm_df.loc[tweets_searchterm_df['text_clean'].str.contains(Search_Phrase, case=False, regex=False, na=False)]

print(f'{found_searchterm_df.shape[0]} Tweets matched your Search_Phrase: {Search_Phrase}')


In [None]:
# Show Matching Tweets

found_searchterm_df

# **Save to File**

## Query by User

In [None]:
# Save CSV to Google gDrive (if Authenticated, else to temp virtual drive)

timestr = time.strftime("%Y%m%d-%H%M%S")
filename = f'tweets_by_users_{timestr}.csv'

tweets_user_df.to_csv(filename)

**!!! REMEMBER TO MANUALLY DOWNLOAD THE DATAFILE JUST CREATED !!!**

In [None]:
# Save CSV to local laptop

files.download(filename)

## Query by Search Term

In [None]:
# Save CSV to Google gDrive (if Authenticated, else to temp virtual drive)

timestr = time.strftime("%Y%m%d-%H%M%S")
filename = f'tweets_by_searchterm_{timestr}.csv'

tweets_searchterm_df.to_csv(filename)

**!!! REMEMBER TO MANUALLY DOWNLOAD THE DATAFILE JUST CREATED !!!**

In [None]:
# Save CSV to local laptop

files.download(filename)

# **Sentiment Analysis**

## Query by User

In [None]:
# Compute sentiment for each clean_text using VADER

# tweets_user_df['vader_all'] = tweets_user_df['text_clean'].apply(lambda x : vader_sa.polarity_scores(x))

tweets_user_df['vader'] = tweets_user_df['text_clean'].apply(lambda x : vader_sa.polarity_scores(x)['compound'])


In [None]:
tweets_user_df.head(2)

In [None]:
# Number of rows whose tweet_id repeats an earlier row.
# .sum() counts the True flags; .count() would just return the total number of rows.
tweets_user_df['tweet_id'].duplicated().sum()

In [None]:
tweets_user_df.drop_duplicates(subset=['tweet_id'], keep='last', inplace=True)
tweets_user_df.shape

In [None]:
tweets_user_df.info()

In [None]:
tweets_user_df.sort_values(by='tweet_created_dt', inplace=True)
tweets_user_df.set_index('tweet_created_dt', inplace=True)
tweets_user_df.head(5)

### ONE Raw Plot for ALL Users (Mixed Together)

In [None]:
for i, auser in enumerate(users_ls):
  print(f'User #{i}: {auser}')

print(f'\nPlotting {len(users_ls)} Users')

In [None]:
# For a few tweets, plot raw VADER sentiment values

tweets_user_df['vader'].plot();
plt.grid(True, alpha=0.3)
plt.suptitle(f'Tweet Sentiment for all Users', fontsize=20)
plt.title(f'{users_ls}', fontsize=14)
plt.xlabel('Date Tweet Created', fontsize=14)
plt.ylabel('Sentiment (VADER)', fontsize=12)
plt.show();
print('\n')


### ONE Raw Plot for ALL Users (Separately)

In [None]:
# Plot the raw (unsmoothed) VADER sentiment of every user on one figure,
# one labelled line per user

# NOTE: win_per / win_size are computed here but are not used by this raw plot
win_per = 10
win_size = int(win_per/100 * tweets_user_df.shape[0])

# Each iteration draws one user's tweets onto the shared axes
for auser in users_ls:
  tweets_user_df[tweets_user_df['user'] == auser]['vader'].plot(label=auser)

plt.grid(True, alpha=0.3)
plt.suptitle(f'Tweet Sentiment', fontsize=20)
plt.title(f'Users: {users_ls}', fontsize=12)
plt.xlabel('Date Tweet Created', fontsize=14)
plt.ylabel('Sentiment (VADER)', fontsize=14)
plt.legend(loc='best')
plt.show();

  # tweets_user_df[tweets_user_df['user'] == auser]['vader'].plot();

### ONE Raw Plot for a Selected User

In [None]:
for i, auser in enumerate(users_ls):
  print(f'User #{i}: {auser}')

print(f'\nPlotting {len(users_ls)} Users')

In [None]:
# CUSTOMIZE: Select one of the usernames from above to plot

user_selected = 'sw_columbia'

tweets_user_df[tweets_user_df['user'] == user_selected]['vader'].plot()
plt.grid(True, alpha=0.3)
plt.title(f'Tweet Sentiment \n User: {user_selected}', fontsize=20)
plt.xlabel('Date Tweet Created', fontsize=14)
plt.ylabel('Sentiment (VADER)', fontsize=12)
plt.show();
print('\n')

### ONE Smoothed Plot for ALL Users (Mixed Together)

In [None]:
# Plot smoothed VADER sentiment (centered rolling mean) over all users' tweets combined

win_per = 10  # rolling window width, as a percentage of the total number of tweets
# Keep the window at least 1 tweet wide: with fewer than 10 tweets int() gives 0,
# and a zero-width rolling window produces an all-NaN (blank) plot
win_size = max(1, int(win_per/100 * tweets_user_df.shape[0]))

tweets_user_df['vader'].rolling(win_size, center=True, min_periods=0).mean().plot()
plt.grid(True, alpha=0.3)
plt.suptitle(f'Tweet Sentiment for all Users', fontsize=20)
plt.title(f'{users_ls}', fontsize=14)
plt.xlabel('Date Tweet Created', fontsize=14)
plt.ylabel('Sentiment (VADER)', fontsize=12)
plt.show();
print('\n')


### ONE Smoothed Plot for ALL Users (Separately)

In [None]:
# Plot smoothed VADER sentiment (centered rolling mean) of every user on one figure,
# one labelled line per user

win_per = 10  # rolling window width, as a percentage of the total number of tweets
# Keep the window at least 1 tweet wide: with fewer than 10 tweets int() gives 0,
# and a zero-width rolling window produces an all-NaN (blank) plot
win_size = max(1, int(win_per/100 * tweets_user_df.shape[0]))

# Each iteration draws one user's smoothed series onto the shared axes
for auser in users_ls:
  tweets_user_df[tweets_user_df['user'] == auser]['vader'].rolling(win_size, center=True, min_periods=0).mean().plot(label=auser)

plt.grid(True, alpha=0.3)
plt.suptitle(f'Tweet Sentiment', fontsize=20)
plt.title(f'Users: {users_ls}', fontsize=12)
plt.xlabel('Date Tweet Created', fontsize=14)
plt.ylabel('Sentiment (VADER)', fontsize=14)
plt.legend(loc='best')
plt.show();

  # tweets_user_df[tweets_user_df['user'] == auser]['vader'].plot();

### MULTIPLE Smoothed Plots, One for EACH User

In [None]:
# Plot smoothed VADER sentiment (centered rolling mean), one separate figure per user

win_per = 10  # rolling window width, as a percentage of the total number of tweets
# Keep the window at least 1 tweet wide: with fewer than 10 tweets int() gives 0,
# and a zero-width rolling window produces an all-NaN (blank) plot
win_size = max(1, int(win_per/100 * tweets_user_df.shape[0]))

for auser in users_ls:
  tweets_user_df[tweets_user_df['user'] == auser]['vader'].rolling(win_size, center=True, min_periods=0).mean().plot()
  plt.grid(True, alpha=0.3)
  plt.title(f'Tweet Sentiment \n User: {auser}', fontsize=20)
  plt.xlabel('Date Tweet Created', fontsize=14)
  plt.ylabel('Sentiment (VADER)', fontsize=14)
  # plt.show() inside the loop closes out the figure, so each user gets its own plot
  plt.show();
  print('\n')
  # tweets_user_df[tweets_user_df['user'] == auser]['vader'].plot();

### ONE Smoothed Plot for a Selected User

In [None]:
for i, auser in enumerate(users_ls):
  print(f'User #{i}: {auser}')

print(f'\nPlotting {len(users_ls)} Users')

In [None]:
# CUSTOMIZE: Select one of the usernames from above to plot

user_selected = 'sw_columbia'

# Compute the smoothing window here so this cell does not depend on a previous cell
win_per = 10  # rolling window width, as a percentage of the total number of tweets
# max(1, ...) avoids a zero-width window (all-NaN plot) when there are fewer than 10 tweets
win_size = max(1, int(win_per/100 * tweets_user_df.shape[0]))

# Filter on user_selected; filtering on the leftover loop variable `auser` would
# plot whichever user the previous loop happened to end on
tweets_user_df[tweets_user_df['user'] == user_selected]['vader'].rolling(win_size, center=True, min_periods=0).mean().plot()

plt.grid(True, alpha=0.3)
plt.suptitle(f'Tweet Sentiment', fontsize=20)
plt.title(f'User: {user_selected}', fontsize=20)
plt.xlabel('Date Tweet Created', fontsize=14)
plt.ylabel('Sentiment (VADER)', fontsize=12)
plt.show();
print('\n')

## Query by Search Term

In [None]:
# Compute sentiment for each clean_text using VADER

# tweets_user_all_df['vader_all'] = tweets_user_all_df['text_clean'].apply(lambda x : vader_sa.polarity_scores(x))

tweets_searchterm_df['vader'] = tweets_searchterm_df['text_clean'].apply(lambda x : vader_sa.polarity_scores(x)['compound'])
tweets_searchterm_df.head(2)

In [None]:
tweets_searchterm_df['tweet_id'].duplicated().sum()

In [None]:
tweets_searchterm_df.shape

In [None]:
tweets_searchterm_df.drop_duplicates(subset=['tweet_id'], keep='last', inplace=True)
tweets_searchterm_df.shape

In [None]:
tweets_searchterm_df.info()

In [None]:
tweets_searchterm_df.sort_values(by='tweet_created_dt', inplace=True)
tweets_searchterm_df.set_index('tweet_created_dt', inplace=True)
tweets_searchterm_df.head(5)

### ONE Raw Plot for ALL Search Terms (Mixed Together)

In [None]:
for i, asearchterm in enumerate(searchterms_ls):
  print(f'Search Term #{i}: {asearchterm}')

print(f'\nPlotting {len(searchterms_ls)} Search Terms')

In [None]:
tweets_searchterm_df.info()

In [None]:
# For a few tweets, plot raw VADER sentiment values

tweets_searchterm_df['vader'].plot();
plt.grid(True, alpha=0.3)
plt.suptitle(f'Tweet Sentiment for all Search Terms', fontsize=20)
plt.title(f'{searchterms_ls}', fontsize=14)
plt.xlabel('Date Tweet Created', fontsize=14)
plt.ylabel('Sentiment (VADER)', fontsize=12)
plt.show();
print('\n')


### ONE Raw Plot for ALL Search Terms (Separately)

In [None]:
# Plot the raw (unsmoothed) VADER sentiment of every search term on one figure,
# one labelled line per search term

# NOTE: win_per / win_size are computed here but are not used by this raw plot
win_per = 10
win_size = int(win_per/100 * tweets_searchterm_df.shape[0])

for asearchterm in searchterms_ls:
  # Label each line with its own search term (not the leftover `auser` variable)
  # so the legend identifies the lines correctly
  tweets_searchterm_df[tweets_searchterm_df['term'] == asearchterm]['vader'].plot(label=asearchterm)

plt.grid(True, alpha=0.3)
plt.suptitle(f'Tweet Sentiment', fontsize=20)
plt.title(f'Search Terms: {searchterms_ls}', fontsize=12)
plt.xlabel('Date Tweet Created', fontsize=14)
plt.ylabel('Sentiment (VADER)', fontsize=14)
plt.legend(loc='best')
plt.show();


### ONE Raw Plot for a Selected Search Term

In [None]:
# List the available search terms with their index.
# Uses its own loop variable so the user-section variable `auser` is not overwritten.
for i, asearchterm in enumerate(searchterms_ls):
  print(f'Search Term #{i}: {asearchterm}')

print(f'\nPlotting {len(searchterms_ls)} Search Terms')

In [None]:
# CUSTOMIZE: Select one of the search terms from above to plot

searchterm_selected = '#CUonStrike'

# Raw (unsmoothed) VADER sentiment of the tweets retrieved for the selected search term
tweets_searchterm_df[tweets_searchterm_df['term'] == searchterm_selected]['vader'].plot()
plt.grid(True, alpha=0.3)
plt.title(f'Tweet Sentiment \n Search Term: {searchterm_selected}', fontsize=20)
plt.xlabel('Date Tweet Created', fontsize=14)
plt.ylabel('Sentiment (VADER)', fontsize=12)
plt.show();
print('\n')

### ONE Smoothed Plot for ALL Search Terms (Mixed Together)

In [None]:
# Plot smoothed VADER sentiment (centered rolling mean) over all search-term tweets combined

win_per = 10  # rolling window width, as a percentage of the total number of tweets
# Keep the window at least 1 tweet wide: with fewer than 10 tweets int() gives 0,
# and a zero-width rolling window produces an all-NaN (blank) plot
win_size = max(1, int(win_per/100 * tweets_searchterm_df.shape[0]))

tweets_searchterm_df['vader'].rolling(win_size, center=True, min_periods=0).mean().plot()
plt.grid(True, alpha=0.3)
plt.suptitle(f'Tweet Sentiment for all Search Terms', fontsize=20)
# Subtitle lists the search terms (not users_ls), matching the raw plot of the same data
plt.title(f'{searchterms_ls}', fontsize=14)
plt.xlabel('Date Tweet Created', fontsize=14)
plt.ylabel('Sentiment (VADER)', fontsize=12)
plt.show();
print('\n')


### ONE Smoothed Plot for ALL Search Terms (Separately)

In [None]:
# Plot smoothed VADER sentiment (centered rolling mean) of every search term on one figure,
# one labelled line per search term

win_per = 10  # rolling window width, as a percentage of the total number of tweets
# Keep the window at least 1 tweet wide: with fewer than 10 tweets int() gives 0,
# and a zero-width rolling window produces an all-NaN (blank) plot
win_size = max(1, int(win_per/100 * tweets_searchterm_df.shape[0]))

# Each iteration draws one search term's smoothed series onto the shared axes
for asearchterm in searchterms_ls:
  tweets_searchterm_df[tweets_searchterm_df['term'] == asearchterm]['vader'].rolling(win_size, center=True, min_periods=0).mean().plot(label=asearchterm)

plt.grid(True, alpha=0.3)
plt.suptitle(f'Tweet Sentiment', fontsize=20)
plt.title(f'Search Terms: {searchterms_ls}', fontsize=12)
plt.xlabel('Date Tweet Created', fontsize=14)
plt.ylabel('Sentiment (VADER)', fontsize=14)
plt.legend(loc='best')
plt.show();

  # tweets_user_df[tweets_user_df['user'] == auser]['vader'].plot();

### MULTIPLE Smoothed Plots, One for EACH Search Term

In [None]:
# Plot smoothed VADER sentiment (centered rolling mean), one separate figure per search term

win_per = 10  # rolling window width, as a percentage of the total number of tweets
# Keep the window at least 1 tweet wide: with fewer than 10 tweets int() gives 0,
# and a zero-width rolling window produces an all-NaN (blank) plot
win_size = max(1, int(win_per/100 * tweets_searchterm_df.shape[0]))

for asearchterm in searchterms_ls:
  tweets_searchterm_df[tweets_searchterm_df['term'] == asearchterm]['vader'].rolling(win_size, center=True, min_periods=0).mean().plot()
  plt.grid(True, alpha=0.3)
  plt.suptitle(f'Tweet Sentiment', fontsize=20)
  plt.title(f'Search Term: {asearchterm}', fontsize=14)
  plt.xlabel('Date Tweet Created', fontsize=14)
  plt.ylabel('Sentiment (VADER)', fontsize=14)
  # plt.show() inside the loop closes out the figure, so each search term gets its own plot
  plt.show();
  print('\n')
  # tweets_user_df[tweets_user_df['user'] == auser]['vader'].plot();

### ONE Smoothed Plot for a Selected Search Term

In [None]:
for i, asearchterm in enumerate(searchterms_ls):
  print(f'Search Term #{i}: {asearchterm}')

print(f'\nPlotting {len(searchterms_ls)} Search Terms')

In [None]:
# CUSTOMIZE: Select one of the search terms from above to plot

searchterm_selected = 'union'

# Compute the smoothing window here so this cell does not depend on a previous cell
win_per = 10  # rolling window width, as a percentage of the total number of tweets
# max(1, ...) avoids a zero-width window (all-NaN plot) when there are fewer than 10 tweets
win_size = max(1, int(win_per/100 * tweets_searchterm_df.shape[0]))

tweets_searchterm_df[tweets_searchterm_df['term'] == searchterm_selected]['vader'].rolling(win_size, center=True, min_periods=0).mean().plot()

plt.grid(True, alpha=0.3)
plt.suptitle(f'Tweet Sentiment', fontsize=20)
plt.title(f'Search Term: {searchterm_selected}', fontsize=14)
plt.xlabel('Date Tweet Created', fontsize=14)
plt.ylabel('Sentiment (VADER)', fontsize=12)
plt.show();
print('\n')

## n Top/Bottom Tweets by Sentiment

### Query by User

In [None]:
tweets_user_df.info()

In [None]:
tweets_user_df.iloc[:,[0,1]]

In [None]:
#@markdown How Many n-top/n-bottom Tweets (ranked by Sentiment):

Show_How_Many_Tweets = 22 #@param {type:"slider", min:1, max:100, step:1}


#@markdown Select which Tweet features to Display:
# a_created_dt = True #@param {type:"boolean"}
a_tweet_id = False #@param {type:"boolean"}
a_user = True #@param {type:"boolean"}
a_acct_desc = False #@param {type:"boolean"}
a_loc = False #@param {type:"boolean"}
a_following = False #@param {type:"boolean"}
a_followers = False #@param {type:"boolean"}
a_total = False #@param {type:"boolean"}
a_user_created_dt = False #@param {type:"boolean"}
a_retweet_ct = False #@param {type:"boolean"}
a_text = True #@param {type:"boolean"}
a_hashtags = False #@param {type:"boolean"}
a_text_clean = False #@param {type:"boolean"}
a_vader = True #@param {type:"boolean"}

#@markdown Display in Ascending Sentiment Order?
Ascending_Sentiment = True #@param {type:"boolean"}

# Pair every form checkbox with the column it controls (in display order) and
# keep the columns whose checkbox is ticked
cols_selected_ls = [
  acol
  for is_on, acol in (
    (a_tweet_id, 'tweet_id'),
    (a_user, 'user'),
    (a_acct_desc, 'acct_desc'),
    (a_loc, 'loc'),
    (a_following, 'following'),
    (a_followers, 'followers'),
    (a_total, 'total'),
    (a_user_created_dt, 'user_created_dt'),
    (a_retweet_ct, 'retweet_ct'),
    (a_text, 'text'),
    (a_hashtags, 'hashtags'),
    (a_text_clean, 'text_clean'),
    (a_vader, 'vader'),
  )
  if is_on
]

# Rank by sentiment, keep the first Show_How_Many_Tweets rows, show the chosen columns
(
  tweets_user_df
  .sort_values(by='vader', ascending=Ascending_Sentiment)
  .iloc[:Show_How_Many_Tweets]
  [cols_selected_ls]
)


### Query by Search Term

In [None]:
tweets_searchterm_df.info()

In [None]:
tweets_searchterm_df.iloc[:,[0,1]]

In [None]:
#@markdown How Many n-top/n-bottom Tweets (ranked by Sentiment):

Show_How_Many_Tweets = 22 #@param {type:"slider", min:1, max:100, step:1}


#@markdown Select which Tweet features to Display:
# a_created_dt = True #@param {type:"boolean"}
a_tweet_id = False #@param {type:"boolean"}
a_user = True #@param {type:"boolean"}
a_acct_desc = False #@param {type:"boolean"}
a_loc = False #@param {type:"boolean"}
a_following = False #@param {type:"boolean"}
a_followers = False #@param {type:"boolean"}
a_total = False #@param {type:"boolean"}
a_user_created_dt = False #@param {type:"boolean"}
a_retweet_ct = False #@param {type:"boolean"}
a_text = True #@param {type:"boolean"}
a_hashtags = False #@param {type:"boolean"}
a_text_clean = False #@param {type:"boolean"}
a_vader = True #@param {type:"boolean"}

#@markdown Display in Ascending Sentiment Order?
Ascending_Sentiment = False #@param {type:"boolean"}

# Pair every form checkbox with the column it controls (in display order) and
# keep the columns whose checkbox is ticked
cols_selected_ls = [
  acol
  for is_on, acol in (
    (a_tweet_id, 'tweet_id'),
    (a_user, 'user'),
    (a_acct_desc, 'acct_desc'),
    (a_loc, 'loc'),
    (a_following, 'following'),
    (a_followers, 'followers'),
    (a_total, 'total'),
    (a_user_created_dt, 'user_created_dt'),
    (a_retweet_ct, 'retweet_ct'),
    (a_text, 'text'),
    (a_hashtags, 'hashtags'),
    (a_text_clean, 'text_clean'),
    (a_vader, 'vader'),
  )
  if is_on
]

# Rank by sentiment, keep the first Show_How_Many_Tweets rows, show the chosen columns
(
  tweets_searchterm_df
  .sort_values(by='vader', ascending=Ascending_Sentiment)
  .iloc[:Show_How_Many_Tweets]
  [cols_selected_ls]
)


# **END OF NOTEBOOK**