#### Get Twitter Data

In [None]:
import tweepy as tw
from tweepy import OAuthHandler
from dotenv import load_dotenv
import json
import csv

def get_twitter_data(screen_name, dotenv_path="C:/Users/brett/Desktop/FTBC/.env"):
    """Download a user's timeline tweets and save them to ``Data/<screen_name>_tweets.csv``.

    Credentials are read from environment variables (loaded from a ``.env``
    file): TWITTER_API_KEY, TWITTER_SECRET_KEY, TWITTER_ACCESS_TOKEN and
    TWITTER_SECRET_TOKEN. The timeline is requested with ``include_rts=False``
    and paged backwards, 200 tweets per request, until an empty page is
    returned.

    Args:
        screen_name: Twitter handle whose timeline is downloaded. It is also
            used to build the output file name.
        dotenv_path: Location of the ``.env`` file holding the credentials.
            Defaults to the path this notebook was originally written against.

    Returns:
        None. The result is the CSV file, with the columns
        ``screen_name, date, tweet, likes, retweets``.
    """
    # Imported locally so the function does not depend on ``os`` being
    # imported by some other cell; the import cell above does not include it.
    import os

    # load data for auth to twitter
    load_dotenv(dotenv_path=dotenv_path)
    api_key = os.getenv('TWITTER_API_KEY')
    api_secret_key = os.getenv('TWITTER_SECRET_KEY')
    access_token = os.getenv('TWITTER_ACCESS_TOKEN')
    access_s_token = os.getenv("TWITTER_SECRET_TOKEN")

    auth = tw.OAuthHandler(api_key, api_secret_key)
    auth.set_access_token(access_token, access_s_token)

    # NOTE(review): `wait_on_rate_limit_notify` appears to have been removed in
    # Tweepy 4.x - confirm against the installed version before upgrading.
    api = tw.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

    # Arguments shared by every timeline request
    page_kwargs = {
        'screen_name': screen_name,
        'include_rts': False,
        'tweet_mode': 'extended',
        'count': 200,
    }

    # First request: newest page, no max_id
    tweets = []
    user_tweets = api.user_timeline(**page_kwargs)
    tweets.extend(user_tweets)

    # Keep paging backwards until an empty page comes back
    # (the original comment puts Twitter's cap at 3200 tweets)
    while user_tweets:
        # Ask only for tweets strictly older than the oldest one already
        # fetched. The offset is applied exactly once; subtracting it twice
        # would skip a tweet whose id is oldest_id - 2.
        last_id = user_tweets[-1].id - 1

        user_tweets = api.user_timeline(max_id=last_id, **page_kwargs)

        # add additional tweets to original list
        tweets.extend(user_tweets)

        print(f'{len(user_tweets)} have been downloaded for {screen_name}')

    print(f'{len(tweets)} downloaded for {screen_name}')

    # extract the data that is needed from the tweet data
    tweet_details = [
        [
            tweet.user.screen_name,
            tweet.created_at,
            tweet.full_text,
            tweet.favorite_count,
            tweet.retweet_count,
        ]
        for tweet in tweets
    ]

    # Push extracted data to csv for use later.
    # newline='' is required by the csv module; without it Windows writes
    # '\r\r\n' row endings and newlines embedded in tweets are mangled.
    os.makedirs('Data', exist_ok=True)
    with open(f'Data/{screen_name}_tweets.csv', 'w', encoding="utf-8", newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['screen_name', 'date', 'tweet', 'likes', 'retweets'])
        writer.writerows(tweet_details)

#### Clean Data

In [None]:
# Locate the per-account tweet CSV files (one file per screen name)
boonepickens_path = Path('Data/boonepickens_tweets.csv')
chrismartenson_path = Path('Data/chrismartenson_tweets.csv')
chrisnelder_path = Path('Data/chrisnelder_tweets.csv')
collineatonhc_path = Path('Data/CollinEatonHC_tweets.csv')
gasbuddyguy_path = Path('Data/GasBuddyGuy_tweets.csv')
jendlouhyhc_path = Path('Data/jendlouhyhc_tweets.csv')
jkempenergy_path = Path('Data/JKempEnergy_tweets.csv')
robinenergy_path = Path('Data/robinenergy_tweets.csv')
staunovo_path = Path('Data/staunovo_tweets.csv')
thearorareport_path = Path('Data/TheAroraReport_tweets.csv')

# Convert to dfs. No date options are passed to read_csv: the 'date' column
# is converted explicitly with pd.to_datetime below.
boonepickens_df = pd.read_csv(boonepickens_path)
chrismartenson_df = pd.read_csv(chrismartenson_path)
chrisnelder_df = pd.read_csv(chrisnelder_path)
collineatonhc_df = pd.read_csv(collineatonhc_path)
gasbuddyguy_df = pd.read_csv(gasbuddyguy_path)
jendlouhyhc_df = pd.read_csv(jendlouhyhc_path)
jkempenergy_df = pd.read_csv(jkempenergy_path)
robinenergy_df = pd.read_csv(robinenergy_path)
staunovo_df = pd.read_csv(staunovo_path)
thearorareport_df = pd.read_csv(thearorareport_path)

# Combine into 1 dataframe with a fresh 0..n-1 index
all_tweets_df = pd.concat(
    [
        boonepickens_df, chrismartenson_df,
        chrisnelder_df, collineatonhc_df,
        gasbuddyguy_df, jendlouhyhc_df,
        jkempenergy_df, robinenergy_df,
        staunovo_df, thearorareport_df,
    ]
).reset_index(drop=True)

# Normalize the Date field: unparseable values become NaT, and the time of
# day is reset to midnight so tweets can be grouped by calendar day
all_tweets_df['date'] = pd.to_datetime(all_tweets_df['date'], errors='coerce')
all_tweets_df['date'] = all_tweets_df['date'].dt.normalize()

# Remove the url from the tweet. Missing tweets are treated as empty text so
# that every row keeps a string value for the sentiment steps that follow.
tweets_wo_url_df = (
    all_tweets_df['tweet']
    .fillna('')
    .astype(str)
    .str.replace(r"http\S+", "", regex=True)
    .to_frame('tweets_no_url')
)

# Swap the raw tweet column for the url-free one (aligned on the row index)
all_tweets_df = all_tweets_df.drop(columns='tweet').join(tweets_wo_url_df)

# Display df
pd.set_option('max_colwidth', 200)
all_tweets_df.head()

### Sentiment Analysis

#### Vader Sentiment Analysis

In [None]:
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()

# Output column name -> key in the dict returned by polarity_scores
vader_columns = {
    "vader compound": "compound",
    "vader positive": "pos",
    "vader negative": "neg",
    "vader neutral": "neu",
}

tweets_sentiment = []

for tweet in all_tweets_df['tweets_no_url']:
    if isinstance(tweet, str):
        sentiment = analyzer.polarity_scores(tweet)
        row = {column: sentiment[key] for column, key in vader_columns.items()}
    else:
        # Non-text values (e.g. NaN) cannot be scored. Record NaN scores
        # instead of dropping the row, so that vader_df keeps exactly one row
        # per tweet and stays aligned with all_tweets_df when the frames are
        # later concatenated column-wise.
        row = {column: float("nan") for column in vader_columns}
    tweets_sentiment.append(row)

# create Df, sharing the tweets' index so rows line up by label
vader_df = pd.DataFrame(
    tweets_sentiment,
    index=all_tweets_df.index,
    columns=list(vader_columns),
)

#### Textblob Sentiment Analysis

In [None]:
# Imports needed
from textblob import TextBlob

# Score each url-free tweet with TextBlob; the sentiment result is read
# positionally: item 0 is stored as polarity, item 1 as subjectivity
blob_sentiment = [
    {
        'Textblob Polarity': scores[0],
        'Textblob Subjectivity': scores[1],
    }
    for scores in (
        TextBlob(text).sentiment for text in all_tweets_df['tweets_no_url']
    )
]

# Collect the per-tweet scores into a dataframe and preview it
textblob_df = pd.DataFrame(blob_sentiment)
textblob_df.head()

#### Merge all_tweets_df with the 2 sentiment dataframes

In [None]:
# Put the tweets side by side with their VADER and TextBlob scores;
# the inner join keeps only index labels present in all three frames
sentiment_parts = [all_tweets_df, vader_df, textblob_df]
tweet_sentiment_df = pd.concat(sentiment_parts, join='inner', axis="columns")

# Narrow the displayed column width for the preview
pd.set_option('max_colwidth', 100)
tweet_sentiment_df.head()

In [None]:
# Persist the combined tweet/sentiment dataframe as a csv file
tweet_sentiment_df.to_csv(path_or_buf='Data/tweet_sentiment_df.csv')