# KDSP_Twitter_Data_Analysis
In Project 5 "Twitter Data Analysis", data from Twitter need to be fetched from a specific event, e.g. #football.
1. Derive the sentiment of each tweet using Python module
2. Top 10 hashtags and users based on their number of tweets in the data set
3. Get the followers of a given twitter user from your acquired data set
4. Given a twitter user, obtain the tweets and profiles of all followers of the user and show it.

In [117]:
event = '#DUUUVAL'
#maximum count of follower
follower_count = 10
#maximum count of tweets per user
tweet_count = 10

-Access Twitter API

In [118]:
import yaml
import tweepy

# load the credential data from yaml file
def process_yaml(credentials):
    with open(credentials) as c:
        return yaml.safe_load(c)


# extract consumer key and secret
def create_keys(access_data):
    return access_data["twitter_api"]["consumer_key"], access_data["twitter_api"]["consumer_secret"]


# get access to the Twitter API
def twitter_auth(consumer_key, consumer_secret):
    auth = tweepy.AppAuthHandler(consumer_key, consumer_secret)
    api = tweepy.API(auth, wait_on_rate_limit=True)
    return api

access_data = process_yaml("credentials.yaml")
consumer_key, consumer_secret = create_keys(access_data)
api = twitter_auth(consumer_key, consumer_secret)



Pands Dataframe formatting

In [119]:
import pandas as pd

#format output of pandas DataFrame
#pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
#pd.set_option('display.max_colwidth', 50)

-load Tweets for chosen event

In [120]:
import numpy as np

def twitter_search(api, search):
    search_results = api.search(q=search, lang='en', count='100', tweet_mode='extended', wait_on_rate_limit='True')
    return search_results

tweets = twitter_search(api, event)
df_tweets = pd.DataFrame([tweet.full_text for tweet in tweets], columns=['Tweets'])
df_tweets['Screen_Name'] = np.array([tweet.user.screen_name for tweet in tweets])
print(df_tweets)

                                               Tweets     Screen_Name
0   Honoring those who made the ultimate sacrifice...      theotherAP
1   RT @AdamHulseSports: Reflecting on the Jalen R...     FantasyTurf
2   Jacksonville you have been awesome #AEWDoN #AE...        matymils
3   RT @SilversteinAdam: Chris Jericho: "Have a gr...        chadkoon
4   RT @AdamHulseSports: Reflecting on the Jalen R...     NFLProPicks
..                                                ...             ...
80  RT @SKWrestling_: You are a legend, @CoachUrba...      mjtiscione
81  You are a legend, @CoachUrbanMeyer. 🤣🤣🤣 #Doubl...    SKWrestling_
82  Urban Meyer and Charlie Strong making a cameo ...    GigiMorale92
83  RT @willforthrill: Coach Meyer and Strong are ...   engelhartnick
84  Jaguars coach Urban Meyer gets involved in AEW...  BetUS_Official

[85 rows x 2 columns]


-analyse sentiment

In [121]:
import re
from textblob import TextBlob
# Create a function to clean the tweets
def cleanTxt(text):
    text = re.sub(r'@[A-Za-z0-9]+', '', text)  # Remove @mentions
    text = re.sub(r'#', '', text)  # Remonving the '#' symbol
    text = re.sub(r'RT[\s]: ', '', text)  # Removing RT
    text = re.sub(r'https?:\/\/\S+', '', text)  # Remove the hyper link

    return text


# Create a function to get the subjectivity
def getSubjectivity(text):
    return TextBlob(text).sentiment.subjectivity


# Create a function to get the polarity
def getPolarity(text):
    return TextBlob(text).sentiment.polarity

# Create a function to compute the negative, neutral and positive analysis
def getAnalysis(score):
    if score < 0:
        return 'Negative'
    elif score == 0:
        return 'Neutral'
    else:
        return 'Positive'

df_tweets['Tweets_Sentiment'] = df_tweets['Tweets'].apply(cleanTxt)

#df['Subjectivity'] = df['Tweets'].apply(getSubjectivity)
df_tweets['Polarity'] = df_tweets['Tweets_Sentiment'].apply(getPolarity)
df_tweets['Analysis'] = df_tweets['Polarity'].apply(getAnalysis)
print(df_tweets)

                                               Tweets     Screen_Name  \
0   Honoring those who made the ultimate sacrifice...      theotherAP   
1   RT @AdamHulseSports: Reflecting on the Jalen R...     FantasyTurf   
2   Jacksonville you have been awesome #AEWDoN #AE...        matymils   
3   RT @SilversteinAdam: Chris Jericho: "Have a gr...        chadkoon   
4   RT @AdamHulseSports: Reflecting on the Jalen R...     NFLProPicks   
..                                                ...             ...   
80  RT @SKWrestling_: You are a legend, @CoachUrba...      mjtiscione   
81  You are a legend, @CoachUrbanMeyer. 🤣🤣🤣 #Doubl...    SKWrestling_   
82  Urban Meyer and Charlie Strong making a cameo ...    GigiMorale92   
83  RT @willforthrill: Coach Meyer and Strong are ...   engelhartnick   
84  Jaguars coach Urban Meyer gets involved in AEW...  BetUS_Official   

                                     Tweets_Sentiment  Polarity  Analysis  
0   Honoring those who made the ultimate sacrif

-Top 10 Hashtag and User

In [122]:
def getHashtags(tweets, event):
    hashtags = []
    for tweet in tweets:
        for word in tweet.lower().split(' '):
            if word.startswith('#'):
                word = re.search('#[a-z0-9]*', word)
                word = word.group(0)
                if word != event.lower():
                    hashtags.append(word)
    return hashtags;

print(df_tweets['Screen_Name'].value_counts().nlargest(10))

df_hashtags = pd.DataFrame(getHashtags(df_tweets['Tweets'], event), columns=['Hashtags'])
print(df_hashtags['Hashtags'].value_counts().nlargest(10))

JAXcommish         3
ComeFlyWitBlacK    3
CelesteZona        2
Slipmaggot8        1
zero_kool2345      1
Advil911           1
professafloyd88    1
Slicknickshady     1
PeterMtz505        1
ESSNTLWRESTLING    1
Name: Screen_Name, dtype: int64
#doubleornothing       52
#jaguars               49
#aew                    7
#aewdoubleornothing     4
#longhorns              3
#stadiumstampede        3
#nfl                    3
#jacksonville           2
#nfltwitter             2
#aewdon                 2
Name: Hashtags, dtype: int64


-follower of a given Twitter user from the acquired data set

In [123]:
def twitterFollower(api, user, count):
    followers = tweepy.Cursor(api.followers, user).items(count)
    print(followers.next())
    return followers

followers = twitterFollower(api, 'Jaguars', follower_count)
df_follower = pd.DataFrame([follower.screen_name for follower in followers], columns=['Follower'])
print(df_follower)

User(_api=<tweepy.api.API object at 0x000001E59E43DA30>, _json={'id': 1334728630354800642, 'id_str': '1334728630354800642', 'name': 'jonathan', 'screen_name': 'jkurzawa1206', 'location': '', 'description': '', 'url': None, 'entities': {'description': {'urls': []}}, 'protected': False, 'followers_count': 0, 'friends_count': 99, 'listed_count': 0, 'created_at': 'Fri Dec 04 05:18:08 +0000 2020', 'favourites_count': 2, 'utc_offset': None, 'time_zone': None, 'geo_enabled': False, 'verified': False, 'statuses_count': 0, 'lang': None, 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'F5F8FA', 'profile_background_image_url': None, 'profile_background_image_url_https': None, 'profile_background_tile': False, 'profile_image_url': 'http://abs.twimg.com/sticky/default_profile_images/default_profile_normal.png', 'profile_image_url_https': 'https://abs.twimg.com/sticky/default_profile_images/default_profile_normal.png', 'profile_link

-profiles and data of followers

In [2]:
from tweepy import TweepError

def getFollowersProfiles(api, followers, count):
    profiles = []
    for follower in followers:
        try:
            line = tweepy.Cursor(api.user_timeline, follower).items(count)
            print("--------------" + str(line.next().user))
            for tweet in line:
                print(tweet.text)
        except TweepError:
            print("Not allowed to access profile!")
        except StopIteration:
            pass

followers_Profiles = getFollowersProfiles(api, df_follower['Follower'], tweet_count)

df_follower['location'] = np.array([profile.user.follower for profile in followers_Profiles])

ModuleNotFoundError: No module named 'tweepy'