## Sentiment Analysis - Data Gathering

We will be using the **Social Network Scraper** library to scrap from couple of website

if you need to install snscrape use

In [13]:
#pip install snscrape
#pip install git+https://github.com/JustAnotherArchivist/snscrape.git

In [14]:
# Importing all the necessary library
#importing needed files
import snscrape.modules.twitter as sntwitter
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
%matplotlib inline

In [15]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer

import string
import re
import textblob
from textblob import TextBlob

In [5]:
from wordcloud import WordCloud, STOPWORDS
from emot.emo_unicode import UNICODE_EMOJI
from wordcloud import ImageColorGenerator
from PIL import Image

In [6]:
lemmatizer = WordNetLemmatizer()

### How to Scrape Tweets from a User with Snscrape

In [11]:
# Created a list to append all tweet attributes(data)
punch_news = []

# Using TwitterSearchScraper to scrape data and append tweets to list
for i,tweet in enumerate(sntwitter.TwitterSearchScraper('from:MobilePunch').get_items()):
    if i>2000:
        break
    punch_news.append([tweet.id, tweet.date, tweet.likeCount, tweet.replyCount, tweet.sourceLabel, tweet.rawContent,tweet.url,tweet.user.username,tweet.user.location])
    
# Creating a dataframe from the tweets list above 
punch_tweet = pd.DataFrame(punch_news, columns=["id","date_created", "no_of_likes", "no_of_reply", "source", "tweets", "url","username", "location"])


punch_tweet.to_csv('punchnews.csv', mode = 'a')

In [12]:
punch_tweet.head()

Unnamed: 0,id,date_created,no_of_likes,no_of_reply,source,tweets,url,username,location
0,1561737743931146240,2022-08-22 15:31:08+00:00,2,0,Twitter for iPhone,Credit: Instagram| funkejenifaakindele\n\n--\n...,https://twitter.com/MobilePunch/status/1561737...,MobilePunch,"Lagos, Nigeria"
1,1561737740395454465,2022-08-22 15:31:07+00:00,6,1,Twitter for iPhone,In a TikTok video that she posted on her Insta...,https://twitter.com/MobilePunch/status/1561737...,MobilePunch,"Lagos, Nigeria"
2,1561737735412621313,2022-08-22 15:31:06+00:00,24,1,Twitter for iPhone,Funke Akindele Excited About 45th Birthday\n \...,https://twitter.com/MobilePunch/status/1561737...,MobilePunch,"Lagos, Nigeria"
3,1561736129560428545,2022-08-22 15:24:43+00:00,15,1,Echobox,APC chieftain seeks appointment of more judges...,https://twitter.com/MobilePunch/status/1561736...,MobilePunch,"Lagos, Nigeria"
4,1561734896120463366,2022-08-22 15:19:49+00:00,84,1,Echobox,"Vote out bad leaders, LP chieftain urges youth...",https://twitter.com/MobilePunch/status/1561734...,MobilePunch,"Lagos, Nigeria"


In [19]:
# read the csv file
punch = pd.read_csv('punchnews.csv', encoding = 'unicode_escape')

In [20]:
punch.head()

Unnamed: 0.1,Unnamed: 0,id,date_created,no_of_likes,no_of_reply,source,tweets,url,username,location
0,0,1561737743931146240,2022-08-22 15:31:08+00:00,2,0,Twitter for iPhone,Credit: Instagram| funkejenifaakindele\n\n--\n...,https://twitter.com/MobilePunch/status/1561737...,MobilePunch,"Lagos, Nigeria"
1,1,1561737740395454465,2022-08-22 15:31:07+00:00,6,1,Twitter for iPhone,In a TikTok video that she posted on her Insta...,https://twitter.com/MobilePunch/status/1561737...,MobilePunch,"Lagos, Nigeria"
2,2,1561737735412621313,2022-08-22 15:31:06+00:00,24,1,Twitter for iPhone,Funke Akindele Excited About 45th Birthday\n \...,https://twitter.com/MobilePunch/status/1561737...,MobilePunch,"Lagos, Nigeria"
3,3,1561736129560428545,2022-08-22 15:24:43+00:00,15,1,Echobox,APC chieftain seeks appointment of more judges...,https://twitter.com/MobilePunch/status/1561736...,MobilePunch,"Lagos, Nigeria"
4,4,1561734896120463366,2022-08-22 15:19:49+00:00,84,1,Echobox,"Vote out bad leaders, LP chieftain urges youth...",https://twitter.com/MobilePunch/status/1561734...,MobilePunch,"Lagos, Nigeria"


### How to Scrape Tweets from a Text Search with Snscrape

In [24]:
query = 'Peter Obi OR Atiku OR Tinubu OR #Obidient OR #Atikulated OR #Batified OR #BAT2023 OR #OBI2023 OR #Atiku2023 since:2021-01-01 until:2022-08-30'
politics = []
for i, tweet in enumerate(sntwitter.TwitterSearchScraper(query).get_items()):
    if i>20000:
        break
    else:
        politics.append([tweet.id, tweet.date, tweet.likeCount, tweet.replyCount, tweet.sourceLabel, 
                       tweet.rawContent,tweet.url,tweet.user.username,tweet.user.location])

        
        
politics = pd.DataFrame(politics, columns = ["id","date_created", "no_of_likes", 
                                       "no_of_reply", "source", "tweets", "url","username", "location"])

politics.to_csv('politics.csv', mode = 'a')

In [25]:
politics.head()

Unnamed: 0,id,date_created,no_of_likes,no_of_reply,source,tweets,url,username,location
0,1561791456376938499,2022-08-22 19:04:34+00:00,0,0,Twitter for Android,@NigerianAmazon Nwa Ada Igbo,https://twitter.com/Azuanuka_1/status/15617914...,Azuanuka_1,Ebenine
1,1561791454502076416,2022-08-22 19:04:34+00:00,0,0,Twitter for iPhone,@UchePOkoye Peter obi their mate??? Na they ar...,https://twitter.com/mojo157s/status/1561791454...,mojo157s,
2,1561791450559422467,2022-08-22 19:04:33+00:00,0,0,Twitter for Android,@valerian247 @onlychiko @Egi_nupe_ @Oluwanonso...,https://twitter.com/Obiageriaku1/status/156179...,Obiageriaku1,South Korea with Choi woo shik
3,1561791441503936512,2022-08-22 19:04:30+00:00,0,0,Twitter for Android,@roooooneeeey @DrJoeAbah @PeterObi Tel me the ...,https://twitter.com/Iam_abdulmykeL/status/1561...,Iam_abdulmykeL,Nigeria
4,1561791440631504898,2022-08-22 19:04:30+00:00,0,0,Twitter for Android,@ayamdapsonfrosh @fortuna_dame Na wa ohh why n...,https://twitter.com/ibeanuchiderag1/status/156...,ibeanuchiderag1,"Lagos, Nigeria"


In [27]:
# read the csv file
politic = pd.read_csv('politics.csv')

In [28]:
politic.head()

Unnamed: 0.1,Unnamed: 0,id,date_created,no_of_likes,no_of_reply,source,tweets,url,username,location
0,0,1561791456376938499,2022-08-22 19:04:34+00:00,0,0,Twitter for Android,@NigerianAmazon Nwa Ada Igbo,https://twitter.com/Azuanuka_1/status/15617914...,Azuanuka_1,Ebenine
1,1,1561791454502076416,2022-08-22 19:04:34+00:00,0,0,Twitter for iPhone,@UchePOkoye Peter obi their mate??? Na they ar...,https://twitter.com/mojo157s/status/1561791454...,mojo157s,
2,2,1561791450559422467,2022-08-22 19:04:33+00:00,0,0,Twitter for Android,@valerian247 @onlychiko @Egi_nupe_ @Oluwanonso...,https://twitter.com/Obiageriaku1/status/156179...,Obiageriaku1,South Korea with Choi woo shik
3,3,1561791441503936512,2022-08-22 19:04:30+00:00,0,0,Twitter for Android,@roooooneeeey @DrJoeAbah @PeterObi Tel me the ...,https://twitter.com/Iam_abdulmykeL/status/1561...,Iam_abdulmykeL,Nigeria
4,4,1561791440631504898,2022-08-22 19:04:30+00:00,0,0,Twitter for Android,@ayamdapsonfrosh @fortuna_dame Na wa ohh why n...,https://twitter.com/ibeanuchiderag1/status/156...,ibeanuchiderag1,"Lagos, Nigeria"


### How to Scrape Tweets from a User (@PeterObi) with Snscrape

In [33]:
# Created a list to append all tweet attributes(data)
peter = []

# Using TwitterSearchScraper to scrape data and append tweets to list
for i,tweet in enumerate(sntwitter.TwitterSearchScraper('from:PeterObi until:2022-08-30').get_items()):
    if i>20000:
        break
    peter.append([tweet.id, tweet.date, tweet.likeCount, tweet.replyCount, tweet.sourceLabel, tweet.rawContent,tweet.url,tweet.user.username,tweet.user.location])
    
# Creating a dataframe from the tweets list above 
peter = pd.DataFrame(peter, columns=["id","date_created", "no_of_likes", "no_of_reply", "source", "tweets", "url","username", "location"])


peter.to_csv('peter_obi.csv', mode = 'a')

In [35]:
peter.head()

Unnamed: 0,id,date_created,no_of_likes,no_of_reply,source,tweets,url,username,location
0,1561371318158852098,2022-08-21 15:15:05+00:00,24656,871,Twitter for Android,Congratulations to @Chigozielalex and spouse f...,https://twitter.com/PeterObi/status/1561371318...,PeterObi,Nigeria
1,1561277831375687681,2022-08-21 09:03:36+00:00,5026,67,Twitter for Android,"May God grant her eternal peace, and also gran...",https://twitter.com/PeterObi/status/1561277831...,PeterObi,Nigeria
2,1561277828481667072,2022-08-21 09:03:36+00:00,17881,591,Twitter for Android,"I joined my dear friend and brother, Chief Ndu...",https://twitter.com/PeterObi/status/1561277828...,PeterObi,Nigeria
3,1561097016515694595,2022-08-20 21:05:07+00:00,11369,293,Twitter for Android,Our prayer is that Nigeria’s coming election w...,https://twitter.com/PeterObi/status/1561097016...,PeterObi,Nigeria
4,1561096992754909184,2022-08-20 21:05:01+00:00,26908,987,Twitter for Android,I join the lovers of democracy worldwide to co...,https://twitter.com/PeterObi/status/1561096992...,PeterObi,Nigeria


In [36]:
# read the csv file
peter_obi = pd.read_csv('peter_obi.csv', encoding = 'unicode_escape')

In [37]:
peter_obi.head()

Unnamed: 0.1,Unnamed: 0,id,date_created,no_of_likes,no_of_reply,source,tweets,url,username,location
0,0,1561371318158852098,2022-08-21 15:15:05+00:00,24656,871,Twitter for Android,Congratulations to @Chigozielalex and spouse f...,https://twitter.com/PeterObi/status/1561371318...,PeterObi,Nigeria
1,1,1561277831375687681,2022-08-21 09:03:36+00:00,5026,67,Twitter for Android,"May God grant her eternal peace, and also gran...",https://twitter.com/PeterObi/status/1561277831...,PeterObi,Nigeria
2,2,1561277828481667072,2022-08-21 09:03:36+00:00,17881,591,Twitter for Android,"I joined my dear friend and brother, Chief Ndu...",https://twitter.com/PeterObi/status/1561277828...,PeterObi,Nigeria
3,3,1561097016515694595,2022-08-20 21:05:07+00:00,11369,293,Twitter for Android,Our prayer is that Nigeriaâs coming election...,https://twitter.com/PeterObi/status/1561097016...,PeterObi,Nigeria
4,4,1561096992754909184,2022-08-20 21:05:01+00:00,26908,987,Twitter for Android,I join the lovers of democracy worldwide to co...,https://twitter.com/PeterObi/status/1561096992...,PeterObi,Nigeria
