<a href="https://www.kaggle.com/code/gpreda/collect-nigerian-presidential-election-2023-tweets?scriptVersionId=134264665" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Introduction

This is a Notebook for collecting tweets about Nigerian Presidential Election 2023.
Relevant hashtags like: Tinubu" , "Obi",  "Atiku", "Cityboy", "Obidatti", "TinubuShettima", " NigeriaDecides2023" are included.
As well, tweets from specific candidates official accounts might be collected.


# Install packages

In [1]:
!pip install tweepy

Collecting tweepy
  Downloading tweepy-4.14.0-py3-none-any.whl (98 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.5/98.5 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tweepy
Successfully installed tweepy-4.14.0
[0m

# Import packages

In [2]:
import os
import tweepy as tw
import pandas as pd
from tqdm import tqdm
import datetime

In [3]:
print(f"Day: {datetime.datetime.now().day}\nMonth: {datetime.datetime.now().month}\nYear: {datetime.datetime.now().year}")

Day: 20
Month: 6
Year: 2023


# Initialize secrets

In [4]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

# Twitter connection and cursor

In [5]:
def twitter_connection():
    consumer_api_key = user_secrets.get_secret("TWITTER_CONSUMER_API_KEY")
    consumer_api_secret = user_secrets.get_secret("TWITTER_CONSUMER_API_SECRET")

    auth = tw.OAuthHandler(consumer_api_key, consumer_api_secret)
    api = tw.API(auth, wait_on_rate_limit=True)
    return api


def create_cursor(api, search_words, date_since, language="en", items_limit=3000):
    
    # Collect tweets
    tweets = tw.Cursor(api.search_tweets,
                  q=search_words,
                  lang=language,
                  since=date_since).items(items_limit)


    print(f"retreive new tweets ...")
    tweets_copy = []
    for tweet in tqdm(tweets):
        tweets_copy.append(tweet)
     
    print(f"new tweets retrieved: {len(tweets_copy)}")

    return tweets_copy

# Build dataset

In [6]:
def build_dataset(tweets_copy):
    tweets_df = pd.DataFrame()
    for tweet in tqdm(tweets_copy):
        hashtags = []
        try:
            for hashtag in tweet.entities["hashtags"]:
                hashtags.append(hashtag["text"])
        except:
            pass
        tweets_df = tweets_df.append(pd.DataFrame({'id': tweet.id,
                                                   'user_name': tweet.user.name, 
                                                   'user_location': tweet.user.location,\
                                                   'user_description': tweet.user.description,
                                                   'user_created': tweet.user.created_at,
                                                   'user_followers': tweet.user.followers_count,
                                                   'user_friends': tweet.user.friends_count,
                                                   'user_favourites': tweet.user.favourites_count,
                                                   'user_verified': tweet.user.verified,
                                                   'date': tweet.created_at,
                                                   'text': tweet.text, 
                                                   'hashtags': [hashtags if hashtags else None],
                                                   'source': tweet.source,
                                                   'retweets': tweet.retweet_count,
                                                   'favorites': tweet.favorite_count,
                                                   'is_retweet': tweet.retweeted}, index=[0]))
    return tweets_df

# Update and save dataset

In [7]:
def update_and_save_dataset(tweets_df): 
    input_file_path = "../input/nigerian-presidential-election-2023-tweets/nigerian_presidential_election_2023_tweets.csv"
    output_file_path = "nigerian_presidential_election_2023_tweets.csv"
    if os.path.exists(input_file_path):
        tweets_old_df = pd.read_csv(input_file_path)
        print(f"past tweets: {tweets_old_df.shape}")
        tweets_all_df = pd.concat([tweets_old_df, tweets_df], axis=0)
        print(f"new tweets: {tweets_df.shape[0]} past tweets: {tweets_old_df.shape[0]} all tweets: {tweets_all_df.shape[0]}")
        tweets_new_df = tweets_all_df.drop_duplicates(subset = ["id"], keep='last', inplace=False)
        print(f"all tweets: {tweets_new_df.shape}")
        tweets_new_df.to_csv(output_file_path, index=False)
    else:
        print(f"tweets: {tweets_df.shape}")
        tweets_df.to_csv(output_file_path, index=False)
    

# Run it all

In [8]:
queries = ["#Tinubu -filter:retweets", "#Obi -filter:retweets", "#Atiku -filter:retweets",
          "#Cityboy -filter:retweets", "#Obidatti -filter:retweets", "#TinubuShettima -filter:retweets",
          "#TinubuShettima2023 -filter:retweets", "#NigeriaDecides2023 -filter:retweets",
          "from:officialABAT -filter:retweets", "from:PeterObi -filter:retweets", "from:atiku -filter:retweets"]
try:
    api = twitter_connection()
except Exception as ex:
    print(ex)

count = 0
for query in queries:
    try:
        data = create_cursor(api, query, "2023-01-01")
        data_df = build_dataset(data)
        if count == 0:
            all_tweets_df = data_df
        else:
            all_tweets_df = pd.concat([all_tweets_df, data_df], axis=0)
        count += 1
    except Exception as ex:
        print(ex)
        continue
        
try:
    update_and_save_dataset(all_tweets_df)
except Exception as ex:
    print(ex)

retreive new tweets ...


0it [00:00, ?it/s]


401 Unauthorized
32 - Could not authenticate you.
retreive new tweets ...


0it [00:00, ?it/s]


401 Unauthorized
32 - Could not authenticate you.
retreive new tweets ...


0it [00:00, ?it/s]


401 Unauthorized
32 - Could not authenticate you.
retreive new tweets ...


0it [00:00, ?it/s]


401 Unauthorized
32 - Could not authenticate you.
retreive new tweets ...


0it [00:00, ?it/s]


401 Unauthorized
32 - Could not authenticate you.
retreive new tweets ...


0it [00:00, ?it/s]


401 Unauthorized
32 - Could not authenticate you.
retreive new tweets ...


0it [00:00, ?it/s]


401 Unauthorized
32 - Could not authenticate you.
retreive new tweets ...


0it [00:00, ?it/s]


401 Unauthorized
32 - Could not authenticate you.
retreive new tweets ...


0it [00:00, ?it/s]


401 Unauthorized
32 - Could not authenticate you.
retreive new tweets ...


0it [00:00, ?it/s]


401 Unauthorized
32 - Could not authenticate you.
retreive new tweets ...


0it [00:00, ?it/s]

401 Unauthorized
32 - Could not authenticate you.
name 'all_tweets_df' is not defined



