In [1]:
# Import required libraries
import pandas as pd 
import numpy as np 
import snscrape.modules.twitter as sntwitter 
import datetime 
from tqdm.notebook import tqdm_notebook 
from apyori import apriori

In [2]:
# Get user input for search query, start and end dates, and max number of tweets to retrieve
text = input('Enter query text to be matched (or leave it blank by pressing enter)') 
since = input('Enter startdate in this format yyyy-mm-dd (or leave it blank by pressing enter): ') 
until = input('Enter enddate in this format yyyy-mm-dd (or leave it blank by pressing enter): ') 
count = int(input('Enter max number of tweets or enter -1 to retrieve all possible tweets: ')) 

Enter query text to be matched (or leave it blank by pressing enter)Federal Bank
Enter startdate in this format yyyy-mm-dd (or leave it blank by pressing enter): 2023-01-01
Enter enddate in this format yyyy-mm-dd (or leave it blank by pressing enter): 2023-03-31
Enter max number of tweets or enter -1 to retrieve all possible tweets: -1


In [3]:
# Define a function to create the search query using the user input
def search(text,since,until): 
    global filename 
    q = text
    # If end date is not specified, use today's date as the end date
    if until=='':
        until = datetime.datetime.strftime(datetime.date.today(), '%Y-%m-%d')
    q += f" until:{until}"
    # If start date is not specified, use 7 days before the end date as the start date
    if since=='':
        since = datetime.datetime.strftime(datetime.datetime.strptime(until, '%Y-%m-%d') - datetime.timedelta(days=7), '%Y-%m-%d')
    q += f" since:{since}"
    # Create a filename for the output file based on the search query and dates
    filename = f"{since}_{until}_{text}.csv"
    print(filename)
    return q 

In [4]:
# Call the search function and store the resulting query in the 'q' variable
q = search(text, since, until)
 
tweets_list1 = [] 

# If the user has chosen to retrieve all tweets, use a progress bar to show progress
if count == -1: 
    for i, tweet in enumerate(tqdm_notebook(sntwitter.TwitterSearchScraper(q).get_items())): 
        hashtags = tweet.hashtags
        # Add the hashtags from each tweet to the 'tweets_list1' list
        if hashtags is not None:
            tweets_list1.append(hashtags) 
# If the user has specified a limit on the number of tweets to retrieve, use a progress bar to show progress
else: 
    with tqdm_notebook(total=count) as pbar: 
        for i, tweet in enumerate(sntwitter.TwitterSearchScraper(q).get_items()): 
            # Break the loop if the maximum number of tweets has been reached
            if i >= count:  
                break 
            hashtags = tweet.hashtags
            # Add the hashtags from each tweet to the 'tweets_list1' list
            if hashtags is not None:
                tweets_list1.append(hashtags) 
            # Update the progress bar
            pbar.update(1) 

2023-01-01_2023-03-31_Federal Bank.csv


0it [00:00, ?it/s]

In [5]:
# Convert the list of hashtags to a pandas dataframe
tweets_df1 = pd.DataFrame({'Hashtags': tweets_list1})

# Remove any rows where the 'Hashtags' column is empty
tweets_df1 = tweets_df1.dropna(subset=['Hashtags'])

# Convert each list of hashtags in the 'Hashtags' column to a set to remove duplicates, then convert it back to a list and store it in a new list called 'Hashtags'
Hashtags = tweets_df1['Hashtags'].apply(lambda x: list(set(x))).tolist()

In [6]:
min_support = 0.05
min_confidence = 0.95
min_lift = 1.0

In [7]:
# Use the apriori algorithm to generate association rules for the list of hashtags
results = list(apriori(Hashtags, 
                       min_support=min_support, 
                       min_confidence=min_confidence, 
                       min_lift=min_lift))

In [8]:
# Print the results of the apriori algorithm
for r in results:
    print(f"Itemset: {', '.join(r.items)}")
    print(f"Support: {r.support:.3f}")
    print(f"Confidence: {r.ordered_statistics[0].confidence:.3f}")
    print(f"Lift: {r.ordered_statistics[0].lift:.3f}")
    print()

Itemset: MostAdmiredBank, FederalBank
Support: 0.061
Confidence: 0.993
Lift: 2.240

Itemset: RishtaAapSeHaiSirfAppSeHai, FederalBank
Support: 0.062
Confidence: 1.000
Lift: 2.255

Itemset: FederalBank, perfectbankingpartner
Support: 0.057
Confidence: 0.966
Lift: 2.178

Itemset: MostAdmiredBank, Rishta
Support: 0.060
Confidence: 0.980
Lift: 8.670

Itemset: RishtaAapSeHaiSirfAppSeHai, MostAdmiredBank
Support: 0.060
Confidence: 0.980
Lift: 15.820

Itemset: MostAdmiredBank, perfectbankingpartner
Support: 0.057
Confidence: 0.959
Lift: 15.579

Itemset: RishtaAapSeHaiSirfAppSeHai, Rishta
Support: 0.060
Confidence: 0.974
Lift: 8.614

Itemset: perfectbankingpartner, Rishta
Support: 0.058
Confidence: 0.980
Lift: 8.663

Itemset: RishtaAapSeHaiSirfAppSeHai, perfectbankingpartner
Support: 0.057
Confidence: 0.966
Lift: 15.588

Itemset: MostAdmiredBank, FederalBank, Rishta
Support: 0.060
Confidence: 0.980
Lift: 11.769

Itemset: MostAdmiredBank, FederalBank, RishtaAapSeHaiSirfAppSeHai
Support: 0.060
Co

In [9]:
for r in results:
    # Extract the antecedent and consequent items from the association rule
    antecedent = ', '.join(r.ordered_statistics[0].items_base)
    consequent = ', '.join(r.ordered_statistics[0].items_add)
    # Extract the support, confidence, and lift values from the association rule
    support = r.support
    confidence = r.ordered_statistics[0].confidence
    lift = r.ordered_statistics[0].lift
    # Print the association rule with its support, confidence, and lift values
    print(f"{antecedent} -> {consequent}: Support={support:.3f}, Confidence={confidence:.3f}, Lift={lift:.3f}")

MostAdmiredBank -> FederalBank: Support=0.061, Confidence=0.993, Lift=2.240
RishtaAapSeHaiSirfAppSeHai -> FederalBank: Support=0.062, Confidence=1.000, Lift=2.255
perfectbankingpartner -> FederalBank: Support=0.057, Confidence=0.966, Lift=2.178
MostAdmiredBank -> Rishta: Support=0.060, Confidence=0.980, Lift=8.670
MostAdmiredBank -> RishtaAapSeHaiSirfAppSeHai: Support=0.060, Confidence=0.980, Lift=15.820
perfectbankingpartner -> MostAdmiredBank: Support=0.057, Confidence=0.959, Lift=15.579
RishtaAapSeHaiSirfAppSeHai -> Rishta: Support=0.060, Confidence=0.974, Lift=8.614
perfectbankingpartner -> Rishta: Support=0.058, Confidence=0.980, Lift=8.663
perfectbankingpartner -> RishtaAapSeHaiSirfAppSeHai: Support=0.057, Confidence=0.966, Lift=15.588
MostAdmiredBank -> FederalBank, Rishta: Support=0.060, Confidence=0.980, Lift=11.769
MostAdmiredBank -> RishtaAapSeHaiSirfAppSeHai, FederalBank: Support=0.060, Confidence=0.980, Lift=15.820
perfectbankingpartner -> FederalBank, MostAdmiredBank: Sup