In [1]:
# import modules
import tweepy
from tweepy.streaming import Stream
import pandas as pd
import numpy as np
import re
import os

# env module
import dotenv
dotenv_file = dotenv.find_dotenv()
dotenv.load_dotenv(dotenv_file, override=True)

# # TWITTER AUTHENTICATER # #
class TwitterAuthenticator():
    '''
    Class for keys authentication
    '''

    def __init__(self) -> None:
        pass

    def authenticate(self):
        # set online authentication
        auth = tweepy.OAuth1UserHandler(
            os.getenv('consumer_key'), 
            os.getenv('consumer_secret'), 
            os.getenv('access_token'), 
            os.getenv('access_token_secret'))

        # calling the API
        api = tweepy.API(auth)

        return api


# # TWEETS RAW DATA # #
class TweeterMining:
    '''
    Processing tweets searched by term
    '''

    df = pd.DataFrame()

    def __init__(self, search_term = None, tweets_amount=5):
        self.search_term = search_term
        self.tweets_amount = tweets_amount
        self.twitter_authenticator = TwitterAuthenticator()


    def get_tweets(self):
        '''
        gets informatio from the tweets, txt, ids, likes, and time.
        '''
        # use lowercases for search term
        search_term = self.search_term
        tweet_amount = self.tweets_amount
        api = self.twitter_authenticator.authenticate()
        tweets_raw = []
        ids = []
        likes = []
        time = []


        # return most recent search word
        # search by topic
        data = tweepy.Cursor(api.search_tweets,
                            q = search_term,
                            lang='en',
                            tweet_mode='extended',
                            ).items(tweet_amount)



        # getting data into a lists
        for tweet in data:
            #print(tweet.text)
            tweets_raw.append(tweet.full_text)
            ids.append(tweet.id)
            likes.append(tweet.favorite_count)
            time.append(tweet.created_at)

        df = pd.DataFrame({'tweets':tweets_raw, 'id':ids,'likes':likes,'time':time})

        # # removing retweets (all the line)
        df = df[~df.tweets.str.contains("RT")]

        self.df = df
        
        return df

    
    def cleaning(self): 
        '''
        removes extra information from the tweets
        '''  
        # cleaning tweets, removing mentioned users (@)
        self.df['tweets'] = self.df['tweets'].str.replace('htt.*', '', regex=True)
        self.df['tweets'] = self.df['tweets'].str.replace(r'@.\w+', '', regex=True)
        self.df['tweets'] = self.df['tweets'].str.replace(r'#.\w+', '', regex=True)
        self.df['tweets'] = self.df['tweets'].str.replace(r'\n', '', regex=True)
        self.df['tweets'] = self.df['tweets'].str.replace(u'[\U0001F600-\U0001F975].+', '', regex=True)

        # reset index
        self.df = self.df.reset_index(drop=True)

        return self.df








In [2]:
miningtweeter = TweeterMining("global warming ?", 20)

df = miningtweeter.get_tweets()

df = miningtweeter.cleaning()


df

Unnamed: 0,tweets,id,likes,time
0,Let me guess.. In person voting with ID caus...,1551222796684083200,0,2022-07-24 15:08:29+00:00
1,In Leeds? wow that is global warming.,1551222483373568000,0,2022-07-24 15:07:15+00:00
2,Have you watched the movie? Of course you ha...,1551222365505220609,0,2022-07-24 15:06:46+00:00
3,I am still failing to find the effects of glob...,1551222325827325954,0,2022-07-24 15:06:37+00:00
4,If its global warming year on year ??How com...,1551221838876901378,0,2022-07-24 15:04:41+00:00
5,Ohh really Al? You have a net worth of over $...,1551221792206987267,0,2022-07-24 15:04:30+00:00
6,Why was it renamed? What was wrong with glo...,1551221660056952832,0,2022-07-24 15:03:58+00:00
7,"Oh, you don't read or watch fictional horror b...",1551221241503027202,4,2022-07-24 15:02:18+00:00


In [5]:
df['tweets'][2]

'  Have you watched the movie? Of course you haven’t. You just group think with the rest of the moronic Left. You leftists are the Kool Aid aid drinkers. Trump is a Russian agent! We only have 12 years left to stop global warming! (This claim is made every 12 years).'

In [6]:


def writeTweets(ids, msg):

    replay = []
    client = tweepy.Client(
        os.getenv('bearer_token'), 
        os.getenv('consumer_key'), 
        os.getenv('consumer_secret'), 
        os.getenv('access_token'), 
        os.getenv('access_token_secret'))

    for i in range(df.shape[0]):
        #temp = client.create_tweet(in_reply_to_tweet_id=ids[i],text= msg[i])
        temp = ids[i]
        replay.append(temp)

    return replay

In [None]:
writeTweets(df['id'], df['tweets'])