#### IMPORT LIBRARIES

In [55]:
import configparser
from typing import List
import tweepy
import pandas as pd
import psycopg2

# Disable pandas' chained-assignment warning (SettingWithCopyWarning) for the whole notebook.
pd.options.mode.chained_assignment = None  # default='warn'

#### READ CONFIG

In [2]:
# Load settings from config.ini. ConfigParser.read() silently skips files it
# cannot open and returns the list of files it did parse, so fail fast here
# instead of hitting an opaque KeyError on the first lookup below.
CONFIG_PATH = 'config.ini'
config = configparser.ConfigParser()
if not config.read(CONFIG_PATH):
  raise FileNotFoundError(f"Could not read configuration file: {CONFIG_PATH!r}")

# Twitter API credentials ([twitter] section)
CONSUMER_KEY = config['twitter']['CONSUMER_KEY']
CONSUMER_SECRET = config['twitter']['CONSUMER_SECRET']
ACCESS_TOKEN = config['twitter']['ACCESS_TOKEN']
ACCESS_SECRET = config['twitter']['ACCESS_SECRET']

# Path of the file that stores the last processed tweet id ([filename] section)
LAST_TWEET_ID = config['filename']['LAST_TWEET_ID_FILENAME']

# Database password ([database] section)
DATABASE_PASSWORD = config['database']['DATABASE_PASSWORD']

#### AUTHENTICATE TWITTER

In [3]:
# Authenticate: build the OAuth handler from the consumer key/secret, attach
# the access token pair, and create the API client that later cells call.
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
api = tweepy.API(auth)

#### CONNECT DATABASE

In [4]:
# Connect to the local "tweets" database as user "postgres". The password is
# taken from config.ini (DATABASE_PASSWORD) instead of being hardcoded in the
# notebook; keyword arguments also avoid DSN quoting problems if the password
# contains spaces or quotes.
conn = psycopg2.connect(
  host="localhost",
  dbname="tweets",
  user="postgres",
  password=DATABASE_PASSWORD,
)

#### BUSINESS LOGIC

In [56]:
def get_tweets(keyword: str, lastTweetId) -> List[str]:
  """Run a tweet search for ``keyword`` and return the results as a list.

  Calls ``api.search_tweets`` with ``lang='en'``, ``count=10``,
  ``result_type="mixed"`` and ``tweet_mode="extended"``.

  Args:
    keyword: Search query, passed as ``q``.
    lastTweetId: Value forwarded unchanged as ``since_id``.

  Returns:
    A new list holding every item yielded by the search, in the order received.
    NOTE(review): the annotation says ``List[str]``, but parsingTweets reads
    ``._json`` from these items, so they are presumably tweepy status
    objects rather than strings -- confirm.
  """
  search_results = api.search_tweets(
    q=keyword,
    lang='en',
    count=10,
    result_type="mixed",
    tweet_mode="extended",
    since_id=lastTweetId,
  )
  return list(search_results)

# def getPremiumTweets(keyword: str) -> List[str]:
#   premiumTweets = []
#   for tweet in api.search_30_day(label="development", query=keyword, maxResults=10):
#     premiumTweets.append(tweet)
#   return premiumTweets

def parsingTweets(tweets: List) -> List[str]:
  """Flatten raw tweet objects into plain dictionaries.

  Each item must expose a ``_json`` mapping. The fields ``created_at``, ``id``,
  ``full_text``, ``favorite_count`` and ``retweet_count`` are copied from its
  top level; ``username``, ``handle`` and ``location`` come from the nested
  ``user`` mapping (keys ``name``, ``screen_name`` and ``location``).

  Args:
    tweets: Iterable of objects carrying a ``_json`` attribute.

  Returns:
    One dictionary per input item, in input order. (The ``List[str]``
    annotation is kept for compatibility; the elements are dicts.)

  Raises:
    KeyError: If an expected key is missing from a tweet's ``_json``.
  """
  def _to_record(tweet):
    raw = tweet._json
    return {
      "created_at": raw["created_at"],
      "id": raw["id"],
      "full_text": raw["full_text"],
      "username": raw["user"]["name"],
      "handle": raw["user"]["screen_name"],
      "location": raw["user"]["location"],
      "favorite_count": raw["favorite_count"],
      "retweet_count": raw["retweet_count"],
    }

  return [_to_record(tweet) for tweet in tweets]


def saveLastTweetId(tweets: List):
  """Persist the highest tweet id found in ``tweets`` to the LAST_TWEET_ID file.

  The stored id is read back by readLastTweetId and used as the ``since_id``
  of the next search, so it must be the largest id seen; the position of a
  tweet in the result list does not guarantee that. An empty list leaves the
  file untouched so the previous checkpoint is kept.

  Args:
    tweets: Parsed tweet dictionaries, each with an ``"id"`` key.

  Returns:
    None.
  """
  if not tweets:
    # Nothing new was fetched: keep the existing checkpoint.
    return None

  newestId = max(tweet["id"] for tweet in tweets)

  # The context manager closes the file even if the write fails.
  with open(LAST_TWEET_ID, 'w') as f:
    f.write(str(newestId))
  return None

def readLastTweetId():
  """Return the tweet id stored in the LAST_TWEET_ID file.

  Returns:
    The stored id as an ``int``, or ``None`` when the file does not exist or
    is empty (e.g. on the very first run, before any id has been saved).
    NOTE(review): get_tweets forwards this value as ``since_id``; a ``None``
    there is expected to mean "no lower bound" -- confirm against the tweepy
    version in use.

  Raises:
    ValueError: If the file contains something that is not an integer.
  """
  try:
    # The context manager guarantees the handle is closed after reading.
    with open(LAST_TWEET_ID, "r") as f:
      content = f.read().strip()
  except FileNotFoundError:
    return None

  return int(content) if content else None

# Fetch tweets for the keyword starting from the stored checkpoint id, then
# flatten them into plain dictionaries.
tweets = get_tweets('covid19', readLastTweetId())
parsedTweets = parsingTweets(tweets)

# Only advance the checkpoint when something was actually returned; with an
# empty result the previously stored id is still the right starting point.
if parsedTweets:
  saveLastTweetId(parsedTweets)

tweetsDf = pd.DataFrame(parsedTweets)
headCount = 3  # number of rows previewed here and sampled in the next cell
tweetsDf.head(headCount)

Unnamed: 0,created_at,id,full_text,username,handle,location,favorite_count,retweet_count
0,Sun Feb 19 19:36:19 +0000 2023,1627391651470606339,RT @Newzroom405: Bulelani Qolani was dragged n...,JB_Maverick,JB49482674,Pretoria,0,6
1,Sun Feb 19 19:36:14 +0000 2023,1627391629328896000,"RT @pfizer: For John Legend, there is nothing ...",Schnitz,TrudeauMustGooo,,0,373
2,Sun Feb 19 19:36:05 +0000 2023,1627391590900633604,RT @WHOWPRO: #VaccinesWork to protect against ...,Donal BIsanzio,donal_bisanzio,"Nottingham, England",0,12


In [57]:
# Parse the `created_at` strings into timezone-aware datetimes in a new
# `dates_parsed` column. The column is built with .assign(), which returns a
# new frame instead of writing into a slice of tweetsDf (a pattern that
# triggers SettingWithCopyWarning), and keeps this cell safe to re-run.
sampleTweets = tweetsDf.head(headCount).assign(
  dates_parsed=lambda frame: pd.to_datetime(
    frame['created_at'], format="%a %b %d %H:%M:%S %z %Y"
  )
)
sampleTweets

Unnamed: 0,created_at,id,full_text,username,handle,location,favorite_count,retweet_count,dates_parsed
0,Sun Feb 19 19:36:19 +0000 2023,1627391651470606339,RT @Newzroom405: Bulelani Qolani was dragged n...,JB_Maverick,JB49482674,Pretoria,0,6,2023-02-19 19:36:19+00:00
1,Sun Feb 19 19:36:14 +0000 2023,1627391629328896000,"RT @pfizer: For John Legend, there is nothing ...",Schnitz,TrudeauMustGooo,,0,373,2023-02-19 19:36:14+00:00
2,Sun Feb 19 19:36:05 +0000 2023,1627391590900633604,RT @WHOWPRO: #VaccinesWork to protect against ...,Donal BIsanzio,donal_bisanzio,"Nottingham, England",0,12,2023-02-19 19:36:05+00:00
