#### IMPORT LIBRARIES

In [55]:
import configparser
from typing import List
import tweepy
import pandas as pd
import psycopg2

pd.options.mode.chained_assignment = None  # default='warn'

#### READ CONFIG

In [2]:
# Load settings from config.ini (expected in the notebook's working directory).
config = configparser.ConfigParser()
config.read('config.ini')

# Twitter API credentials live in the [twitter] section.
twitter_section = config['twitter']
CONSUMER_KEY = twitter_section['CONSUMER_KEY']
CONSUMER_SECRET = twitter_section['CONSUMER_SECRET']
ACCESS_TOKEN = twitter_section['ACCESS_TOKEN']
ACCESS_SECRET = twitter_section['ACCESS_SECRET']

# Path of the file that stores the id checkpoint between runs ([filename] section).
filename_section = config['filename']
LAST_TWEET_ID = filename_section['LAST_TWEET_ID_FILENAME']

# Database password comes from the [database] section.
database_section = config['database']
DATABASE_PASSWORD = database_section['DATABASE_PASSWORD']

#### AUTHENTICATE TWITTER

In [3]:
# authenticate
# Build the tweepy OAuth handler from the consumer key/secret, attach the
# access token pair, and create the API client; `api` is the module-level
# client that get_tweets() calls to run searches.
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
api = tweepy.API(auth)

#### CONNECT DATABASE

In [4]:
# Open the PostgreSQL connection. The password is taken from config.ini
# (DATABASE_PASSWORD) instead of being hardcoded in the notebook, so the
# secret is not committed alongside the code.
conn = psycopg2.connect(
  host="localhost",
  dbname="tweets",
  user="postgres",
  password=DATABASE_PASSWORD,
)

#### BUSINESS LOGIC

In [71]:
def get_tweets(keyword: str, lastTweetId=None) -> List:
  """Search recent English tweets matching ``keyword``.

  Args:
    keyword: Search query passed to the Twitter search endpoint.
    lastTweetId: Forwarded as ``since_id`` so only tweets with a greater id
      are returned. Defaults to None (no lower bound); tweepy is expected
      to omit parameters whose value is None - confirm against the
      installed tweepy version.

  Returns:
    A list of the tweet objects yielded by ``api.search_tweets`` (at most
    10 per call, extended mode so ``full_text`` is available).
  """
  results = api.search_tweets(
    q=keyword,
    lang='en',
    count=10,
    result_type="mixed",
    tweet_mode="extended",
    since_id=lastTweetId,
  )
  # Materialise into a plain list so callers get an independent container.
  return list(results)

# def getPremiumTweets(keyword: str) -> List[str]:
#   premiumTweets = []
#   for tweet in api.search_30_day(label="development", query=keyword, maxResults=10):
#     premiumTweets.append(tweet)
#   return premiumTweets

def parsingTweets(tweets: List) -> List[str]:
  """Flatten tweet objects into plain dictionaries.

  Each input item must expose its raw payload as ``_json``. The returned
  list holds one dict per tweet with the keys ``created_at``, ``id``,
  ``full_text``, ``username``, ``handle``, ``location``,
  ``favorite_count`` and ``retweet_count``.
  """
  def _flatten(status):
    # Pick the fields of interest out of the raw JSON payload.
    payload = status._json
    return {
      "created_at": payload["created_at"],
      "id": payload["id"],
      "full_text": payload["full_text"],
      "username": payload["user"]["name"],
      "handle": payload["user"]["screen_name"],
      "location": payload["user"]["location"],
      "favorite_count": payload["favorite_count"],
      "retweet_count": payload["retweet_count"],
    }

  return [_flatten(status) for status in tweets]


def saveLastTweetId(tweets: List, fileName=None):
  """Persist the highest tweet id of a batch as the next search checkpoint.

  The stored value is later fed back as ``since_id``, which only returns
  tweets with a greater id. Saving the maximum id (rather than the id of
  the last list element, which may be the oldest tweet of the batch)
  prevents the next run from fetching the same tweets again.

  Args:
    tweets: Parsed tweets, each a mapping with an ``"id"`` key.
    fileName: Optional path of the checkpoint file; defaults to the
      module-level ``LAST_TWEET_ID`` path.

  Returns:
    None. When ``tweets`` is empty nothing is written, so the previous
    checkpoint is kept.
  """
  if not tweets:
    # No new tweets: keep the existing checkpoint instead of crashing.
    return None

  newest_id = max(tweet["id"] for tweet in tweets)
  target = LAST_TWEET_ID if fileName is None else fileName
  # Context manager guarantees the handle is closed even if write fails.
  with open(target, 'w') as handle:
    handle.write(str(newest_id))
  return None

def readLastTweetId(fileName=None):
  """Read the tweet id checkpoint written by a previous run.

  Args:
    fileName: Optional path of the checkpoint file; defaults to the
      module-level ``LAST_TWEET_ID`` path.

  Returns:
    The stored id as an int, or None when the file does not exist or is
    empty (e.g. on the very first run).

  Raises:
    ValueError: If the file holds non-numeric content.
  """
  source = LAST_TWEET_ID if fileName is None else fileName
  try:
    # Context manager closes the handle; the original left it open.
    with open(source, "r") as handle:
      stored = handle.read().strip()
  except FileNotFoundError:
    return None
  return int(stored) if stored else None

# Fetch 'covid19' tweets newer than the stored checkpoint id, flatten them
# into plain dicts, then write the new checkpoint id back to disk.
tweets = get_tweets('covid19', readLastTweetId())
parsedTweets = parsingTweets(tweets)
saveLastTweetId(parsedTweets)

# Build a DataFrame from the list of flattened tweet dicts (one row each).
cursor_df = pd.DataFrame.from_dict(parsedTweets)
# Number of rows to preview; reused by the date-parsing cell below.
headCount = 10
# print(cursor_df.head(10))
# Bare last expression so the notebook renders the preview as a rich table.
cursor_df.head(headCount)

Unnamed: 0,created_at,id,full_text,username,handle,location,favorite_count,retweet_count
0,Sat Feb 25 18:38:52 +0000 2023,1629551520592060416,"RT @JoePajak: 'Nationally, the number of hospi...",Amanda 🇺🇦🌻,amandafclark,United Kingdom,0,6
1,Sat Feb 25 18:38:48 +0000 2023,1629551502984355840,RT @kier323: serious UTI caused by #COVID19 #C...,Lorraine A,L56046219,"Glasgow, Scotland",0,2
2,Sat Feb 25 18:38:38 +0000 2023,1629551460470882307,RT @P_McCulloughMD: Time and effort on @CDCDir...,Wade Miller,WadeMiller_USMC,,0,187
3,Sat Feb 25 18:38:37 +0000 2023,1629551456029163520,RT @vera_tenacious: @SylviaJonesMPP Protect yo...,Judy 🇺🇦 🦖🌹 #NHS #ClimateEmergency #RejoinEU ⭐,JudyWeb92176381,"London, England",0,2
4,Sat Feb 25 18:38:36 +0000 2023,1629551452023562248,RT @DebbyvanRiel: DEADLINE Febr 28\nESWI early...,bridget ❤,Bridget__4076,,0,12
5,Sat Feb 25 18:38:24 +0000 2023,1629551400307814401,@US_PeoplesParty @TheDemocrats I have a few fo...,Marijuana is Tree of Life,MimiTexasAngel,Planet Earth,0,0
6,Sat Feb 25 18:38:22 +0000 2023,1629551395194929152,This thread shows we are well and truly screwe...,Carl Gorney,buffalocharging,,0,0
7,Sat Feb 25 18:38:10 +0000 2023,1629551344070664193,@djassassinpgh @sixfootfourman @highbrow_nobro...,Luke Chilton,smartnootropics,United Kingdom,0,0
8,Sat Feb 25 18:38:07 +0000 2023,1629551332683046913,RT @DrTedros: #COVID19 is still disrupting liv...,Tal Barenboim,taltalon,"Haifa, Israel",0,84
9,Sat Feb 25 18:38:05 +0000 2023,1629551322931314688,"@POTUS POTUS my name is Michael Wrangstadh, I ...",Michael Wrangstadh,MichaelWrangsta,Switzerland,0,0


In [72]:
# Parse the Twitter 'created_at' strings (e.g. "Sat Feb 25 18:38:52 +0000 2023")
# into timezone-aware timestamps, stored in a new 'dates_parsed' column.
# .assign works on a copy of the previewed rows, so cursor_df is left untouched.
sampleTweets_df = cursor_df.head(headCount).assign(
  dates_parsed=lambda preview: pd.to_datetime(
    preview['created_at'], format="%a %b %d %H:%M:%S %z %Y"
  )
)
sampleTweets_df

Unnamed: 0,created_at,id,full_text,username,handle,location,favorite_count,retweet_count,dates_parsed
0,Sat Feb 25 18:38:52 +0000 2023,1629551520592060416,"RT @JoePajak: 'Nationally, the number of hospi...",Amanda 🇺🇦🌻,amandafclark,United Kingdom,0,6,2023-02-25 18:38:52+00:00
1,Sat Feb 25 18:38:48 +0000 2023,1629551502984355840,RT @kier323: serious UTI caused by #COVID19 #C...,Lorraine A,L56046219,"Glasgow, Scotland",0,2,2023-02-25 18:38:48+00:00
2,Sat Feb 25 18:38:38 +0000 2023,1629551460470882307,RT @P_McCulloughMD: Time and effort on @CDCDir...,Wade Miller,WadeMiller_USMC,,0,187,2023-02-25 18:38:38+00:00
3,Sat Feb 25 18:38:37 +0000 2023,1629551456029163520,RT @vera_tenacious: @SylviaJonesMPP Protect yo...,Judy 🇺🇦 🦖🌹 #NHS #ClimateEmergency #RejoinEU ⭐,JudyWeb92176381,"London, England",0,2,2023-02-25 18:38:37+00:00
4,Sat Feb 25 18:38:36 +0000 2023,1629551452023562248,RT @DebbyvanRiel: DEADLINE Febr 28\nESWI early...,bridget ❤,Bridget__4076,,0,12,2023-02-25 18:38:36+00:00
5,Sat Feb 25 18:38:24 +0000 2023,1629551400307814401,@US_PeoplesParty @TheDemocrats I have a few fo...,Marijuana is Tree of Life,MimiTexasAngel,Planet Earth,0,0,2023-02-25 18:38:24+00:00
6,Sat Feb 25 18:38:22 +0000 2023,1629551395194929152,This thread shows we are well and truly screwe...,Carl Gorney,buffalocharging,,0,0,2023-02-25 18:38:22+00:00
7,Sat Feb 25 18:38:10 +0000 2023,1629551344070664193,@djassassinpgh @sixfootfourman @highbrow_nobro...,Luke Chilton,smartnootropics,United Kingdom,0,0,2023-02-25 18:38:10+00:00
8,Sat Feb 25 18:38:07 +0000 2023,1629551332683046913,RT @DrTedros: #COVID19 is still disrupting liv...,Tal Barenboim,taltalon,"Haifa, Israel",0,84,2023-02-25 18:38:07+00:00
9,Sat Feb 25 18:38:05 +0000 2023,1629551322931314688,"@POTUS POTUS my name is Michael Wrangstadh, I ...",Michael Wrangstadh,MichaelWrangsta,Switzerland,0,0,2023-02-25 18:38:05+00:00
