In [1]:
%config Completer.use_jedi = False
import json
import os
import time

import numpy as np
import pandas as pd
import psycopg2
import psycopg2.extras
import requests
from dotenv import dotenv_values
from psycopg2 import sql

### Connection to AWS DB

In [2]:
# Read the RDS connection details from the local .env file so that no
# credentials are hard-coded in the notebook.
config = dotenv_values(".env")
HOST_RDS = config['HOST_RDS']
DBNAME_RDS = config['DBNAME_RDS']
USER_RDS = config['USER_RDS']
PASSWORD_RDS = config['PASSWORD_RDS']


# Open the database connection used by every later cell.
try:
    conn = psycopg2.connect(host=HOST_RDS, dbname=DBNAME_RDS, user=USER_RDS, password=PASSWORD_RDS)
except psycopg2.Error as e:
    print("Error: Could not make connection to the Postgres database")
    print(e)
    # Re-raise: without a connection `conn` is never bound, and every
    # following cell would otherwise fail later with a confusing NameError.
    raise

In [3]:
# Create the cursor through which all SQL statements below are executed.
try:
    cur = conn.cursor()
except psycopg2.Error as e:
    print("Error: Could not get curser to the Database")
    print(e)
    # Re-raise: continuing without `cur` would only postpone the failure to
    # the first cell that calls `cur.execute(...)`.
    raise

# Auto commit is very important: with autocommit enabled each statement is
# committed as soon as it runs, so no explicit conn.commit() is needed after
# the CREATE TABLE / INSERT statements in the following cells.
conn.set_session(autocommit=True)

In [8]:
# Names of the tables this notebook works with. They are defined once and the
# CREATE statements below are built from them, so the variables can no longer
# drift away from the tables that actually get created.
table_name_t = 'tweets11'
table_name_u = 'tweets_user11'

# Both statements use IF NOT EXISTS, so re-running this cell is harmless.
# To start from empty tables, drop them manually first
# (DROP TABLE IF EXISTS <name>;).

# Tweets: one row per tweet object returned by the search endpoint.
cur.execute(sql.SQL("""CREATE TABLE IF NOT EXISTS {} (
            text text, 
            author_id bigint,
            id bigint,
            created_at text);""").format(sql.Identifier(table_name_t)))

# Users: one row per author returned through the `author_id` expansion.
cur.execute(sql.SQL("""CREATE TABLE IF NOT EXISTS {} (
            id bigint, 
            username text, 
            name text, 
            location text);""").format(sql.Identifier(table_name_u)))

### Accessing Twitter API

In [4]:
# Twitter API bearer token, read from the same .env configuration as the
# database credentials.
BEARER_TOKEN = config['BEARER_TOKEN']

# path where twitter files will be stored
path = "Data/twitter/"
# Create the directory up front so that writing result files later does not
# fail with FileNotFoundError on a fresh checkout.
os.makedirs(path, exist_ok=True)

In [5]:
# Accounts whose tweets are excluded from the search results.
_EXCLUDED_ACCOUNTS = (
    "quakeupdates", "jojo2727", "MonitorSismico", "MyComicalLife",
    "news_sokuho_bot", "DiariosRobot", "EN_NERV", "GDACS",
    "earthquake_jp", "EQAlerts", "j1_quake", "iSachinSrivstva",
    "VolcanoEWS", "ChileAlertaApp", "earthb0t", "sexy_vegetables",
    "zishin3255", "everyEarthquake", "MapQuake", "swap_bot_bash",
    "eq_map", "eq_map_es", "eq_map_ww", "SEISMOinfo",
    "VegaBajaWx", "WatchOurCity", "Keith_Event", "SismoDetector",
    "cvb_223", "ExBulletinUk", "EMSC", "StoixeioJewelry",
    "megamodo", "earthquakevt", "QuakeBotter", "twtaka_jp",
    "EarthquakeTw", "ENSO1998", "eq_map_ww2", "eq_map_es2",
)

# One "-from:<account>" operator per excluded account, separated by spaces.
FILTER_QUERY = " ".join("-from:" + account for account in _EXCLUDED_ACCOUNTS)

In [20]:
# Endpoint and authorization header used for every search request.
url = "https://api.twitter.com/2/tweets/search/all"
headers = {"Authorization": "Bearer " + BEARER_TOKEN}

# Time window of the search.
start_time = "2011-01-01T00:00:00.000Z"
end_time = "2011-01-31T23:59:59.000Z"

# Search term plus the account exclusions defined in FILTER_QUERY.
query = "earthquake -minor, -is:reply -is:retweet {0}".format(FILTER_QUERY)

# Page size and the extra fields requested for tweets and their authors.
max_results = "500"
tweet_fields = "created_at,author_id"
user_fields = "username,location"
expansions = "author_id"

# Request parameters sent with each call to `url`.
query_params = {
    "query": query,
    "tweet.fields": tweet_fields,
    "user.fields": user_fields,
    "start_time": start_time,
    "end_time": end_time,
    "max_results": max_results,
    "expansions": expansions,
}

In [21]:
def _fetch_all_pages(url, headers, params, pause_seconds=5, timeout=30):
    """Download every result page of a search and return (tweets, users).

    Parameters
    ----------
    url : str
        Endpoint to request.
    headers : dict
        HTTP headers sent with every request.
    params : dict
        Query parameters of the search. The dict is copied, so the caller's
        object is never modified and never keeps a stale ``next_token``.
    pause_seconds : int or float
        Seconds to sleep before requesting the next page (presumably to stay
        below the API rate limit -- confirm against the current limits).
    timeout : int or float
        Timeout in seconds passed to ``requests.get`` so that a stalled
        connection cannot block the notebook forever.

    Returns
    -------
    tuple of (list, list)
        The collected ``data`` entries and the ``includes.users`` entries of
        all pages, in the order they were received.

    Raises
    ------
    Exception
        With ``(status_code, response_text)`` if a response is not HTTP 200.
    """
    page_params = dict(params)  # work on a copy; see docstring
    all_tweets = []
    all_users = []
    while True:
        # get results according to url and query
        response = requests.get(url, headers=headers, params=page_params, timeout=timeout)
        if response.status_code != 200:
            raise Exception(response.status_code, response.text)

        # combine data to one
        payload = response.json()
        if 'data' in payload:
            all_tweets.extend(payload['data'])
            # `includes` may be absent from a page; treat that as "no users".
            all_users.extend(payload.get('includes', {}).get('users', []))

        # check if more data available, if yes continue process
        next_token = payload.get('meta', {}).get('next_token')
        if next_token is None:
            return all_tweets, all_users
        page_params['next_token'] = next_token
        time.sleep(pause_seconds)


tweets, users = _fetch_all_pages(url, headers, query_params)

In [23]:
# Insert every downloaded tweet into the tweets table.
# `tweets` is passed directly: execute_batch iterates over the sequence of
# dicts itself, and the named placeholders pick the matching keys from each
# tweet. The column list is spelled out so the insert does not depend on the
# column order of the table.
psycopg2.extras.execute_batch(cur, """INSERT INTO tweets11 (text, author_id, id, created_at) VALUES (
%(text)s,
%(author_id)s,
%(id)s,
%(created_at)s
);""", tweets)

# Inserting the users is currently disabled. Before enabling it, every user
# needs a 'location' entry (empty string if the element does not exist),
# because the statement below references %(location)s for each row:
# for item in users:
#     if 'location' not in item:
#         item['location'] = ""
#
# psycopg2.extras.execute_batch(cur, """INSERT INTO tweets_user11 VALUES(
# %(id)s,
# %(username)s,
# %(name)s,
# %(location)s
# );""", users)



In [12]:
# Show the last downloaded tweet as a quick sanity check; displays nothing
# instead of raising IndexError when the search returned no tweets.
tweets[-1] if tweets else None

{'author_id': '243117582',
 'created_at': '2011-03-11T14:13:32.000Z',
 'text': 'All for Japan! Prayers,hope,love for Japan and the people affected by the earthquake amen!',
 'id': '46212161105707008'}

In [9]:
# Save the collected user objects as indented JSON in the twitter data folder.
users_file = path + "2011-09-30-2011-07-24.txt"
with open(users_file, "w") as outfile:
    json.dump(users, outfile, indent=4)

In [11]:
# Release the database resources: first the cursor, then its connection.
for db_resource in (cur, conn):
    db_resource.close()