# The Tweet Collector

To avoid making unnecessary Twitter API calls, we will use a repository of tweets held in
a remote Postgres instance. This notebook saves a selected number of those tweets to a local
CSV file to be batch-processed by the other jobs in the application.

In [None]:
# this cell sets up the database connections to pull data directly

import configparser
import os
import psycopg2

def config_file_reader(API_caller: str) -> tuple:
    """
    A common configuration file reader.

    Reads data from a common configuration file (``config.ini`` under
    ``~/Quantum/Event Detector/Twitter Event Detector/Common``), determining which fields
    to return depending on the API caller passed to it.

    If the configuration file does not exist, the configuration directory is created (when
    it is not already there) and a message is printed. Callers that do not need values from
    the file ("logger_setup", "account_metadata_importer") still get their result; callers
    that do need them will hit a ``KeyError`` on the missing section.

    :param API_caller:(str) the name of the service calling this API; one of
        "data_access_object", "logger_setup", "languages" or "account_metadata_importer"

    :return: (tuple) a tuple of strings of each configuration returned for the called service.
        Note that "languages" returns the raw ``supported_languages`` string rather than a
        tuple, and an unrecognised caller returns None after printing an error.

    :raises KeyError: if a section or option required by the caller is missing from the
        configuration
    """
    home_directory_path = os.path.expanduser("~")
    project_directory_path = os.path.join(home_directory_path, "Quantum", "Event Detector",
                                          "Twitter Event Detector")
    logger_directory_path = os.path.join(project_directory_path, "Logs")
    config_directory_path = os.path.join(project_directory_path, "Common")
    config_file_path = os.path.join(config_directory_path, "config.ini")

    # instantiates the configuration parser
    config = configparser.ConfigParser()

    # if config file exists, proceed: else, create directory structure, then fail gracefully
    if os.path.exists(config_file_path):
        config.read(config_file_path)
    else:
        # exist_ok=True: the directory usually already exists on a second attempt (it was
        # created by the first one), and that must not raise FileExistsError
        os.makedirs(config_directory_path, exist_ok=True)
        print("No config file found in " + config_directory_path +
              ". Please place a configuration file into this directory and try again.")

    if API_caller == "data_access_object":
        database_section = config["DATABASE"]
        # the order of these keys is the order of the returned tuple, which callers unpack
        database_keys = ("type", "host", "database_name", "user", "password",
                         "database_instance_id", "database_port")
        return tuple(database_section[key] for key in database_keys)
    elif API_caller == "logger_setup":
        return logger_directory_path,
    elif API_caller == "languages":
        languages = config["LANGUAGES"]["supported_languages"]
        return languages
    elif API_caller == "account_metadata_importer":
        # this API call only requires the directory path to the config file (which stores a CSV file necessary)
        return config_directory_path,
    else:
        print("Error on reading config file: no API caller specified")
        return None

def raw_tweet_database_connector():
    """
    Opens a connection to the PostgreSQL database described in the common configuration.

    The connection settings are obtained from ``config_file_reader("data_access_object")``.
    If psycopg2 reports an operational error while connecting, a message is printed and
    None is returned instead of a connection.

    :return: (psycopg2 connection) a PostgreSQL connection object, or None on connection failure
    """
    # the database type and instance id are part of the settings tuple but are not
    # needed to open the connection
    _db_type, host, name, user, password, _instance_id, port = \
        config_file_reader("data_access_object")

    try:
        return psycopg2.connect(host=host, dbname=name, user=user,
                                password=password, port=port)
    except psycopg2.OperationalError:
        print('Database connection error')
        return None

def raw_tweet_database_reader(limit: int = 100) -> list:
    """
    Reads the most recent entries in the Raw Tweet Database.

    Rows are selected from the ``twitter_posts`` table, ordered by ``tweet_time_created``
    descending. The cursor and the connection are always closed before returning.

    :param limit:(int) the maximum number of rows to fetch; defaults to 100

    :return: (list) the rows returned by ``cursor.fetchall()``, one entry per tweet, with
        columns in the order listed in the SELECT statement

    :raises ConnectionError: if no database connection could be established
    """
    # calls the database connector
    connection = raw_tweet_database_connector()
    if connection is None:
        # the connector prints a message and returns None on failure; stop here with a
        # clear error instead of failing later with an AttributeError on None
        raise ConnectionError("Could not connect to the Raw Tweet Database")

    sql = "SELECT tweet_time_created, tweet_uid, tweet_text, tweet_source, reply_tweet_uid, reply_tweet_count, " \
      "quote_tweet, quote_tweet_uid, quote_tweet_text, quote_tweet_count, retweet_tweet_status,  " \
      "retweet_tweet_count, tweet_language, user_uid, user_name, user_screen_name, user_description, " \
      "user_verification, user_follower_count, user_friends_count, user_statuses_count, user_time_created, " \
      "tweet_coordinates, tweet_place, tweet_place_country_code, tweet_place_bounding_box, " \
      "tweet_hashtags, tweet_urls, tweet_symbols, tweet_user_mentions, user_location FROM twitter_posts " \
      "ORDER BY tweet_time_created DESC LIMIT %s;"

    try:
        cursor = connection.cursor()
        try:
            # the limit is passed as a query parameter rather than formatted into the SQL
            cursor.execute(sql, (limit,))
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # the connection is opened solely for this read, so release it here
        connection.close()


In [None]:
# add code that writes fetched data to CSV