## Imports

In [1]:
import psycopg2
import os
import requests
import json
from os import path
from dotenv import load_dotenv, find_dotenv
from datetime import datetime

"""---------------------- env constants --------------------------"""

# Locate the .env file by walking up the directory tree until one is found
dotenv_path = find_dotenv()

# Expose every entry of that file as an environment variable
load_dotenv(dotenv_path)

"""---------------------- database constants --------------------------"""

# Connection settings; each is None when the variable is not set
DATABASE_URL = os.getenv("DATABASE_URL")
DATABASE_NAME = os.getenv("DATABASE_NAME")
DATABASE_USER = os.getenv("DATABASE_USER")
DATABASE_PASSWORD = os.getenv("DATABASE_PASSWORD")

"""---------------------- filepath and id constants --------------------------"""

# Text file listing the items that we are interested in forecasting
FILENAME = "data/items.txt"

# Market region ids that store_data iterates over
REGIONS = [
    0,         # Global
    30000142,  # Jita
    30000144,  # Perimeter
    60003760,  # Jita 4-4 CNAP
    60008494,  # Amarr VIII
    60011866,  # Dodixie
    60004588,  # Rens
    60005686,  # Hek
]

## Table of Contents

1. [Data Collection](#data-collection)
2. [Preprocessing](#preprocessing)

## Data Collection

In [4]:
def get_raw_material_names(filename=None):
    """ Return the unique raw material names listed in a text file.

    The file is read as one item name per line. Surrounding whitespace
    (including the trailing newline) is stripped from every name, blank lines
    are skipped, and duplicates are dropped while keeping first-seen order.

    Args:
        filename (string): Path of the file to read. Defaults to None, in
            which case the module-level FILENAME is used.

    Returns:
        raw_material_names: item names of interest for forecast. Empty when
            the file does not exist.
    """

    if filename is None:
        filename = FILENAME

    try:
        # The context manager closes the file even if reading fails part-way
        with open(filename, "r") as file:
            stripped_names = (line.strip() for line in file)
            # dict.fromkeys removes repeats but, unlike set(), keeps file order
            return list(dict.fromkeys(name for name in stripped_names if name))
    except FileNotFoundError:
        print(filename, "does not exist.")
        return []

def get_item_id(item_name=None):
    """ Returns the item ID from using the API endpoint https://www.fuzzwork.co.uk/api/typeid.php?typename=Silicon

    Args:
        item_name (string): Name of the raw material. Defaults to None.
            Surrounding whitespace (e.g. a trailing newline) is ignored.

    Returns:
        int: The id of the item, or None when no name was given or the
            response does not contain a usable id.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.RequestException: On connection problems or a timeout.
    """

    # Nothing to look up; avoid a pointless network round trip
    if item_name is None:
        return None

    name = str(item_name).strip()
    if not name:
        return None

    response = requests.get(
        "https://www.fuzzwork.co.uk/api/typeid.php",
        params={"typename": name},
        timeout=30,
    )
    response.raise_for_status()
    payload = response.json()

    # NOTE(review): assumes the endpoint answers with a JSON object holding
    # the id under "typeID" -- confirm against the Fuzzwork API.
    raw_id = payload.get("typeID") if isinstance(payload, dict) else None

    try:
        item_id = int(raw_id)
    except (TypeError, ValueError):
        return None

    # NOTE(review): presumably an id of 0 means "unknown item"; treated as
    # not found -- confirm.
    return item_id or None
    
def fetch_data(region_id, item_id, timeout=30):
    """ Returns JSON given an input of the region and item from an API

    Args:
        region_id (int): id that is assigned to each major market region.
        item_id (int): id that is assigned to each raw material.
        timeout (float): Seconds to wait for the server before giving up.
            Defaults to 30.

    Returns:
        dict: JSON Data from https://market.fuzzwork.co.uk/aggregates/?region=30000142&types=9828

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.RequestException: On connection problems or a timeout.
    """

    # Let requests build and encode the query string
    response = requests.get(
        "https://market.fuzzwork.co.uk/aggregates/",
        params={"region": region_id, "types": item_id},
        # Without a timeout a stalled server would block the caller forever
        timeout=timeout,
    )

    # Fail loudly on 4xx/5xx instead of trying to decode an error page
    response.raise_for_status()

    # decoding the body as json
    return response.json()

In [21]:
# Smoke test: fetch two items from region 0 and merge both responses
D1 = {}
D2 = fetch_data(0, 2073)
D3 = fetch_data(0, 9828)

# Fold each response into the combined dict, then show the result
for _response in (D2, D3):
    D1.update(_response)
print(D1)

{'2073': {'buy': {'weightedAverage': '0.635268011252', 'max': '1.38', 'min': '0.01', 'stddev': '0.53276470913', 'median': '0.92', 'volume': '709540043.0', 'orderCount': '18', 'percentile': '1.38'}, 'sell': {'weightedAverage': '4.24927857003', 'max': '150.0', 'min': '1.0', 'stddev': '21.9770924539', 'median': '2.72', 'volume': '319974773.0', 'orderCount': '45', 'percentile': '1.8641616903'}}, '9828': {'buy': {'weightedAverage': '344.378438957', 'max': '600.0', 'min': '0.01', 'stddev': '169.534536467', 'median': '386.4', 'volume': '19499124.0', 'orderCount': '83', 'percentile': '547.615833122'}, 'sell': {'weightedAverage': '610.52940589', 'max': '5916.0', 'min': '200.0', 'stddev': '614.256526942', 'median': '599.55', 'volume': '8238744.0', 'orderCount': '82', 'percentile': '344.968270892'}}}


In [None]:
def store_data():
    """ GET Requests the Eve Online API endpoint https://market.fuzzwork.co.uk/aggregates/?region=30000142&types=9828
    which takes two params region and types. This JSON data is then stored
    as a single row (TIME, DATA) in the raw_market_data table.

    Region: The location that the markets are located in
    - There are 8 Regions
        - Global - 0
        - Jita - 30000142
        - Perimeter - 30000144
        - Jita 4-4 CNAP - 60003760
        - Amarr VIII - 60008494
        - Dodixie - 60011866
        - Rens - 60004588
        - Hek - 60005686

    Types: The ID of the raw material, List of IDs: https://docs.google.com/spreadsheets/d/1X7mi7j-_yV5lq-Yd2BraE-t4QE_a4IKv2ZuCBSLD6QU/edit?usp=sharing

    The stored JSON is nested by region so that no region overwrites another:
    {"<region id>": {<response of fetch_data for each item>}, ...}

    Database errors are printed rather than raised.
    """

    items = get_raw_material_names()

    # Names that could not be resolved to an id are left out
    item_ids = []
    for name in items:
        item_id = get_item_id(name)
        if item_id is not None:
            item_ids.append(item_id)

    if not item_ids:
        print("No item ids could be resolved; nothing to store.")
        return

    fetched_at = datetime.now()

    # Fetch the data for each region and raw material id
    print("Fetching data...")
    json_data = {}
    for region in REGIONS:
        # Responses are keyed by item id, so merging every region into one
        # flat dict would keep only the last region; group by region instead.
        region_data = json_data.setdefault(str(region), {})
        for item_id in item_ids:
            region_data.update(fetch_data(region_id=region, item_id=item_id))

    # Serializing json
    json_object = json.dumps(json_data)

    # sql to insert the json into the raw_market_data table
    insert_command = (
    """
    INSERT INTO raw_market_data (TIME, DATA) VALUES (%s, %s)
    """
    )

    # Connecting to the database
    print("Trying to connect to database...")
    # Bound before the try so the finally clause is safe if connect() fails
    conn = None
    try:
        conn = psycopg2.connect(host=DATABASE_URL,
                                database=DATABASE_NAME,
                                user=DATABASE_USER,
                                password=DATABASE_PASSWORD)

        print("Connected to database and now inserting data into table...")

        # Inserting into table
        with conn.cursor() as cursor:
            cursor.execute(insert_command, (fetched_at, json_object))

        # Commit while the connection is still open; closing first would
        # discard the insert
        conn.commit()

        print("Successfully stored data into table.")

    except psycopg2.Error as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
    
    

## Preprocessing