# Data Engineering your stocks

In [None]:
%%capture
!pip install -r requirements.txt

In [10]:
import os
import pandas as pd
import numpy as np
from utils import get_or_load, get_stocks, get_ticker_symbols, get_performance, get_stock_timeline

# Task 1: Get stock price data from API

In [17]:
def get_stock_prices(stock: str, apikey: str):
    api_result = get_stock_API(stock, apikey)
    output = transform_api_output(api_result)
    output['ticker'] = stock
    output['close'] = output['close'].astype(float)
    return output

In [18]:
import requests
def get_stock_API(stock: str, apikey: str):
    API_URL = "https://www.alphavantage.co/query"
    data = {
        "function": "TIME_SERIES_DAILY",
        "symbol": stock,
        "outputsize": "compact",
        "datatype": "csv",
        "apikey": apikey,
    }
    response = requests.get(API_URL, data)
    if 'Invalid API call' in response.text:
        raise KeyError("Ticker not valid!")
    while '5 calls per minute' in response.text:
        time.sleep(60)
        response = requests.get(API_URL, data)
    return response.text

In [None]:
get_stock_API('AAPL', os.environ['API_KEY'])

### Your code

In [8]:
def transform_api_output(api_result: str):
    # TODO: Transform code here
    return pd.DataFrame()

Test your code

In [11]:
result = transform_api_output(get_stock_API('AAPL', os.environ['API_KEY']))
print(f"Check 1: Actual shape: {result.shape}, expected shape: {(100,6)}")
print(f"Check 2: Actual columns: {list(result.columns)}, expected columns: {['timestamp', 'open', 'high', 'low', 'close', 'volume']}")

Check 1: Actual shape: (0, 0), expected shape: (100, 6)
Check 2: Actual columns: [], expected columns: ['timestamp', 'open', 'high', 'low', 'close', 'volume']


# Task 2: Buffering Data

In [15]:
def get_or_load(stock: str, apikey: str):
    # Define folder, filename and file path
    file_name = f'{stock}.csv'
    data_dir = 'data/current/'
    file_path = f'{data_dir}{file_name}'
    
    # Check if stock already in data lake
    if file_name in os.listdir(data_dir):
        data = pd.read_csv(file_path)
        return data
    
    # If stock not found locally, return API call
    data = get_stock_prices(stock, apikey)
    if 'timestamp' in data.columns:
        data.to_csv(file_path)
        return data
    else:
        raise KeyError('Column timestamp not in dataframe!')
        
    

In [16]:
get_or_load('AAPL', os.environ['API_KEY'])

KeyError: 'close'

### Your code

# Task 3: Get and clean historical data

### Your code

Test your code

In [None]:
result = transform_hist_data(historical_prices)
result_current = get_or_load('AAPL', os.environ['API_KEY'])
print(f"Check 1: Actual close mean: {result.close.mean()}, expected mean: ~{174}")
print(f"Check 2: Actual date format: {result.timestamp.to_list()[-1]}, expected date format: 1999-11-01")

# Task 4: Get stock prices for multiple stocks

### Your code

Test your code

# Task 5: Feature Engineering
## Lets add additonal features such as stock performance

### Your code

Test your code