# Stock Price Data

In [None]:
import concurrent.futures
import os

def read_file(filename):
    """Load one CSV price file and return it keyed by its stock symbol.

    Args:
        filename: Path to a CSV file, e.g. ``"prices/aapl.csv"``.

    Returns:
        A ``(key, rows)`` tuple. ``key`` is the final path component with a
        trailing ``.csv`` removed (``"aapl"`` for the example above).
        ``rows`` is a list of rows, each a list of string cell values, with
        the header row first. Blank lines are skipped.
    """
    # Function-scope import so the block does not depend on a file-level
    # import that is not already present.
    import csv

    # Use only the final path component, so any directory prefix is ignored,
    # and strip the extension only when it is really a suffix.
    key = os.path.basename(filename)
    if key.endswith(".csv"):
        key = key[:-len(".csv")]

    # newline='' is what the csv module requires so that it can handle
    # line endings (including ones embedded in quoted fields) itself.
    with open(filename, 'r', newline='') as f:
        # csv.reader yields an empty list for a blank line; drop those so
        # they do not turn into malformed rows.
        data = [row for row in csv.reader(f) if row]
    return key, data

# NOTE(review): `results` is not used anywhere in the visible cells; it is
# kept in case a later cell relies on it.
results = []

# Build the path of every entry in the "prices" directory.
filenames = ["prices/{}".format(f) for f in os.listdir("prices")]

# Running the executor as a context manager shuts the worker processes down
# once the files are read, instead of leaving them alive for the rest of the
# session.
with concurrent.futures.ProcessPoolExecutor(max_workers=2) as pool:
    # pool.map is lazy; dict() consumes the (key, rows) pairs while the pool
    # is still open.
    prices = dict(pool.map(read_file, filenames))

# Bare expression: displays the mapping as the notebook cell's output.
prices

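I chose to use a hash table (a dictionary keyed by stock symbol) whose values are lists of rows, where each row is itself a list of column values. This closely mirrors the existing structure of the data: one file per symbol, one row per line, and one value per column. It allows stock symbols to be looked up easily and lets me index rows and values efficiently.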



# Computing Aggregates

In [None]:
from dateutil.parser import parse

# Re-shape each symbol's row-oriented table into a mapping of
# column name -> list of converted values.
prices_columns = {}

for symbol, table in prices.items():
    header_row = table[0]
    body_rows = table[1:]
    columns = {}
    for position, name in enumerate(header_row):
        raw_cells = [row[position] for row in body_rows]
        # The first column is run through dateutil's parse; every other
        # column is converted to float.
        if position == 0:
            columns[name] = [parse(cell) for cell in raw_cells]
        else:
            columns[name] = [float(cell) for cell in raw_cells]
    prices_columns[symbol] = columns

In [None]:
from statistics import mean

# Mean of the "close" column for every symbol.
average_closing = {
    symbol: mean(columns["close"])
    for symbol, columns in prices_columns.items()
}

In [None]:
# (symbol, average close) pairs, displayed in ascending order of average.
closing_tuples = list(average_closing.items())
sorted(closing_tuples, key=lambda pair: pair[1])