# Analyzing Stock Market Prices

The purpose of this project is to analyze stock data using a dictionary of DataFrames (one per ticker symbol) built from Yahoo Finance price data covering 1/1/2007 to 4/17/2017.

In [3]:
import os
import pandas as pd

# Load every price CSV in the "prices" directory into a dict of DataFrames,
# keyed by the file name without its extension (e.g. "aapl.csv" -> "aapl").
stock_prices = {}

# os.listdir returns entries in arbitrary order; sorting keeps the dict order
# (and any downstream tie-breaking that depends on it) reproducible across runs.
for fn in sorted(os.listdir("prices")):
    # splitext strips only the final extension, so a dotted ticker such as
    # "brk.b.csv" keeps its full name instead of collapsing to "brk".
    name, ext = os.path.splitext(fn)
    if ext.lower() != ".csv":
        # Skip stray entries (.DS_Store, .ipynb_checkpoints, ...) that
        # pd.read_csv cannot parse.
        continue
    stock_prices[name] = pd.read_csv(os.path.join("prices", fn))

In [33]:
# Spot-check one ticker's frame to confirm which columns were loaded
stock_prices["aapl"]

Unnamed: 0,date,close,open,high,low,volume
0,2007-01-03,83.800002,86.289999,86.579999,81.899999,309579900
1,2007-01-04,85.659998,84.050001,85.949998,83.820003,211815100
2,2007-01-05,85.049997,85.770000,86.199997,84.400002,208685400
3,2007-01-08,85.470000,85.959998,86.529998,85.280003,199276700
4,2007-01-09,92.570003,86.450003,92.979999,85.150000,837324600
...,...,...,...,...,...,...
2585,2017-04-10,143.169998,143.600006,143.880005,142.899994,18473000
2586,2017-04-11,141.630005,142.940002,143.350006,140.059998,30275300
2587,2017-04-12,141.800003,141.600006,142.149994,141.009995,20238900
2588,2017-04-13,141.050003,141.910004,142.380005,141.050003,17652900


In [5]:
# Map each ticker symbol to the mean of its "close" column.
closing_data = {
    symbol: frame["close"].mean()
    for symbol, frame in stock_prices.items()
}

In [6]:
# Find the ticker with the lowest *average* closing price in closing_data.
# min() returns the first minimal entry, so ties resolve by dict order.
min_key, min_value = min(closing_data.items(), key=lambda entry: entry[1])

# Report the ticker and its average close
print(f"Key with minimum value: {min_key}, Value: {min_value}")

Key with minimum value: blfs, Value: 0.8122763011583011


In [34]:
# Find the ticker with the highest *average* closing price in closing_data.
# max() returns the first maximal entry, so ties resolve by dict order.
max_key, max_value = max(closing_data.items(), key=lambda entry: entry[1])

# Report the ticker and its average close
print(f"Key with max value: {max_key}, Value: {max_value}")

Key with max value: amzn, Value: 275.13407757104255


In [7]:
# Group every (volume, ticker) observation by trading day:
#   trades_by_day[date] -> list of (volume, ticker) tuples,
# with one tuple per row of each ticker's frame that carries that date.
trades_by_day = {}

for stock_sym, frame in stock_prices.items():
    # Walk only the two needed columns in lockstep. DataFrame.iterrows() would
    # build a full Series for every row (slow on thousands of rows per ticker)
    # and does not preserve column dtypes across the row.
    for day, volume in zip(frame["date"], frame["volume"]):
        # setdefault creates the day's list the first time the date is seen.
        trades_by_day.setdefault(day, []).append((volume, stock_sym))

In [8]:
# For each day, keep the single (volume, ticker) pair with the largest volume.
# max() returns the first maximal element, so on equal volumes the pair that
# appears earliest in the day's list wins.
max_trade = {
    day: max(trades, key=lambda trade: trade[0])
    for day, trades in trades_by_day.items()
    if trades  # a day with an empty list gets no entry
}

In [9]:
# Print the highest-volume (volume, ticker) pair for every day, one line per day
for day in max_trade:
    trade = max_trade[day]
    print(f"Max trade on {day}: {trade}")

Max trade on 2007-01-03: (309579900, 'aapl')
Max trade on 2007-01-04: (211815100, 'aapl')
Max trade on 2007-01-05: (208685400, 'aapl')
Max trade on 2007-01-08: (199276700, 'aapl')
Max trade on 2007-01-09: (837324600, 'aapl')
Max trade on 2007-01-10: (738220000, 'aapl')
Max trade on 2007-01-11: (360063200, 'aapl')
Max trade on 2007-01-12: (328172600, 'aapl')
Max trade on 2007-01-16: (311019100, 'aapl')
Max trade on 2007-01-17: (411565000, 'aapl')
Max trade on 2007-01-18: (591151400, 'aapl')
Max trade on 2007-01-19: (341118400, 'aapl')
Max trade on 2007-01-22: (363506500, 'aapl')
Max trade on 2007-01-23: (301856100, 'aapl')
Max trade on 2007-01-24: (231953400, 'aapl')
Max trade on 2007-01-25: (226493400, 'aapl')
Max trade on 2007-01-26: (246718500, 'aapl')
Max trade on 2007-01-29: (225416100, 'aapl')
Max trade on 2007-01-30: (144492600, 'aapl')
Max trade on 2007-01-31: (214017300, 'aapl')
Max trade on 2007-02-01: (166085500, 'aapl')
Max trade on 2007-02-02: (155382500, 'aapl')
Max trade 

In [10]:
# Total traded volume per day, accumulated over every (volume, ticker) pair
# recorded for that day.
day_sums = {}

for day, trades in trades_by_day.items():
    for volume, _ in trades:
        # .get(day, 0) starts the running total at zero the first time a day is seen
        day_sums[day] = day_sums.get(day, 0) + volume

In [11]:
# Rank days from highest to lowest total volume as a list of (day, total) pairs
sorted_day_totals = list(day_sums.items())
sorted_day_totals.sort(key=lambda day_total: day_total[1], reverse=True)

In [13]:
# The ten days with the highest total volume
sorted_day_totals[:10]

[('2008-01-23', 1964583900),
 ('2008-10-10', 1770266900),
 ('2007-07-26', 1611272800),
 ('2008-10-08', 1599183500),
 ('2008-01-22', 1578877700),
 ('2008-02-07', 1559032100),
 ('2008-09-29', 1555072400),
 ('2007-11-08', 1553880500),
 ('2008-01-16', 1536176400),
 ('2008-01-24', 1533363200)]

In [29]:
# Percent change in closing price between each ticker's first and last row.
# NOTE(review): assumes every frame is ordered by date ascending, so the first
# row is the earliest close and the last row the latest — confirm against the CSVs.
most_profitable = {}
for symbol, frame in stock_prices.items():
    closes = frame["close"]
    start_close = closes.iloc[0]
    end_close = closes.iloc[-1]
    most_profitable[symbol] = (end_close - start_close) / start_close * 100

In [30]:
# Rank tickers from largest to smallest percent change as (ticker, percent) pairs
sorted_profit = list(most_profitable.items())
sorted_profit.sort(key=lambda entry: entry[1], reverse=True)

In [32]:
# Ten largest percent gains in closing price over the 2007 to 2017 data
sorted_profit[:10]

[('admp', 7483.8389225948395),
 ('adxs', 4005.0000000000005),
 ('arcw', 3898.60048982856),
 ('blfs', 2437.4365640858978),
 ('amzn', 2230.7234281466817),
 ('anip', 1707.3554472785033),
 ('apdn', 1549.6700659868025),
 ('cui', 1525.1625162516252),
 ('bcli', 1339.2137535980346),
 ('achc', 1330.0000666666667)]