In [4]:
import pandas as pd
import os

data = dict()

for fn in os.listdir("prices"):
    df = pd.read_csv(os.path.join("prices", fn))
    data[os.path.splitext(fn)[0]] = df

data["aapl"].head()

Unnamed: 0,date,close,open,high,low,volume
0,2007-01-03,83.800002,86.289999,86.579999,81.899999,309579900
1,2007-01-04,85.659998,84.050001,85.949998,83.820003,211815100
2,2007-01-05,85.049997,85.77,86.199997,84.400002,208685400
3,2007-01-08,85.47,85.959998,86.529998,85.280003,199276700
4,2007-01-09,92.570003,86.450003,92.979999,85.15,837324600


In [6]:
average_closing_prices = {stock:df["close"].mean() for (stock,df) in data.items()}
min_closing_prices = {stock:df["close"].min() for (stock,df) in data.items()}
max_closing_prices = {stock:df["close"].max() for (stock,df) in data.items()}

In [8]:
trade_volume = dict()

for stock, df in data.items():
    for _, row in df.iterrows():
        if row.date not in trade_volume:
            trade_volume[row.date] = []
        trade_volume[row.date].append((row.volume, stock))
print("{} days of trading in record".format(len(trade_volume)))

2636 days of trading in record


In [9]:
trade_volume['2007-01-03']

[(30100, 'dgica'),
 (100, 'bdge'),
 (36500, 'cvco'),
 (365800, 'blkb'),
 (108200, 'bbox'),
 (192600, 'ffbc'),
 (400, 'fbiz'),
 (46300, 'ffic'),
 (29500, 'bdsi'),
 (12908400, 'amgn'),
 (1703300, 'expe'),
 (2343300, 'expd'),
 (25100, 'clct'),
 (269000, 'alny'),
 (72900, 'evol'),
 (127900, 'ahgp'),
 (4700, 'dfbg'),
 (1312400, 'afsi'),
 (203200, 'chy'),
 (1946200, 'bmrn'),
 (993300, 'agys'),
 (19700, 'adrd'),
 (380300, 'drrx'),
 (1216400, 'crus'),
 (15500, 'brew'),
 (0, 'fbms'),
 (1600, 'emcf'),
 (63000, 'bsqr'),
 (4300, 'csfl'),
 (1146900, 'car'),
 (39543600, 'cmcsa'),
 (241800, 'cmtl'),
 (820400, 'elos'),
 (11600, 'eltk'),
 (151000, 'agii'),
 (23300, 'coke'),
 (4000, 'egan'),
 (114000, 'cpss'),
 (2596900, 'adtn'),
 (1740400, 'ffiv'),
 (3500, 'cspi'),
 (2700, 'bwen'),
 (434200, 'cgnx'),
 (5240400, 'cdns'),
 (10600, 'egt'),
 (485600, 'cray'),
 (500, 'arcw'),
 (27400, 'bncn'),
 (1100, 'admp'),
 (205100, 'cnsl'),
 (193000, 'abax'),
 (2000, 'aris'),
 (257600, 'cyrn'),
 (8500, 'asys'),
 (700, 

In [10]:
most_traded_stocks = {day:max(trades, key=lambda item:item[0]) for (day, trades) in trade_volume.items()}

In [11]:
most_traded_stocks['2007-01-03']

(309579900, 'aapl')

In [12]:
total_volume = {day:sum(volume for volume, _ in trades) for (day, trades) in trade_volume.items()}

In [13]:
total_volume['2007-01-03']

996359900

In [14]:
top_trading_days = sorted(total_volume, key=total_volume.get, reverse=True)
top_trading_days[:10]

['2008-01-23',
 '2008-10-10',
 '2007-07-26',
 '2008-10-08',
 '2008-01-22',
 '2008-02-07',
 '2008-09-29',
 '2007-11-08',
 '2008-01-16',
 '2008-01-24']

Trading volume appears to have peaked in and around the time of the 2007-2008 financial crisis

In [28]:
growth = lambda df: (df.loc[df.shape[0] - 1, "close"] - df.loc[0, "close"])/df.loc[0, "open"]
stock_growth = {stock:growth(df) for (stock, df) in data.items()}
top_stocks = sorted(stock_growth, key=stock_growth.get, reverse=True)
for stock in top_stocks[:10]:
    print("{} grew {:.0%}".format(stock, stock_growth[stock]))

admp grew 7484%
adxs grew 4450%
arcw grew 3899%
blfs grew 2786%
amzn grew 2232%
anip grew 1683%
apdn grew 1550%
cui grew 1525%
axgn grew 1486%
bcli grew 1442%


The highest growth stocks from 2007-01-03 to 2017-04-17 are:
- ADMP (Adamis Pharmaceuticals, healthcare)
- ADXS (Advaxis, healthcare)
- ARCW (ARC Group Worldwide, industrial)
- BLFS (BioLife Solutions, healthcare)
- AMZN (Amazon, consumer cyclical)
- ANIP (ANI Pharmaceuticals, healthcare)
- APDN (Applied DNA Sciences, healthcare)
- CUI (Cuisine Solutions, consumer defensive)
- AXGN (AxoGen, healthcare)
- BCLI (Brainstorm Cell Therapeutics, healthcare)

In [29]:
for stock in top_stocks[-10:]:
    print("{} grew {:.0%}".format(stock, stock_growth[stock]))

dcth grew -92%
dest grew -92%
falc grew -93%
bbry grew -93%
atlc grew -93%
cmls grew -98%
cetv grew -98%
bont grew -103%
clsn grew -109%
aray grew -113%


The stocks best to short at the start of the January 2007 to April 2017 period:
- ARAY (Accuray, healthcare)
- CLSN (Celsion Corporation, healthcare)
- BONT (Bon-Ton Stores, dept store retail)
- CETV (Central European Media Enterprises, media)
- CMLS (Cumulus Media, media)
- ATLC (Atlanticus Holdings, finance)
- BBRY (Blackberry, tech)
- FALC (FalconStor Software, tech)
- DEST (Destiny Pharma, healthcare)
- DCTH (Delcath Systems, healthcare)

In [42]:
optimal_buy_days = {stock:df.iloc[df['open'].idxmin()].date for (stock, df) in data.items()}
print("If holding until now: \n")
for stock in top_stocks[:20]:
    print("Best day to purchase {} is {}".format(stock.upper(), optimal_buy_days[stock]))

If holding until now: 

Best day to purchase ADMP is 2008-12-22
Best day to purchase ADXS is 2008-11-24
Best day to purchase ARCW is 2007-01-18
Best day to purchase BLFS is 2008-10-15
Best day to purchase AMZN is 2008-11-20
Best day to purchase ANIP is 2012-05-21
Best day to purchase APDN is 2008-10-06
Best day to purchase CUI is 2009-02-18
Best day to purchase AXGN is 2007-03-16
Best day to purchase BCLI is 2009-07-21
Best day to purchase ACHC is 2009-03-11
Best day to purchase BSTC is 2007-04-02
Best day to purchase EGHT is 2008-12-08
Best day to purchase AXDX is 2009-12-31
Best day to purchase CLFD is 2007-08-14
Best day to purchase COOL is 2014-06-10
Best day to purchase CRUS is 2009-01-21
Best day to purchase CORT is 2007-03-29
Best day to purchase AEMD is 2011-09-30
Best day to purchase ABMD is 2009-04-01


Which stocks have the most after-hours trading, and show the biggest changes between the closing price and the next day open?
Can technical indicators like Bollinger Bands help us forecast the market?
What time periods have resulted in steady increases in prices, and what periods have resulted in steady declines?
On days with high trading volume, do stocks move in one direction (up or down) more than the other one?