# Predicting stock performance based on CEO image

## Loading in libraries

In [2]:
import datetime
import pandas as pd
import requests
import numpy as np
import os
from stocker import Stocker
from bs4 import BeautifulSoup
from urllib.request import urlopen
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from textblob import TextBlob
%matplotlib inline

In [11]:
tickers = pd.read_csv('stock_list.csv', header=None)[0]
stocker_by_ticker = {}
tickers.head()

0     PRO
1    MCBC
2    ENOC
3    LINC
4     SYF
Name: 0, dtype: object

## Loading in each stock's performance

In [13]:
for ticker in tickers[2040:]:
    stocker_by_ticker[ticker] = Stocker(ticker).stock
    stocker_by_ticker[ticker][ticker] = (stocker_by_ticker[ticker][['Adj. Open']] - stocker_by_ticker[ticker][['Adj. Open']].shift(1))/stocker_by_ticker[ticker][['Adj. Open']].shift(1)
    stocker_by_ticker[ticker] = stocker_by_ticker[ticker][['Date', ticker]]
    stocker_by_ticker[ticker][['Date']]= stocker_by_ticker[ticker]['Date'].apply(lambda x: pd.Timestamp(x))

GAIA Stocker Initialized. Data covers 1999-10-29 to 2018-03-27.
MOD Stocker Initialized. Data covers 1990-03-26 to 2018-03-27.
CRWN Stocker Initialized. Data covers 2000-05-08 to 2016-04-29.
CPN Stocker Initialized. Data covers 2008-01-10 to 2018-03-08.
SENEA Stocker Initialized. Data covers 1998-06-03 to 2018-03-27.
ASH Stocker Initialized. Data covers 1983-04-06 to 2018-03-27.
PCCC Stocker Initialized. Data covers 1998-03-03 to 2016-09-08.
DATA Stocker Initialized. Data covers 2013-05-17 to 2018-03-27.
TOL Stocker Initialized. Data covers 1987-12-30 to 2018-03-27.
COLB Stocker Initialized. Data covers 1992-06-16 to 2018-03-27.
HUM Stocker Initialized. Data covers 1981-12-31 to 2018-03-27.
FNFG Stocker Initialized. Data covers 1998-04-21 to 2016-07-29.
DCO Stocker Initialized. Data covers 1973-05-03 to 2018-03-27.
PFPT Stocker Initialized. Data covers 2012-04-20 to 2018-03-27.
DECK Stocker Initialized. Data covers 1993-10-15 to 2018-03-27.
REXX Stocker Initialized. Data covers 2007-07

MGLN Stocker Initialized. Data covers 2004-02-25 to 2018-03-27.
GRUB Stocker Initialized. Data covers 2014-04-04 to 2018-03-27.
FAF Stocker Initialized. Data covers 2010-05-28 to 2018-03-27.
CVD Stocker Initialized. Data covers 1996-12-17 to 2015-02-18.
GCI Stocker Initialized. Data covers 1985-07-01 to 2018-03-07.
MDSO Stocker Initialized. Data covers 2009-06-25 to 2018-03-27.
FHCO Stocker Initialized. Data covers 1999-02-11 to 2017-08-04.
AMCX Stocker Initialized. Data covers 2011-06-16 to 2018-03-27.
YDKN Stocker Initialized. Data covers 2014-07-07 to 2017-03-10.
DGAS Stocker Initialized. Data covers 1990-03-26 to 2017-09-20.
STLD Stocker Initialized. Data covers 1996-11-22 to 2018-03-27.
AGM Stocker Initialized. Data covers 1995-08-18 to 2018-03-27.
LKQ Stocker Initialized. Data covers 2003-10-06 to 2018-03-27.
DCI Stocker Initialized. Data covers 1987-06-18 to 2018-03-27.
HLT Stocker Initialized. Data covers 2017-06-19 to 2018-03-27.
AREX Stocker Initialized. Data covers 2007-11-0

DW Stocker Initialized. Data covers 1989-05-03 to 2016-12-30.
HIW Stocker Initialized. Data covers 1994-06-08 to 2018-03-27.
ELS Stocker Initialized. Data covers 1993-02-25 to 2018-03-27.
HSC Stocker Initialized. Data covers 1987-11-05 to 2018-03-27.
INFA Stocker Initialized. Data covers 1999-04-29 to 2015-08-06.
MW Stocker Initialized. Data covers 1992-04-15 to 2016-01-29.
MPX Stocker Initialized. Data covers 2001-03-01 to 2018-03-27.
WU Stocker Initialized. Data covers 2006-10-02 to 2018-03-27.
IRM Stocker Initialized. Data covers 1996-02-01 to 2018-03-27.
HEOP Stocker Initialized. Data covers 1999-04-23 to 2017-03-31.
FWRD Stocker Initialized. Data covers 1993-11-16 to 2018-03-27.
TDS Stocker Initialized. Data covers 1991-09-18 to 2018-03-27.
CPGX Stocker Initialized. Data covers 2015-07-02 to 2016-06-30.
BWS Stocker Initialized. Data covers 1984-09-07 to 2015-05-28.
SCTY Stocker Initialized. Data covers 2012-12-13 to 2016-11-21.
EBAY Stocker Initialized. Data covers 1998-09-24 to 2

RHI Stocker Initialized. Data covers 1992-03-10 to 2018-03-27.
MNI Stocker Initialized. Data covers 1989-06-30 to 2018-03-27.
ARAY Stocker Initialized. Data covers 2007-02-08 to 2018-03-27.
DWSN Stocker Initialized. Data covers 1990-03-26 to 2018-03-27.
ESGR Stocker Initialized. Data covers 1997-05-09 to 2018-03-27.
PBH Stocker Initialized. Data covers 2005-02-10 to 2018-03-27.
EE Stocker Initialized. Data covers 1989-07-07 to 2018-03-27.
CPT Stocker Initialized. Data covers 1993-07-22 to 2018-03-27.
MUR Stocker Initialized. Data covers 1983-04-06 to 2018-03-07.
NATI Stocker Initialized. Data covers 1995-03-14 to 2018-03-27.
IQNT Stocker Initialized. Data covers 2007-11-02 to 2017-02-09.
RLGY Stocker Initialized. Data covers 2012-10-11 to 2018-03-27.
IMI Stocker Initialized. Data covers 2011-11-18 to 2018-03-27.
MDU Stocker Initialized. Data covers 1987-11-05 to 2018-03-27.
MSM Stocker Initialized. Data covers 1995-12-15 to 2018-03-27.
RP Stocker Initialized. Data covers 2010-08-12 to 

PGC Stocker Initialized. Data covers 1999-04-27 to 2018-03-27.
LVS Stocker Initialized. Data covers 2004-12-15 to 2018-03-27.
SIR Stocker Initialized. Data covers 2012-03-07 to 2018-03-27.
SLG Stocker Initialized. Data covers 1997-08-15 to 2018-03-27.
TRST Stocker Initialized. Data covers 1990-03-26 to 2018-03-27.
ANSS Stocker Initialized. Data covers 1996-06-21 to 2018-03-27.
FCBC Stocker Initialized. Data covers 1997-05-09 to 2018-03-27.
CBG Stocker Initialized. Data covers 2004-06-10 to 2018-03-19.
SSNC Stocker Initialized. Data covers 2010-03-31 to 2018-03-27.
KPTI Stocker Initialized. Data covers 2013-11-06 to 2018-03-27.
EBTC Stocker Initialized. Data covers 2005-02-14 to 2018-03-27.
BCOR Stocker Initialized. Data covers 1998-12-15 to 2018-03-27.
PMC Stocker Initialized. Data covers 2007-08-01 to 2017-12-07.
RDNT Stocker Initialized. Data covers 1997-01-03 to 2018-03-27.
VPRT Stocker Initialized. Data covers 2005-09-30 to 2014-11-12.
MCP Stocker Initialized. Data covers 2010-07-2

FBRC Stocker Initialized. Data covers 2007-06-08 to 2017-06-01.
SIG Stocker Initialized. Data covers 1994-10-31 to 2018-03-27.
GTY Stocker Initialized. Data covers 1973-05-03 to 2018-03-27.
BRSS Stocker Initialized. Data covers 2013-05-23 to 2018-03-27.
IART Stocker Initialized. Data covers 1995-08-16 to 2018-03-27.
CTRN Stocker Initialized. Data covers 2005-05-18 to 2018-03-27.
BLT Stocker Initialized. Data covers 1999-08-20 to 2016-04-11.
OC Stocker Initialized. Data covers 2006-11-01 to 2018-03-27.
MDC Stocker Initialized. Data covers 1980-03-11 to 2018-03-27.
HT Stocker Initialized. Data covers 1999-01-21 to 2018-03-27.
TNGO Stocker Initialized. Data covers 2011-07-27 to 2017-03-13.
DST Stocker Initialized. Data covers 1995-11-01 to 2018-03-27.
DTLK Stocker Initialized. Data covers 1999-08-06 to 2017-01-06.
LABL Stocker Initialized. Data covers 1990-03-26 to 2018-03-27.
BSX Stocker Initialized. Data covers 1992-05-19 to 2018-03-27.
HMN Stocker Initialized. Data covers 1991-11-15 to

ATRS Stocker Initialized. Data covers 1996-10-03 to 2018-03-27.
XOXO Stocker Initialized. Data covers 1999-12-02 to 2018-03-27.
HEI Stocker Initialized. Data covers 1992-03-17 to 2018-03-27.
TEAR Stocker Initialized. Data covers 2004-12-09 to 2017-11-07.
UEIC Stocker Initialized. Data covers 1993-02-12 to 2018-03-27.
RCAP Stocker Initialized. Data covers 2013-06-05 to 2015-12-31.
KODK Stocker Initialized. Data covers 2013-09-23 to 2018-03-27.
NCFT Stocker Initialized. Data covers 2013-11-07 to 2015-05-11.
NTK Stocker Initialized. Data covers 2010-06-15 to 2016-08-30.
TOWN Stocker Initialized. Data covers 1999-05-05 to 2018-03-27.
RAIL Stocker Initialized. Data covers 2005-04-06 to 2018-03-27.
TECUA Stocker Initialized. Data covers 1995-08-18 to 2014-05-01.
WIN Stocker Initialized. Data covers 2005-02-09 to 2018-03-07.
NAV Stocker Initialized. Data covers 1970-01-02 to 2018-03-27.
TG Stocker Initialized. Data covers 1990-01-12 to 2018-03-27.
DXM Stocker Initialized. Data covers 2013-05-

HLIT Stocker Initialized. Data covers 1995-05-24 to 2018-03-27.
MXIM Stocker Initialized. Data covers 1990-03-26 to 2018-03-27.
CLNE Stocker Initialized. Data covers 2007-05-25 to 2018-03-27.
SZYM Stocker Initialized. Data covers 2011-05-27 to 2016-05-10.
UTL Stocker Initialized. Data covers 1992-03-17 to 2018-03-27.
TBNK Stocker Initialized. Data covers 2009-07-13 to 2018-03-27.
FNLC Stocker Initialized. Data covers 1999-07-14 to 2018-03-27.
CMI Stocker Initialized. Data covers 1984-12-18 to 2018-03-27.
KOP Stocker Initialized. Data covers 2006-02-01 to 2018-03-27.
WAB Stocker Initialized. Data covers 1995-06-16 to 2018-03-27.
CVA Stocker Initialized. Data covers 1992-03-17 to 2018-03-27.
VVUS Stocker Initialized. Data covers 1994-04-07 to 2018-03-27.
EA Stocker Initialized. Data covers 1990-03-26 to 2018-03-27.
EAT Stocker Initialized. Data covers 1989-07-28 to 2018-03-27.
NOW Stocker Initialized. Data covers 2012-06-29 to 2018-03-27.
AVD Stocker Initialized. Data covers 1988-06-22 t

CEMP Stocker Initialized. Data covers 2012-02-06 to 2017-11-03.
ROK Stocker Initialized. Data covers 1981-12-31 to 2018-03-27.
CSH Stocker Initialized. Data covers 1987-12-30 to 2016-09-01.
NWE Stocker Initialized. Data covers 2007-12-28 to 2018-03-27.
EVR Stocker Initialized. Data covers 2006-08-14 to 2018-03-27.
PNW Stocker Initialized. Data covers 1984-07-19 to 2018-03-27.
TRC Stocker Initialized. Data covers 1992-03-17 to 2018-03-27.
WSTC Stocker Initialized. Data covers 2013-03-22 to 2017-10-10.
ZGNX Stocker Initialized. Data covers 2010-11-23 to 2018-03-27.
EOG Stocker Initialized. Data covers 1989-10-04 to 2018-03-27.
AKAM Stocker Initialized. Data covers 1999-10-29 to 2018-03-27.
FST Stocker Initialized. Data covers 1990-03-26 to 2014-12-15.
XONE Stocker Initialized. Data covers 2013-02-07 to 2018-03-27.
TRUE Stocker Initialized. Data covers 2000-05-02 to 2018-03-27.
RALY Stocker Initialized. Data covers 2013-04-12 to 2015-07-07.
ISSI Stocker Initialized. Data covers 1995-02-03

In [15]:
# import pickle
# pickle.dump( stocker_by_ticker, open( "stocker_by_ticker.pkl", "wb" ) )

In [16]:
profits_frame = stocker_by_ticker['PRO'][(stocker_by_ticker['PRO'].Date>=pd.Timestamp(2014,1,1)) & (stocker_by_ticker['PRO'].Date<pd.Timestamp(2018,1,1))]
for ticker in tickers[1:]:
    within_time = stocker_by_ticker[ticker][(stocker_by_ticker[ticker].Date>=pd.Timestamp(2014,1,1)) & (stocker_by_ticker[ticker].Date<pd.Timestamp(2018,1,1))]
    if len(within_time) == len(profits_frame):
        profits_frame = profits_frame.merge(within_time)
profits_frame.head()

Unnamed: 0,Date,PRO,MCBC,LINC,FLDM,ECHO,DISH,NRCIA,NAT,FTNT,...,ELLI,RGR,AMAT,ENS,FOXF,BSET,DAN,OPY,ULTI,PSA
0,2014-01-02,-0.014903,-0.013807,0.0,-0.013233,-0.015242,-0.000345,0.004902,0.009434,0.013221,...,-0.040502,0.004809,0.009127,0.002003,-0.012408,-0.018638,0.003601,0.006905,0.004193,-0.008238
1,2014-01-03,-0.008825,-0.014,-0.016064,-0.032343,-0.0394,-0.002587,0.004878,-0.006231,0.003132,...,0.001494,0.007931,-0.008479,-0.014135,-0.032553,-0.026195,-0.002563,-0.0238,-0.004893,-0.009104
2,2014-01-06,0.02493,0.020284,0.044898,0.008967,0.012695,0.003976,-0.017799,0.009404,0.010406,...,0.006714,0.00095,-0.002281,0.011731,0.005903,0.028917,-0.010791,0.009504,-0.002098,0.010595
3,2014-01-07,-0.004716,-0.015905,-0.007813,-0.009426,-0.003375,-0.004133,0.031851,0.056936,-0.005149,...,-0.005187,0.026701,-0.007429,-0.006728,0.018192,-0.014379,0.007792,-0.014736,-0.002562,-0.004446
4,2014-01-08,-0.013466,0.010101,-0.003937,0.043774,0.004693,-0.005879,-0.018095,0.008815,0.030538,...,0.021229,0.021386,0.001727,-0.004323,-0.023631,-0.006631,0.012887,-0.000831,0.011856,-6.7e-05


In [20]:
# pickle.dump( profits_frame, open( "all_stock_profits.pkl", "wb" ) )
profits_frame.columns

Index(['Date', 'PRO', 'MCBC', 'LINC', 'FLDM', 'ECHO', 'DISH', 'NRCIA', 'NAT',
       'FTNT',
       ...
       'ELLI', 'RGR', 'AMAT', 'ENS', 'FOXF', 'BSET', 'DAN', 'OPY', 'ULTI',
       'PSA'],
      dtype='object', length=2306)

## Calculating returns

In [36]:
## Calculating returns for horoscope bot
percentage_returns = {}
for ticker in profits_frame.columns[1:]:
    money = [100]
    for i in range(len(profits_frame)):
        money.append(money[-1] + money[-1]*profits_frame[ticker].iloc[i])
    percentage_returns[ticker]=money
    

In [41]:
percentage_returns['BSET'][-1]

274.2580644188391

In [42]:
made_profit = []
for ticker in profits_frame.columns[1:]:
    if percentage_returns[ticker][-1] > 100:
        made_profit.append([ticker, 1])
    else:
        made_profit.append([ticker, 0])

In [44]:
profits_table = pd.DataFrame(made_profit)
profits_table.sum()

0    PROMCBCLINCFLDMECHODISHNRCIANATFTNTGISASCWREQR...
1                                                 1596
dtype: object

In [47]:
profits_table.to_csv('stock_profits.csv', index=False)

In [50]:
profits_tbl = pd.read_csv('stock_profits2.csv')
profits_tbl.describe()

Unnamed: 0,Profit
count,2305.0
mean,0.692408
std,0.461597
min,0.0
25%,0.0
50%,1.0
75%,1.0
max,1.0
