In [1]:
import os

# Switch the working directory to the parent of the current one.
# NOTE(review): presumably done so the project-local `app` package imported in the
# next cell resolves — confirm. Re-running this cell climbs one more level each time.
path_parent = os.path.split(os.getcwd())[0]
os.chdir(path_parent)

In [2]:
from urllib.request import urlopen
import json
import numpy as np
import pandas as pd
from datetime import datetime, date
from tqdm.notebook import tqdm

import plotly.graph_objects as go
from plotly.subplots import make_subplots

from app.utils.fetch_data import fred_fred, investing_api, alpha_vantage_api, FMP
from app.data_handling.feature_engineering import create_price_features

In [3]:
# Notebook-wide parameters: the ticker to analyse and the date window to fetch.
ticker = 'AAPL'
start_date = '2010-01-01'
# ISO format yields the same 'YYYY-MM-DD' text as strftime("%Y-%m-%d").
today = date.today().isoformat()

#### Fetch stock data

In [4]:
# Fetch the 'stock' price history for `ticker` from `start_date` to `today` through the
# project's investing_api helper (the log line below shows the request it issues).
total_stock = investing_api('stock', ticker, start_date, today)

2022-07-22 00:30:14,846 [app.utils.fetch_data] [INFO] Fetching stock from investing_api: AAPL, from 01/01/2010 to 22/07/2022


In [5]:
# feature engineering: pass the fetched prices through create_price_features
# (the preview below shows columns such as upper_shadow, lower_shadow, open2close, high2low).
# NOTE(review): `total_stock` is overwritten with its own transformation, so re-running
# this cell feeds the already-engineered frame back in — confirm that is harmless.
total_stock = create_price_features(total_stock)

In [6]:
# Preview the first two rows to check the engineered columns are present.
total_stock.head(2)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Currency,ticker,type,p_key,upper_shadow,lower_shadow,open2close,high2low,high2mean,low2mean,high2median,low2median
0,2010-01-04,7.62,7.66,7.58,7.64,493728704,USD,AAPL,stock,2010_01_04_AAPL,1.002618,1.005277,1.002625,1.010554,1.00459,0.994098,1.003932,0.993447
1,2010-01-05,7.66,7.7,7.62,7.66,601904640,USD,AAPL,stock,2010_01_05_AAPL,1.005222,1.005249,1.0,1.010499,1.005222,0.994778,1.005222,0.994778


#### Fetch insider trading data

In [7]:
# Instantiate the project's FMP client (the log line below reports
# "Financial Modeling Prep api ready").
fmp = FMP()

2022-07-22 00:30:17,717 [app.utils.fetch_data] [INFO] Financial Modeling Prep api ready.


In [8]:
# Fetch 35 pages of insider-trade records for `ticker` from the FMP client.
insider = fmp.get_historical_insider_trade_ticker(ticker, num_pages=35)
# Keep only transactions on or after `start_date`.
# `.copy()` detaches the filtered frame from the fetched one, so later column
# assignments on `insider` do not raise pandas' SettingWithCopyWarning.
# NOTE(review): the filter compares the column against the 'YYYY-MM-DD' string in
# `start_date` — presumably `transactionDate` holds ISO date strings; confirm.
insider = insider.loc[insider['transactionDate'] >= start_date].copy()

2022-07-22 00:30:24,935 [app.utils.fetch_data] [INFO] Fetching 35 pages of AAPL insider trade data.


In [9]:
# cleaning some of the columns: normalise reporter names to lower case.
# `assign` returns a new frame instead of writing into a filtered slice, which
# avoids pandas' SettingWithCopyWarning; column order is unchanged.
insider = insider.assign(reportingName=insider['reportingName'].str.lower())

In [10]:
# Show the last two rows; in the recorded output these are the earliest
# transactions (January/February 2010), just after `start_date`.
insider.tail(2)

Unnamed: 0,index,symbol,filingDate,transactionDate,reportingCik,transactionType,securitiesOwned,companyCik,reportingName,typeOfOwner,acquistionOrDisposition,formType,securitiesTransacted,price,securityName,link
2272,72,AAPL,2010-02-04 17:43:55,2010-02-02,1214107,G-Gift,10487.0,320193,oppenheimer peter,"officer: Senior Vice President, CFO",D,4,1250.0,0.0,Common Stock,https://www.sec.gov/Archives/edgar/data/000032...
2273,73,AAPL,2010-01-06 17:29:05,2010-01-04,1252200,S-Sale,120781.0,320193,serlet bertrand,officer: Senior Vice President,D,4,10000.0,213.55,Common Stock,https://www.sec.gov/Archives/edgar/data/000032...


#### Create master dataset

In [11]:
# Relevant columns from the insider dataset to carry into the merged frame.
insider_columns = [
    'transactionDate',
    'transactionType',
    'reportingName',
    'securitiesTransacted',
    'price',
    'securityName',
]

In [12]:
# Attach each insider transaction to the price row of the same date.
# `how='inner'` is pandas' default, spelled out because it means price rows with
# no insider transaction on that date are dropped from `df`.
df = pd.merge(
    total_stock,
    insider[insider_columns],
    how='inner',
    left_on='Date',
    right_on='transactionDate',
)

In [13]:
# Preview the merged frame: price columns followed by the selected insider columns.
df.head(2)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Currency,ticker,type,p_key,...,high2mean,low2mean,high2median,low2median,transactionDate,transactionType,reportingName,securitiesTransacted,price,securityName
0,2010-01-04,7.62,7.66,7.58,7.64,493728704,USD,AAPL,stock,2010_01_04_AAPL,...,1.00459,0.994098,1.003932,0.993447,2010-01-04,S-Sale,serlet bertrand,10000.0,213.55,Common Stock
1,2010-02-02,7.0,7.01,6.91,6.99,698341952,USD,AAPL,stock,2010_02_02_AAPL,...,1.004658,0.990326,1.002144,0.987848,2010-02-02,G-Gift,oppenheimer peter,650.0,0.0,Common Stock


#### Adjust for stock split

In [14]:
# Fetch the split history for `ticker` from the FMP client.
# NOTE(review): no adjustment using `stock_split` is visible in this notebook; in the
# recorded outputs the insider `price` (213.55 on 2010-01-04) is on a different scale
# from `Close` (7.64) for the same day — confirm where the adjustment is applied.
stock_split = fmp.get_stock_split_history(ticker)

2022-07-22 00:31:54,714 [app.utils.fetch_data] [INFO] Fetching stock split history for AAPL


In [15]:
# Preview the split records (date plus numerator/denominator of each split).
stock_split.head(2)

Unnamed: 0,date,label,numerator,denominator,symbol
0,2020-08-31,"August 31, 20",4.0,1.0,AAPL
1,2014-06-09,"June 09, 14",7.0,1.0,AAPL


#### Train, test, and validation

In [None]:
# TODO: define the validation split. The assignment was left without a right-hand
# side, which is a SyntaxError; None is a placeholder so the notebook still parses.
validation = None

In [None]:
# Only common stock.
# Restricted stock units are a special kind of stock that is granted to employees under certain conditions.

In [112]:
# Monthly insider volume, split by acquisition (A) / disposition (D).
# The grouping key `transactionDate_Y_M` is not created in any visible cell, so it is
# derived here when missing; an existing column is left untouched.
# NOTE(review): assumes the key is the year-month of `transactionDate` formatted as
# 'YYYY-MM' (the recorded output shows values like '2016-04') — confirm.
if 'transactionDate_Y_M' not in insider.columns:
    insider = insider.assign(
        transactionDate_Y_M=pd.to_datetime(insider['transactionDate']).dt.strftime('%Y-%m')
    )

volume_by_AD = (
    insider
    .groupby(['transactionDate_Y_M', 'acquistionOrDisposition'])
    .agg({"securitiesTransacted": "sum"})
    .reset_index()
)

volume_by_AD.head(2)

Unnamed: 0,transactionDate_Y_M,acquistionOrDisposition,securitiesTransacted
0,2016-04,A,219268.0
1,2016-04,D,352435.0


In [116]:
def _monthly_volume(volume: pd.DataFrame, months: pd.Series, side: str) -> pd.DataFrame:
    """Return one row per entry of `months` holding the volume for one side.

    Args:
        volume: frame with `transactionDate_Y_M`, `acquistionOrDisposition`
            and `securitiesTransacted` columns.
        months: the distinct `transactionDate_Y_M` values to align against.
        side: value of `acquistionOrDisposition` to keep ('A' or 'D').

    Returns:
        The left merge of `months` with the rows for `side`; months with no
        row for that side get `side` as their label and a volume of 0.
    """
    side_rows = volume.loc[volume.acquistionOrDisposition == side]
    # Left merge keeps every month, in the order of `months`.
    aligned = pd.merge(months, side_rows, how='left')
    aligned['acquistionOrDisposition'] = aligned['acquistionOrDisposition'].fillna(side)
    aligned['securitiesTransacted'] = aligned['securitiesTransacted'].fillna(0)
    return aligned


dates = volume_by_AD.transactionDate_Y_M.drop_duplicates()

# Both frames are aligned to `dates`, so they can share it as an x axis.
acquisition = _monthly_volume(volume_by_AD, dates, 'A')
disposition = _monthly_volume(volume_by_AD, dates, 'D')

In [117]:
# Preview the first two months of the month-aligned disposition volumes.
disposition.head(2)

Unnamed: 0,transactionDate_Y_M,acquistionOrDisposition,securitiesTransacted
0,2016-04,D,352435.0
1,2016-05,D,519946.0


In [118]:
# Show the last three months of acquisition volumes; the recorded output includes a
# month with no acquisitions (2022-05) that was filled with 0.
acquisition.tail(3)

Unnamed: 0,transactionDate_Y_M,acquistionOrDisposition,securitiesTransacted
58,2022-03,A,13480.0
59,2022-04,A,620558.0
60,2022-05,A,0.0


In [122]:
# Grouped bar chart of monthly insider volume: acquisitions next to dispositions.
fig = go.Figure()

# Both y series come from frames aligned to `dates`, so they share the x axis.
trace1 = go.Bar(x=dates,
                y=acquisition.securitiesTransacted,  # was `acquistion`, an undefined name
                name="Acquisition",
                marker_color='indianred')
trace2 = go.Bar(x=dates,
                y=disposition.securitiesTransacted,
                marker_color='blue',
                name="Disposition")

fig.add_trace(trace1)
fig.add_trace(trace2)

# Title and axis labels let the figure stand alone when the notebook is skimmed.
# update_layout is the cell's last expression, so the figure is displayed.
fig.update_layout(barmode='group',
                  title="Monthly insider securities transacted",
                  xaxis_title="Transaction month",
                  yaxis_title="Securities transacted")

In [66]:
# Show the filing link of the first row, to see what the URLs look like.
insider['link'].iloc[0]

'https://www.sec.gov/Archives/edgar/data/0000320193/000032019322000063/0000320193-22-000063-index.htm'

In [None]:
# NOTE(review): unexecuted scratch template for a bar trace. `data`, `x`, `y`, `name`
# and `yaxis` are not defined in any visible cell, so this raises NameError if run
# as-is — presumably meant to become the body of a helper function; confirm or remove.
trace = go.Bar(x=data[x],
               y=data[y],
               name=name,
               yaxis=yaxis)

In [None]:
# disposition and acquisition per date

In [None]:
# NOTE(review): unexecuted scratch cell that builds an empty bar trace — looks like
# leftover exploration; consider removing.
go.Bar()

In [50]:
# List the columns available in the insider-trade frame.
insider.columns

Index(['symbol', 'filingDate', 'transactionDate', 'reportingCik',
       'transactionType', 'securitiesOwned', 'companyCik', 'reportingName',
       'typeOfOwner', 'acquistionOrDisposition', 'formType',
       'securitiesTransacted', 'price', 'securityName', 'link'],
      dtype='object')

In [52]:
# Distinct transaction-type codes present in the insider data.
insider['transactionType'].unique()

array(['G-Gift', 'S-Sale', 'M-Exempt', 'F-InKind', 'A-Award'],
      dtype=object)

In [53]:
# Distinct reporting persons.
# NOTE(review): the recorded output shows mixed/upper-case names, although an earlier
# cell lower-cases `reportingName` — the output looks stale; re-run to refresh it.
insider['reportingName'].unique()

array(['BELL JAMES A', 'Adams Katherine L.', "O'BRIEN DEIRDRE",
       'KONDO CHRIS', 'WILLIAMS JEFFREY E', 'Maestri Luca',
       'WAGNER SUSAN', 'SUGAR RONALD D', 'LOZANO MONICA C',
       'LEVINSON ARTHUR D', 'GORE ALBERT JR', 'Gorsky Alex',
       'JUNG ANDREA', 'COOK TIMOTHY D', 'IGER ROBERT A',
       'AHRENDTS ANGELA J', 'Srouji Johny', 'FEDERIGHI CRAIG',
       'Riccio Daniel J.', 'SCHILLER PHILIP W', 'Cue Eduardo H',
       'SEWELL D BRUCE'], dtype=object)

In [54]:
# Distinct owner/role descriptions attached to the filings.
insider['typeOfOwner'].unique()

array(['director', 'officer: SVP, GC and Secretary',
       'officer: Senior Vice President',
       'officer: Principal Accounting Officer', 'officer: COO',
       'officer: Senior Vice President, CFO',
       'director, officer: Chief Executive Officer',
       'officer: SVP, General Counsel',
       "officer: SVP, Gen'l Counsel, Secretary"], dtype=object)

In [60]:
# Every distinct (person, role) pairing, ordered by person; a person appears more
# than once when their recorded role text differs between filings.
(
    insider[['reportingName', 'typeOfOwner']]
    .drop_duplicates()
    .sort_values(by='reportingName')
)

Unnamed: 0,reportingName,typeOfOwner
22,AHRENDTS ANGELA J,officer: Senior Vice President
1,Adams Katherine L.,"officer: SVP, GC and Secretary"
85,Adams Katherine L.,"officer: SVP, General Counsel"
0,BELL JAMES A,director
5,COOK TIMOTHY D,"director, officer: Chief Executive Officer"
46,Cue Eduardo H,officer: Senior Vice President
59,FEDERIGHI CRAIG,officer: Senior Vice President
45,GORE ALBERT JR,director
46,Gorsky Alex,director
92,IGER ROBERT A,director


In [55]:
# Distinct acquisition/disposition flags (the column name's spelling comes from the data).
insider['acquistionOrDisposition'].unique()

array(['D', 'A'], dtype=object)

In [56]:
# Distinct form types among the fetched filings.
insider['formType'].unique()

array(['4'], dtype=object)

In [57]:
# Distinct security names.
# NOTE(review): the recorded output contains both 'Restricted Stock Unit' and
# 'Restricted Stock Units' — these probably need unifying before filtering on this column.
insider['securityName'].unique()

array(['Common Stock', 'Restricted Stock Unit',
       'Director Stock Option (Right to Buy)', 'Restricted Stock Units'],
      dtype=object)