In [1]:
import pandas as pd
import os
import datetime as dt
import pickle
import numpy as np
import nbimporter
from exploratory_analysis import load_obj, returnDataOnDate, returnVolumeAndPriceChange
import warnings
# Silence every warning category for the remainder of the kernel session.
# NOTE(review): this also hides deprecation warnings emitted by libraries such
# as pandas, so re-enable warnings when upgrading dependencies.
warnings.filterwarnings('ignore')

In [2]:
# Feature-window sizes, expressed in calendar days.

# Forward window: number of days after the trade in which to look for the
# max/min price (see exploratory_analysis).
DAYS_TO_LOOK_FORWARD = 30

# Backward window: number of days before the trade in which to look for
# price trends / related insider trades.
DAYS_TO_LOOK_BACK = 1

In [3]:
# Read the cleaned June insider-trade filings and preview the first five rows.
train_data = pd.read_csv(filepath_or_buffer='data/insiderDat_June_clean.csv')
train_data.head(n=5)

Unnamed: 0,FilingDate,TradeDate,Ticker,CompanyName,InsiderName,Title,TradeType,Price,Qty,Owned,DeltaOwn,Value
0,2021-06-01,2021-06-01,CARE,"Carter Bankshares, Inc.",Van Dyke Litz H,CEO,P - Purchase,15.41,129.0,18557.0,1.0,1988.0
1,2021-06-01,2021-05-27,PANL,Pangaea Logistics Solutions Ltd.,"Pangaea One Acquisition Holdings Xiv, LLC",10%,S - Sale,4.21,-184466.0,6788742.0,-3.0,-775965.0
2,2021-06-01,2021-05-27,TALO,Talos Energy Inc.,"Apollo Management Holdings Gp, LLC",10%,S - Sale,14.1,-81942.0,18808537.0,0.0,-1155620.0
3,2021-06-01,2019-07-18,CLXT,"Calyxt, Inc.",Dumont Philippe,Dir,S - Sale,10.0,-706.0,16354.0,-4.0,-7060.0
4,2021-06-01,2021-05-27,FLUX,"Flux Power Holdings, Inc.",Johnson Michael,"Dir, 10%",S - Sale,10.55,-38738.0,4584558.0,-1.0,-408564.0


In [4]:
# Turn both date columns from strings into plain datetime.date objects so they
# can be compared against date values and subtracted from one another.
for dateColumn in ('FilingDate', 'TradeDate'):
    train_data[dateColumn] = pd.to_datetime(train_data[dateColumn]).dt.date

In [5]:
# Load the stored price history through the project helper load_obj (imported
# from exploratory_analysis). Judging by the lookup below it is keyed by ticker
# symbol -- presumably a dict of per-ticker DataFrames; confirm against load_obj.
historicDat = load_obj('historicDat')
# Preview one ticker's table; the rendered output shows daily
# Open/High/Low/Close/Adj Close/Volume rows indexed by Date.
historicDat['OPOF']

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-06-01,23.250000,23.250000,23.250000,23.250000,22.616158,0
2021-06-02,23.000000,23.000000,23.000000,23.000000,22.372974,900
2021-06-03,23.340000,23.340000,23.340000,23.340000,22.703703,300
2021-06-04,23.299999,23.299999,23.299999,23.299999,22.664793,1400
2021-06-07,23.090000,23.250000,23.059999,23.059999,22.431335,52300
...,...,...,...,...,...,...
2021-11-23,22.900000,22.900000,22.709999,22.709999,22.220882,600
2021-11-24,22.709999,22.709999,22.709999,22.709999,22.220882,0
2021-11-26,22.709999,22.709999,22.709999,22.709999,22.220882,0
2021-11-29,22.709999,22.709999,22.709999,22.709999,22.220882,0


In [6]:
# Collect every ticker that has price history: iterating the dictionary
# yields its keys, so the set ends up holding all of them.
allTickers = set(historicDat)

In [7]:
# Engineer per-trade features for the June 2021 filings, writing the results
# back into train_data one row at a time.
#
# NumTrades and TradeToFileTime only ever receive whole numbers, whereas
# %VolumeChange and %FuturePriceChange receive fractional percentages. The
# percentage columns are therefore created as floats up front: writing a float
# into an int64 column through .at relies on silent dtype upcasting, which
# pandas has deprecated (since 2.1) and which is hidden here by the blanket
# warnings filter at the top of the notebook.
train_data[['NumTrades', 'TradeToFileTime']] = 0
train_data[['%VolumeChange', '%FuturePriceChange']] = 0.0
# Categorical variants, currently disabled:
# ['NumTradesCAT', 'TradeToFileTimeCAT', '%VolumeChangeCAT']

startDate = dt.date(2021, 6, 1)
endDate = dt.date(2021, 6, 30)
delta = endDate - startDate  # kept for parity with the original cell; not used below

# Loop-invariant look-back window.
lookBackWindow = dt.timedelta(days=DAYS_TO_LOOK_BACK)

for tradeNum, trade in train_data.iterrows():
    tradeTick = trade['Ticker']
    tradeDate = trade['TradeDate']
    tradeFileDate = trade['FilingDate']

    # Skip filings in the first DAYS_TO_LOOK_BACK days so there is data to look
    # back at. Skipped rows keep the zero defaults set above and their
    # original DeltaOwn / Value.
    if tradeFileDate - lookBackWindow < startDate:
        continue

    # Percentage change in shares owned by the insider, relative to the
    # holding before the trade (assumes 'Owned' is the post-trade holding --
    # TODO confirm). When Owned == Qty the prior holding is zero and the
    # percentage is undefined, so the value loaded from the CSV is left as is.
    owned = trade['Owned']
    shareChange = trade['Qty']
    if owned != shareChange:
        train_data.at[tradeNum, 'DeltaOwn'] = 100 * shareChange / (owned - shareChange)

    # Total value of the insider's trade (negative for sales, as Qty is negative).
    train_data.at[tradeNum, 'Value'] = shareChange * trade['Price']

    # compute total value of insider's shares
    # TODO

    # Gap in days between the trade and its filing.
    # Disabled categorisation (TradeToFileTimeCAT): '0', '1', '>1'.
    tradeToFileTime = (tradeFileDate - tradeDate).days
    train_data.at[tradeNum, 'TradeToFileTime'] = tradeToFileTime

    # Number of same-ticker filings made within the DAYS_TO_LOOK_BACK days up
    # to and including this filing date; the count includes this trade itself.
    # Disabled categorisation (NumTradesCAT): '1', '2', '>2'.
    sameTicker = train_data['Ticker'] == tradeTick
    filedInWindow = ((train_data['FilingDate'] <= tradeFileDate)
                     & (train_data['FilingDate'] >= tradeFileDate - lookBackWindow))
    numTickerTrades = int((sameTicker & filedInWindow).sum())
    train_data.at[tradeNum, 'NumTrades'] = numTickerTrades

    # Percentage volume change over the last DAYS_TO_LOOK_BACK days and the
    # best closing-price percentage change over the next DAYS_TO_LOOK_FORWARD
    # days, both measured from the filing date (see returnVolumeAndPriceChange
    # in exploratory_analysis for the exact definition).
    # Disabled categorisation (%VolumeChangeCAT):
    # '<-10%', '-10 to 0%', '0 to 10%', '>10%'.
    percentChangeVol, percentChangePrice = returnVolumeAndPriceChange(
        historicDat, tradeTick, tradeFileDate, DAYS_TO_LOOK_FORWARD, DAYS_TO_LOOK_BACK)
    train_data.at[tradeNum, '%FuturePriceChange'] = percentChangePrice
    train_data.at[tradeNum, '%VolumeChange'] = percentChangeVol

In [8]:
# Spot-check one processed row by position (row 347 is a GRTX filing in the
# saved output) to confirm the engineered columns were populated.
train_data.iloc[347, :]

FilingDate                           2021-06-02
TradeDate                            2021-05-28
Ticker                                     GRTX
CompanyName           Galera Therapeutics, Inc.
InsiderName                          West Linda
Title                                       Dir
TradeType                          P - Purchase
Price                                      8.64
Qty                                     20000.0
Owned                                   20000.0
DeltaOwn                               999999.0
Value                                  172800.0
NumTrades                                     1
TradeToFileTime                               5
%VolumeChange                         -7.673667
%FuturePriceChange                    21.678318
Name: 347, dtype: object

In [9]:
# Show every trade filed on 2 June 2021, the first filing date that is not
# skipped by the look-back guard in the feature loop.
filedOnTarget = train_data['FilingDate'] == dt.date(2021, 6, 2)
train_data.loc[filedOnTarget]

Unnamed: 0,FilingDate,TradeDate,Ticker,CompanyName,InsiderName,Title,TradeType,Price,Qty,Owned,DeltaOwn,Value,NumTrades,TradeToFileTime,%VolumeChange,%FuturePriceChange
229,2021-06-02,2021-06-01,FRME,First Merchants Corp,Sherman Patrick A,Dir,P - Purchase,45.86,11.0,47945.0,0.022948,504.46,1,1,66.338924,-0.499239
230,2021-06-02,2021-06-01,MSEX,Middlesex Water Co,Oconnor A Bruce,"SVP, CFO",S - Sale,85.77,-3950.0,34029.0,-10.400484,-338791.50,1,1,-34.395501,3.456387
231,2021-06-02,2021-06-01,TPL,Texas Pacific Land Corp,Stahl Murray,Dir,P - Purchase,1499.04,29.0,376577.0,0.007702,43472.16,2,1,-27.464789,8.400529
232,2021-06-02,2021-06-01,MGPI,Mgp Ingredients Inc,Seaberg Karen,"Dir, 10%",S - Sale,70.35,-71.0,3241961.0,-0.002190,-4994.85,2,1,22.026094,0.946665
233,2021-06-02,2021-06-01,TSE,Trinseo S.A.,M&g Investment Management Ltd,10%,S - Sale,66.50,-3000.0,7634044.0,-0.039282,-199500.00,1,1,-16.205441,-0.475129
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
569,2021-06-02,2021-06-02,WTBA,West Bancorporation Inc,Gulling Douglas R,"EVP, TREASURER, CFO",S - Sale,28.17,-5000.0,112460.0,-4.256768,-140850.00,1,0,54.575163,4.080921
570,2021-06-02,2021-06-01,SF,Stifel Financial Corp,Nesi Victor,Co-Pres,S - Sale,69.94,-10000.0,284148.0,-3.399649,-699400.00,1,1,-8.483926,-0.087765
571,2021-06-02,2021-05-28,OPOF,Old Point Financial Corp,Glasser Michael A,Dir,P - Purchase,23.00,100.0,2717.0,3.821169,2300.00,4,5,9999.000000,13.173916
572,2021-06-02,2021-06-01,HBAN,Huntington Bancshares Inc /md/,Pierce Sandra E.,Senior Exec VP,S - Sale,16.00,-85986.0,443258.0,-16.246948,-1375776.00,1,1,-31.917184,-0.377836


In [10]:
# Persist the engineered training set. The row index is written as the first
# (unnamed) column, so readers should load it back with index_col=0.
train_data.to_csv('data/training_data.csv', index=True)