In this notebook, we add all of the derived trade features to the features already obtained from the CSV.

In [12]:
import pandas as pd
import os
import datetime as dt
import pickle
import numpy as np
import nbimporter
import sys

'''
Import functions from other notebooks I wrote.
'''
# Names of the notebook modules (importable via nbimporter) that this notebook depends on.
mods = ['A_exploratory_analysis']
# Drop any already-imported copy from the module cache so the import below
# re-executes the notebook instead of reusing the cached module object.
[sys.modules.pop(mod) for mod in mods if mod in sys.modules]
from A_exploratory_analysis import save_obj, load_obj, returnDataOnDate, returnVolumeAndPriceChange

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including pandas deprecation warnings raised by the cells below.
warnings.filterwarnings('ignore')

In [3]:
# Load the insider-trade records from CSV into a DataFrame and preview the first rows.
insiderDat = pd.read_csv('data/insiderDat_June_clean.csv')
insiderDat.head()

Unnamed: 0,FilingDate,TradeDate,Ticker,CompanyName,InsiderName,Title,TradeType,Price,Qty,Owned,DeltaOwn,Value
0,2021-06-01,2021-05-27,CMBM,Cambium Networks Corp,Rau Sally,GC,S - Sale,58.6,-4938.0,0.0,-100.0,-289353.0
1,2021-06-01,2021-05-27,TNET,"Trinet Group, Inc.",Chamberlain Paul Edward,Dir,S - Sale,77.73,-175.0,36185.0,0.0,-13603.0
2,2021-06-01,2021-05-27,PANL,Pangaea Logistics Solutions Ltd.,"Pangaea One Acquisition Holdings Xiv, LLC",10%,S - Sale,4.21,-184466.0,6788742.0,-3.0,-775965.0
3,2021-06-01,2021-05-27,TALO,Talos Energy Inc.,"Apollo Management Holdings Gp, LLC",10%,S - Sale,14.1,-81942.0,18808537.0,0.0,-1155620.0
4,2021-06-01,2019-07-18,CLXT,"Calyxt, Inc.",Dumont Philippe,Dir,S - Sale,10.0,-706.0,16354.0,-4.0,-7060.0


In [5]:
# Load the stored price history with load_obj (imported from A_exploratory_analysis)
# and display the entry for ticker 'OPOF' as a quick sanity check.
historicDat = load_obj('historicDat')
historicDat['OPOF']

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-06-01,23.250000,23.250000,23.250000,23.250000,22.616158,0
2021-06-02,23.000000,23.000000,23.000000,23.000000,22.372974,900
2021-06-03,23.340000,23.340000,23.340000,23.340000,22.703703,300
2021-06-04,23.299999,23.299999,23.299999,23.299999,22.664793,1400
2021-06-07,23.090000,23.250000,23.059999,23.059999,22.431335,52300
...,...,...,...,...,...,...
2021-11-23,22.900000,22.900000,22.709999,22.709999,22.220882,600
2021-11-24,22.709999,22.709999,22.709999,22.709999,22.220882,0
2021-11-26,22.709999,22.709999,22.709999,22.709999,22.220882,0
2021-11-29,22.709999,22.709999,22.709999,22.709999,22.220882,0


In [10]:
def createAllFeatures(insiderDat, historicDat, startDate, endDate, DAYS_TO_LOOK_FORWARD=90, DAYS_TO_LOOK_BACK=1):
    '''
    Add the derived trade features to a table of insider trades.

    For every trade whose filing date lies at least DAYS_TO_LOOK_BACK days after
    startDate, 'DeltaOwn' and 'Value' are recomputed and the new columns
    'NumTrades', 'TradeToFileTime', 'ValueOwned', '%VolumeChange' and
    '%FuturePriceChange' are filled in. Trades filed earlier are skipped: they
    keep 0 in the new columns and their original 'DeltaOwn' / 'Value'.

    Parameters
    ----------
    insiderDat : pd.DataFrame
        One row per trade. The columns 'FilingDate', 'TradeDate', 'Ticker',
        'Price', 'Qty' and 'Owned' are read. The frame passed in is NOT
        modified; all work happens on a copy.
    historicDat
        Passed through unchanged to returnVolumeAndPriceChange.
    startDate, endDate : str
        Dates formatted as 'YYYY-MM-DD'. A malformed value raises ValueError.
        NOTE(review): endDate is only validated, it is never used to filter
        trades -- confirm whether trades filed after endDate should be skipped.
    DAYS_TO_LOOK_FORWARD : int
        Forwarded to returnVolumeAndPriceChange.
    DAYS_TO_LOOK_BACK : int
        Size (in calendar days) of the look-back window used for the skip
        rule and for 'NumTrades'; also forwarded to returnVolumeAndPriceChange.

    Returns
    -------
    pd.DataFrame
        A copy of insiderDat with date columns converted to datetime.date and
        the derived feature columns added.
    '''
    # Work on a copy so the caller's frame stays untouched and the call is idempotent.
    insiderDat = insiderDat.copy()
    insiderDat['FilingDate'] = pd.to_datetime(insiderDat['FilingDate']).dt.date
    insiderDat['TradeDate'] = pd.to_datetime(insiderDat['TradeDate']).dt.date

    # Counts and day gaps are integers; the remaining features hold floats, so
    # create them as float columns instead of relying on pandas silently
    # upcasting an int column when a float is written into it.
    insiderDat[['NumTrades', 'TradeToFileTime']] = 0
    insiderDat[['ValueOwned', '%VolumeChange', '%FuturePriceChange']] = 0.0
    # ['NumTradesCAT', 'TradeToFileTimeCAT','%VolumeChangeCAT']
    startDate = dt.datetime.strptime(startDate, '%Y-%m-%d').date()
    # Parsed for validation only (see the NOTE in the docstring).
    endDate = dt.datetime.strptime(endDate, '%Y-%m-%d').date()
    lookBack = dt.timedelta(days=DAYS_TO_LOOK_BACK)

    # These columns are never written inside the loop, so fetch them once.
    tickers = insiderDat['Ticker']
    tradeDates = insiderDat['TradeDate']
    fileDates = insiderDat['FilingDate']

    for tradeNum, tick, tradeDate, fileDate in zip(insiderDat.index, tickers, tradeDates, fileDates):
        print(f'Processing trade {tradeNum}', end='\r')
        windowStart = fileDate - lookBack

        # skip the first DAYS_TO_LOOK_BACK days so we have data to look back at
        if windowStart < startDate:
            continue

        # compute percentage change in shares owned by insider
        # ('Owned' is the post-trade holding, so the pre-trade holding is owned - shareChange)
        owned = insiderDat.at[tradeNum, 'Owned']
        shareChange = insiderDat.at[tradeNum, 'Qty']
        price = insiderDat.at[tradeNum, 'Price']
        if owned != shareChange:
            insiderDat.at[tradeNum, 'DeltaOwn'] = 100*shareChange / (owned-shareChange)

        # compute total value of insider's trade
        insiderDat.at[tradeNum, 'Value'] = shareChange*price

        # compute total value of insider's shares
        insiderDat.at[tradeNum, 'ValueOwned'] = owned*price

        # compute the time gap (in days) between the trade and its filing
        insiderDat.at[tradeNum, 'TradeToFileTime'] = (fileDate - tradeDate).days

        # count the same-ticker trades filed in the last DAYS_TO_LOOK_BACK days
        # (inclusive on both ends, so the current trade counts itself)
        recentTrades = (tickers == tick) & (fileDates <= fileDate) & (fileDates >= windowStart)
        insiderDat.at[tradeNum, 'NumTrades'] = int(recentTrades.sum())

        # compute the percentage volume change in the last DAYS_TO_LOOK_BACK days
        # compute the best closing price percentage change in the next DAYS_TO_LOOK_FORWARD days
        percentChangeVol, percentChangePrice = returnVolumeAndPriceChange(historicDat, tick, fileDate,
                                                                         DAYS_TO_LOOK_FORWARD, DAYS_TO_LOOK_BACK)
        insiderDat.at[tradeNum, '%FuturePriceChange'] = percentChangePrice
        insiderDat.at[tradeNum, '%VolumeChange'] = percentChangeVol

    return insiderDat

In [None]:
# Build the derived features for the loaded trades, using the default
# look-forward / look-back windows of createAllFeatures.
insiderDat_complete = createAllFeatures(insiderDat, historicDat, '2021-06-01', '2021-06-30')

In [7]:
# Spot-check the computed features on a single trade (row at position 347).
insiderDat_complete.iloc[347]

FilingDate                     2021-06-02
TradeDate                      2021-06-01
Ticker                               EVOP
CompanyName            Evo Payments, Inc.
InsiderName                 Wilson Darren
Title                 Pres, International
TradeType                        S - Sale
Price                               28.62
Qty                               -4000.0
Owned                             60300.0
DeltaOwn                         -6.22084
Value                           -114480.0
NumTrades                               2
TradeToFileTime                         1
ValueOwned                      1725786.0
%VolumeChange                    1.669521
%FuturePriceChange               3.892522
Name: 347, dtype: object

In [8]:
# Show every trade whose filing date is 2021-06-02.
insiderDat_complete.loc[insiderDat_complete['FilingDate'] == dt.datetime.fromisoformat('2021-06-02').date()]

Unnamed: 0,FilingDate,TradeDate,Ticker,CompanyName,InsiderName,Title,TradeType,Price,Qty,Owned,DeltaOwn,Value,NumTrades,TradeToFileTime,ValueOwned,%VolumeChange,%FuturePriceChange
229,2021-06-02,2021-05-28,CZWI,Citizens Community Bancorp Inc.,Bianchi Stephen M,"CEO, Pres",P - Purchase,13.57,1100.0,94648.0,1.175867,14927.00,1,5,1.284373e+06,-75.585284,1.867818
230,2021-06-02,2021-06-01,PANW,Palo Alto Networks Inc,Compeau Jean,"Deputy CFO, CAO",S - Sale,360.07,-2498.0,22112.0,-10.150345,-899454.86,1,1,7.961868e+06,6.499322,28.968036
231,2021-06-02,2021-06-01,RAPT,"Rapt Therapeutics, Inc.",Ho William,Chief Medical Officer,S - Sale,19.50,-1000.0,62529.0,-1.574084,-19500.00,1,1,1.219316e+06,33.821871,117.500007
232,2021-06-02,2021-05-28,RPTX,Repare Therapeutics Inc.,Gadicke Ansbert,10%,S - Sale,32.47,-38098.0,4202574.0,-0.898395,-1237042.06,2,5,1.364576e+08,-4.576976,9.747745
233,2021-06-02,2021-05-28,RPTX,Repare Therapeutics Inc.,Foley Todd,Dir,S - Sale,32.47,-16786.0,2383672.0,-0.699283,-545041.42,2,5,7.739783e+07,-4.576976,9.747745
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
569,2021-06-02,2021-06-01,XFOR,"X4 Pharmaceuticals, Inc",Ragan Paula,"Pres, CEO",S - Sale,9.41,-4104.0,408648.0,-0.994302,-38618.64,2,1,3.845378e+06,241.049383,-0.736839
570,2021-06-02,2021-06-01,RYN,Rayonier Inc,Bridwell Mark R,"VP, GC, Secretary",S - Sale+OE,38.50,-6166.0,54902.0,-10.096941,-237391.00,2,1,2.113727e+06,12.482307,0.579703
571,2021-06-02,2021-06-01,VMI,Valmont Industries Inc,Schapper Aaron M,EVP Infrastructure,S - Sale,251.00,-1004.0,7480.0,-11.834041,-252004.00,1,1,1.877480e+06,29.217791,0.476303
572,2021-06-02,2021-06-01,SCCO,Southern Copper Corp/,Ariztegui Andreve Vicente,Dir,S - Sale,72.23,-1000.0,2700.0,-27.027027,-72230.00,1,1,1.950210e+05,-6.499174,-0.988031


In [9]:
# Persist the feature table to CSV (to_csv writes the DataFrame index as the first column by default).
insiderDat_complete.to_csv('data/training_and_cv_data.csv')