# Options Feature Engineering

Goal: Time to expiration is a common feature in options pricing. We derive the time to expiration for each option in our data as the difference between its expiration date (`exDate`) and the current quote date (`Date`).

Note: In general, a longer time to expiration results in a higher option price, because there is more time for the option to finish "in-the-money".

In [None]:
import pandas as pd
from datetime import datetime, timedelta

In [2]:
%cd '/Users/benjochem/Desktop/Junior/Research'

/Users/benjochem/Desktop/Junior/Research


In [3]:
# Load the interim options CSV into a DataFrame, print its row count,
# and preview the first rows (the last expression is displayed by the notebook).
options = pd.read_csv('Project/data/interim/full_options_w_dates.csv')
print(len(options))
options.head()

3147375


Unnamed: 0,cp_flag,strike_price,volume,open_interest,delta,gamma,impl_volatility,best_bid,best_offer,Date,exDate
0,C,15000,0,0,,,,13.3,13.8,20100528,20100619
1,C,16000,0,0,,,,12.3,12.8,20100528,20100619
2,C,17000,0,0,,,,11.3,11.8,20100528,20100619
3,C,18000,0,0,,,,10.3,10.8,20100528,20100619
4,C,19000,0,0,,,,9.3,9.8,20100528,20100619


In [4]:
# Find the time to expiration (TTE) of each option as the difference between its exDate and its Date.
# Define a function that computes the TTE for every (Date, exDate) pair.
def _parse_yyyymmdd(value):
    """Convert a YYYYMMDD value (int or str) into a ``datetime`` at midnight."""
    text = str(value)
    return datetime(year=int(text[0:4]), month=int(text[4:6]), day=int(text[6:8]))


def find_expiration(date=None, exdate=None):
    """Return the time to expiration for each (date, exdate) pair.

    Parameters
    ----------
    date : sized iterable of int or str, optional
        Observation dates encoded as YYYYMMDD (e.g. ``20100528``).
    exdate : sized iterable of int or str, optional
        Expiration dates encoded as YYYYMMDD, aligned by position with ``date``.

    Returns
    -------
    list of datetime.timedelta
        ``exdate - date`` for every pair, in input order. Empty when no
        dates are given.

    Raises
    ------
    ValueError
        If ``date`` and ``exdate`` have different lengths, or if a value
        cannot be parsed as a YYYYMMDD calendar date.
    """
    # None instead of mutable [] defaults; an omitted argument means "no rows".
    date = () if date is None else date
    exdate = () if exdate is None else exdate

    if len(date) != len(exdate):
        raise ValueError(
            "date and exdate must have the same length "
            f"(got {len(date)} and {len(exdate)})"
        )

    # Iterate values positionally. Indexing with date[i] is label-based on a
    # pandas Series, so it fails (or picks the wrong row) whenever the index
    # is not exactly 0..n-1, e.g. after filtering the DataFrame.
    return [
        _parse_yyyymmdd(end) - _parse_yyyymmdd(start)
        for start, end in zip(date, exdate)
    ]

In [5]:
# Add a TTE column holding exDate - Date for every row, then preview the result.
options['TTE'] = find_expiration(options.Date, options.exDate)
options.head()

Unnamed: 0,cp_flag,strike_price,volume,open_interest,delta,gamma,impl_volatility,best_bid,best_offer,Date,exDate,TTE
0,C,15000,0,0,,,,13.3,13.8,20100528,20100619,22 days
1,C,16000,0,0,,,,12.3,12.8,20100528,20100619,22 days
2,C,17000,0,0,,,,11.3,11.8,20100528,20100619,22 days
3,C,18000,0,0,,,,10.3,10.8,20100528,20100619,22 days
4,C,19000,0,0,,,,9.3,9.8,20100528,20100619,22 days


In [6]:
# Convert each time-to-expiration timedelta to an integer number of days
def _whole_days(value):
    """Return the whole-day count of one time-to-expiration value."""
    days = getattr(value, "days", None)
    if days is not None:
        # timedelta-like objects expose the day count directly. Parsing
        # str(value) instead breaks for a zero-day datetime.timedelta, whose
        # text form is '0:00:00' with no leading day count.
        return int(days)
    # Fallback for values without a .days attribute (e.g. 22 or '22 days'):
    # the leading space-separated token is the day count.
    return int(str(value).split(' ')[0])


def numeric_maturity(TTE=None):
    """Convert time-to-expiration values to integer numbers of days.

    Parameters
    ----------
    TTE : iterable, optional
        Time-to-expiration values. Objects with a ``days`` attribute (such as
        ``datetime.timedelta``) are read directly; anything else is parsed
        from the first space-separated token of its string form.

    Returns
    -------
    list of int
        Whole days for each input value, in input order. Empty when ``TTE``
        is omitted.

    Raises
    ------
    ValueError
        If a value has no usable day count.
    """
    # None instead of a mutable [] default.
    if TTE is None:
        return []
    return [_whole_days(value) for value in TTE]

In [7]:
# Overwrite the TTE column with integer day counts, then preview the result.
options['TTE'] = numeric_maturity(options.TTE)
options.head()

Unnamed: 0,cp_flag,strike_price,volume,open_interest,delta,gamma,impl_volatility,best_bid,best_offer,Date,exDate,TTE
0,C,15000,0,0,,,,13.3,13.8,20100528,20100619,22
1,C,16000,0,0,,,,12.3,12.8,20100528,20100619,22
2,C,17000,0,0,,,,11.3,11.8,20100528,20100619,22
3,C,18000,0,0,,,,10.3,10.8,20100528,20100619,22
4,C,19000,0,0,,,,9.3,9.8,20100528,20100619,22


In [8]:
# Sanity check: print the row count and display the number of nulls per column.
print(len(options))
options.isnull().sum()

3147375


cp_flag                 0
strike_price            0
volume                  0
open_interest           0
delta              630549
gamma              630549
impl_volatility    630549
best_bid                0
best_offer              0
Date                    0
exDate                  0
TTE                     0
dtype: int64

In [9]:
# Save the DataFrame, now including TTE, to a new interim CSV;
# index = False leaves the row index out of the file.
options.to_csv('Project/data/interim/full_options_w_dates_TTE.csv', index = False)