## Data cleasning and preprocessing of the option pricing data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import yfinance as yf
import sqlite3
import math

In [2]:
# Read in the data from Jan 2023 to June 2023
df_2023_h1 = pd.DataFrame()
for i in [202301, 202302, 202303, 202304,  202305]:
    df_2023_h1 = pd.concat([df_2023_h1, pd.read_table(f'data/spy_eod_{i}.txt', sep=',')], ignore_index=True)
df_2023_h1.columns = df_2023_h1.columns.str.strip()

  df_2023_h1 = pd.concat([df_2023_h1, pd.read_table(f'data/spy_eod_{i}.txt', sep=',')], ignore_index=True)


In [3]:
# also drop expiration date later than 2024
df_2023_h1 = df_2023_h1[df_2023_h1['[EXPIRE_DATE]'] <= ' 2023-12-31']
df_2023_h1 = df_2023_h1[df_2023_h1['[EXPIRE_DATE]'] >= ' 2023-06-01']
df_2023_h1 = df_2023_h1.reset_index()

In [5]:
# get adj close data from yfinance
target = pd.DataFrame(yf.download(['SPY'], start="2023-01-01", end="2023-12-31")['Adj Close'])
target

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2023-01-03,375.118713
2023-01-04,378.014740
2023-01-05,373.700256
2023-01-06,382.270020
2023-01-09,382.053284
...,...
2023-12-22,473.649994
2023-12-26,475.649994
2023-12-27,476.510010
2023-12-28,476.690002


In [6]:
df_2023_h1['[EXPIRE_DATE]'] = df_2023_h1['[EXPIRE_DATE]'].str.strip().astype('datetime64[ns]')
df_2023_h1['[EXPIRE_DATE]'].unique()

<DatetimeArray>
['2023-06-16 00:00:00', '2023-06-30 00:00:00', '2023-09-15 00:00:00',
 '2023-09-29 00:00:00', '2023-12-15 00:00:00', '2023-12-29 00:00:00',
 '2023-07-21 00:00:00', '2023-08-18 00:00:00', '2023-10-20 00:00:00',
 '2023-06-02 00:00:00', '2023-06-09 00:00:00', '2023-06-23 00:00:00',
 '2023-06-01 00:00:00', '2023-06-05 00:00:00', '2023-06-06 00:00:00',
 '2023-06-07 00:00:00', '2023-06-08 00:00:00', '2023-07-07 00:00:00',
 '2023-06-12 00:00:00', '2023-06-13 00:00:00', '2023-06-14 00:00:00',
 '2023-11-17 00:00:00']
Length: 22, dtype: datetime64[ns]

In [7]:
target['[EXPIRE_DATE]'] = target.index
target['[EXPIRE_DATE]'].astype('datetime64[ns]')

Date
2023-01-03   2023-01-03
2023-01-04   2023-01-04
2023-01-05   2023-01-05
2023-01-06   2023-01-06
2023-01-09   2023-01-09
                ...    
2023-12-22   2023-12-22
2023-12-26   2023-12-26
2023-12-27   2023-12-27
2023-12-28   2023-12-28
2023-12-29   2023-12-29
Name: [EXPIRE_DATE], Length: 250, dtype: datetime64[ns]

In [8]:
df_2023_h1 = pd.merge(df_2023_h1, target, on = '[EXPIRE_DATE]')

Revising the target here. Since with call option the loss cannot be less than the option price. Better split the target and tables to call and put. 

In [9]:
# Add new cols for the target, namely -rt and price diff
df_2023_h1['-rt'] = -0.04*(df_2023_h1['[EXPIRE_UNIX]'] - df_2023_h1['[QUOTE_UNIXTIME]'])/(3600*365*24)
df_2023_h1['price_diff'] = df_2023_h1['[STRIKE]'] - df_2023_h1['Adj Close']
df_2023_h1['-rt'] = pd.to_numeric(df_2023_h1['-rt'])
df_2023_h1['exp(-rt)'] = df_2023_h1['-rt'].apply(lambda x: math.exp(x))
df_2023_h1 = df_2023_h1.loc[:, ~df_2023_h1.columns.str.contains('^Unnamed')]   
df_2023_h1['discounted_price'] = df_2023_h1['price_diff'] * df_2023_h1['exp(-rt)']

In [10]:
# in case that the value is smaller than 0
df_2023_h1['adj_call_target'] = df_2023_h1[df_2023_h1['price_diff'] > 0]['price_diff'] * df_2023_h1[df_2023_h1['price_diff'] > 0]['exp(-rt)']
df_2023_h1['adj_put_target'] = -df_2023_h1[df_2023_h1['price_diff'] < 0]['price_diff'] * df_2023_h1[df_2023_h1['price_diff'] < 0]['exp(-rt)']

In [11]:
df_2023_h1.fillna(0, inplace=True) # fill na

In [12]:
"""strike_amount = []
for date in df_2023_h1['[QUOTE_DATE]'].unique():
    for expire_date in df_2023_h1[df_2023_h1['[QUOTE_DATE]'] == date]['[EXPIRE_DATE]'].unique():
        length = len(df_2023_h1[df_2023_h1['[QUOTE_DATE]'] ==date][df_2023_h1['[EXPIRE_DATE]'] == expire_date])
        strike_amount.append(length)
min(strike_amount)""" # 11

"strike_amount = []\nfor date in df_2023_h1['[QUOTE_DATE]'].unique():\n    for expire_date in df_2023_h1[df_2023_h1['[QUOTE_DATE]'] == date]['[EXPIRE_DATE]'].unique():\n        length = len(df_2023_h1[df_2023_h1['[QUOTE_DATE]'] ==date][df_2023_h1['[EXPIRE_DATE]'] == expire_date])\n        strike_amount.append(length)\nmin(strike_amount)"

In [13]:
df_2023_h1.groupby(['[QUOTE_DATE]', '[EXPIRE_DATE]']).apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,index,[QUOTE_UNIXTIME],[QUOTE_READTIME],[QUOTE_DATE],[QUOTE_TIME_HOURS],[UNDERLYING_LAST],[EXPIRE_DATE],[EXPIRE_UNIX],[DTE],[C_DELTA],...,[P_VOLUME],[STRIKE_DISTANCE],[STRIKE_DISTANCE_PCT],Adj Close,-rt,price_diff,exp(-rt),discounted_price,adj_call_target,adj_put_target
[QUOTE_DATE],[EXPIRE_DATE],Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2023-01-03,2023-06-16,0,2382,1672779600,2023-01-03 16:00,2023-01-03,16.0,380.82,2023-06-16,1686945600,163.96,0.98313,...,135.000000,230.8,0.606,436.146973,-0.017968,-286.146973,0.982192,-281.051389,0.000000,281.051389
2023-01-03,2023-06-16,1,2383,1672779600,2023-01-03 16:00,2023-01-03,16.0,380.82,2023-06-16,1686945600,163.96,0.98258,...,101.000000,220.8,0.580,436.146973,-0.017968,-276.146973,0.982192,-271.229465,0.000000,271.229465
2023-01-03,2023-06-16,2,2384,1672779600,2023-01-03 16:00,2023-01-03,16.0,380.82,2023-06-16,1686945600,163.96,0.97997,...,27.000000,210.8,0.554,436.146973,-0.017968,-266.146973,0.982192,-261.407541,0.000000,261.407541
2023-01-03,2023-06-16,3,2385,1672779600,2023-01-03 16:00,2023-01-03,16.0,380.82,2023-06-16,1686945600,163.96,0.97810,...,215.000000,200.8,0.527,436.146973,-0.017968,-256.146973,0.982192,-251.585617,0.000000,251.585617
2023-01-03,2023-06-16,4,2386,1672779600,2023-01-03 16:00,2023-01-03,16.0,380.82,2023-06-16,1686945600,163.96,0.97648,...,0.000000,195.8,0.514,436.146973,-0.017968,-251.146973,0.982192,-246.674654,0.000000,246.674654
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-31,2023-12-29,90611,403301,1685563200,2023-05-31 16:00,2023-05-31,16.0,417.80,2023-12-29,1703883600,212.04,0.02990,...,,92.2,0.221,475.309998,-0.023237,34.690002,0.977030,33.893189,33.893189,0.000000
2023-05-31,2023-12-29,90612,403302,1685563200,2023-05-31 16:00,2023-05-31,16.0,417.80,2023-12-29,1703883600,212.04,0.02566,...,,97.2,0.233,475.309998,-0.023237,39.690002,0.977030,38.778342,38.778342,0.000000
2023-05-31,2023-12-29,90613,403303,1685563200,2023-05-31 16:00,2023-05-31,16.0,417.80,2023-12-29,1703883600,212.04,0.02121,...,0.000000,102.2,0.245,475.309998,-0.023237,44.690002,0.977030,43.663494,43.663494,0.000000
2023-05-31,2023-12-29,90614,403304,1685563200,2023-05-31 16:00,2023-05-31,16.0,417.80,2023-12-29,1703883600,212.04,0.01735,...,0.000000,107.2,0.257,475.309998,-0.023237,49.690002,0.977030,48.548646,48.548646,0.000000


In [24]:
# scrapped
"""df_2023_h1['abs_strike_distance'] = df_2023_h1.groupby(['[QUOTE_DATE]', '[EXPIRE_DATE]'])['[STRIKE_DISTANCE]'].apply(abs).reset_index()['[STRIKE_DISTANCE]']
df_2023_h1['rank'] = df_2023_h1.groupby(['[QUOTE_DATE]', '[EXPIRE_DATE]'])['[STRIKE_DISTANCE]'].rank()
df_2023_h1 = df_2023_h1[df_2023_h1['rank'] <= 11].reset_index()"""

In [14]:
amount = []
for date in df_2023_h1['[QUOTE_DATE]'].unique():
    length = len(df_2023_h1[df_2023_h1['[QUOTE_DATE]'] ==date])
    amount.append(length)
min(amount) # minimum amount of entries of a quote date

765

In [15]:
df_2023_h1 = df_2023_h1.groupby(['[QUOTE_DATE]']).sample(n = 765, random_state = 42)

In [16]:
df_2023_h1['[QUOTE_DATE]'] = df_2023_h1['[QUOTE_DATE]'].apply(np.datetime64)
df_2023_h1['[EXPIRE_DATE]'] = df_2023_h1['[EXPIRE_DATE]'].apply(np.datetime64)

In [17]:
df_2023_h1 = df_2023_h1[['[EXPIRE_UNIX]', '[QUOTE_DATE]', '[EXPIRE_DATE]', '[STRIKE]', '[UNDERLYING_LAST]', '[C_DELTA]', '[C_GAMMA]', '[C_VEGA]',
       '[C_THETA]', '[C_RHO]', '[C_IV]', '[C_VOLUME]','[C_BID]', '[C_ASK]', '[P_DELTA]', '[P_GAMMA]', '[P_VEGA]', '[P_THETA]',
       '[P_RHO]', '[P_IV]', '[P_VOLUME]', '[P_BID]', '[P_ASK]', 'adj_call_target', 'adj_put_target','discounted_price']]
for column in df_2023_h1.columns:
    print(type(df_2023_h1[column][0]))

<class 'numpy.int64'>
<class 'pandas._libs.tslibs.timestamps.Timestamp'>
<class 'pandas._libs.tslibs.timestamps.Timestamp'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'str'>
<class 'str'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'str'>
<class 'str'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>


In [18]:
df_2023_h1.groupby(['[QUOTE_DATE]']).apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,[EXPIRE_UNIX],[QUOTE_DATE],[EXPIRE_DATE],[STRIKE],[UNDERLYING_LAST],[C_DELTA],[C_GAMMA],[C_VEGA],[C_THETA],[C_RHO],...,[P_VEGA],[P_THETA],[P_RHO],[P_IV],[P_VOLUME],[P_BID],[P_ASK],adj_call_target,adj_put_target,discounted_price
[QUOTE_DATE],Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2023-01-03,35834,1694808000,2023-01-03,2023-09-15,265.0,380.82,0.92227,0.00116,0.42399,-0.04696,1.58341,...,0.39418,-0.02328,-0.18374,0.333300,2.000000,3.19,3.28,0.000000,171.714191,-171.714191
2023-01-03,22736,1688155200,2023-01-03,2023-06-30,364.0,380.82,0.66618,0.00543,0.95754,-0.08510,1.05430,...,0.96215,-0.05433,-0.61092,0.246080,0.000000,15.58,15.66,0.000000,74.471581,-74.471581
2023-01-03,79332,1703883600,2023-01-03,2023-12-29,425.0,380.82,0.39002,0.00491,1.44059,-0.05273,1.28108,...,1.26217,-0.02139,-1.46953,0.205950,,49.51,54.50,0.000000,48.363808,-48.363808
2023-01-03,193,1686945600,2023-01-03,2023-06-16,610.0,380.82,0.00156,0.00004,0.01151,-0.00107,0.00213,...,0.00000,0.00000,0.00000,,,228.67,229.45,170.757127,0.000000,170.757127
2023-01-03,22810,1688155200,2023-01-03,2023-06-30,545.0,380.82,0.00621,0.00034,0.04473,-0.00286,0.01035,...,0.00000,0.00000,0.00000,,,163.58,164.55,103.032703,0.000000,103.032703
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-31,35797,1688155200,2023-05-31,2023-06-30,480.0,417.80,0.00106,0.00017,0.00422,-0.00102,0.00074,...,0.16031,-0.06989,-0.11405,0.290430,0.000000,61.89,63.04,39.930304,0.000000,39.930304
2023-05-31,118969,1685736000,2023-05-31,2023-06-02,451.0,417.80,0.00190,0.00052,0.00219,-0.00479,0.00014,...,0.00000,0.00000,0.00000,,,32.61,33.36,27.871607,0.000000,27.871607
2023-05-31,22552,1686945600,2023-05-31,2023-06-16,366.0,417.80,0.91525,0.00291,0.13140,-0.16689,1.01291,...,0.05407,-0.05017,-0.00558,0.324910,0.000000,0.29,0.30,0.000000,70.024083,-70.024083
2023-05-31,126350,1686168000,2023-05-31,2023-06-07,450.0,417.80,0.00123,0.00055,0.00299,-0.00246,-0.00009,...,0.00000,0.00000,0.00000,,,31.59,32.80,28.210742,0.000000,28.210742


In [19]:
df_2023_h1 = df_2023_h1.replace(r'^\s*$', 0, regex=True)

In [20]:
for column in ['[C_IV]', '[C_VOLUME]', '[P_IV]', '[P_VOLUME]']:
    df_2023_h1[column] = df_2023_h1[column].str.strip().astype('float64')
    print(type(df_2023_h1[column][0]))

<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>


In [21]:
df_2023_h1 = df_2023_h1.fillna(0)

In [22]:
# Basic normalization and standardization
# run block of code and catch warnings
import warnings
from sklearn.preprocessing import MinMaxScaler
with warnings.catch_warnings():
	# ignore all caught warnings
	warnings.filterwarnings("ignore")
	# execute code that will generate warnings
	# dont standardize unixtime '[QUOTE_UNIXTIME]', '[EXPIRE_UNIX]', if it does what I think it does. 
	numeric_cols = ['[EXPIRE_UNIX]', '[STRIKE]', '[UNDERLYING_LAST]', '[C_DELTA]', '[C_GAMMA]', '[C_VEGA]',
       '[C_THETA]', '[C_RHO]', '[C_IV]', '[C_VOLUME]','[C_BID]', '[C_ASK]', '[P_DELTA]', '[P_GAMMA]', '[P_VEGA]', '[P_THETA]',
       '[P_RHO]', '[P_IV]', '[P_VOLUME]', '[P_BID]', '[P_ASK]']  # not sure about all this, we ball
	scaler = MinMaxScaler((0, 1))
	df_2023_h1[numeric_cols] = scaler.fit_transform(df_2023_h1[numeric_cols])

In [23]:
df_2023_h1

Unnamed: 0,[EXPIRE_UNIX],[QUOTE_DATE],[EXPIRE_DATE],[STRIKE],[UNDERLYING_LAST],[C_DELTA],[C_GAMMA],[C_VEGA],[C_THETA],[C_RHO],...,[P_VEGA],[P_THETA],[P_RHO],[P_IV],[P_VOLUME],[P_BID],[P_ASK],adj_call_target,adj_put_target,discounted_price
35834,0.502270,2023-01-03,2023-09-15,0.201754,0.035337,0.92227,0.010511,0.009460,0.920424,0.958921,...,0.257821,0.946321,0.880798,0.193657,0.000034,0.009380,0.009560,0.000000,171.714191,-171.714191
22736,0.137414,2023-01-03,2023-06-30,0.375439,0.035337,0.66618,0.048225,0.021365,0.855794,0.955811,...,0.629313,0.874726,0.603664,0.143055,0.000000,0.045814,0.045755,0.000000,74.471581,-74.471581
79332,1.000000,2023-01-03,2023-12-29,0.482456,0.035337,0.39002,0.043632,0.032143,0.910647,0.957144,...,0.825547,0.950679,0.046639,0.119774,0.000000,0.145588,0.159309,0.000000,48.363808,-48.363808
193,0.071076,2023-01-03,2023-06-16,0.807018,0.035337,0.00156,0.000618,0.000257,0.998187,0.949626,...,0.000000,1.000000,1.000000,0.000290,0.000000,0.672420,0.670799,170.757127,0.000000,170.757127
22810,0.137414,2023-01-03,2023-06-30,0.692982,0.035337,0.00621,0.003268,0.000998,0.995154,0.949674,...,0.000000,1.000000,1.000000,0.000290,0.000000,0.481019,0.481055,103.032703,0.000000,103.032703
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35797,0.137414,2023-05-31,2023-06-30,0.578947,0.942822,0.00106,0.001766,0.000094,0.998272,0.949618,...,0.104854,0.838848,0.926010,0.168785,0.000000,0.181992,0.184277,39.930304,0.000000,39.930304
118969,0.004738,2023-05-31,2023-06-02,0.528070,0.942822,0.00190,0.004858,0.000049,0.991883,0.949614,...,0.000000,1.000000,1.000000,0.000290,0.000000,0.095892,0.097503,27.871607,0.000000,27.871607
22552,0.071076,2023-05-31,2023-06-16,0.378947,0.942822,0.91525,0.025967,0.002932,0.717198,0.955567,...,0.035366,0.884318,0.996380,0.188789,0.000000,0.000853,0.000848,0.000000,70.024083,-70.024083
126350,0.028430,2023-05-31,2023-06-07,0.526316,0.942822,0.00123,0.005123,0.000067,0.995831,0.949613,...,0.000000,1.000000,1.000000,0.000290,0.000000,0.092893,0.095866,28.210742,0.000000,28.210742


In [24]:
df_2023_h1_call = df_2023_h1[['[QUOTE_DATE]', '[EXPIRE_DATE]', '[EXPIRE_UNIX]', '[STRIKE]', '[UNDERLYING_LAST]', '[C_DELTA]', '[C_GAMMA]', '[C_VEGA]',
       '[C_THETA]', '[C_RHO]', '[C_IV]', '[C_VOLUME]','[C_BID]', '[C_ASK]', 'adj_call_target',]]

In [25]:
df_2023_h1_put = df_2023_h1[['[QUOTE_DATE]', '[EXPIRE_DATE]', '[EXPIRE_UNIX]', '[STRIKE]', '[UNDERLYING_LAST]', '[P_DELTA]', '[P_GAMMA]', '[P_VEGA]', '[P_THETA]',
       '[P_RHO]', '[P_IV]', '[P_VOLUME]', '[P_BID]', '[P_ASK]', 'adj_put_target']]

In [39]:
conn = sqlite3.connect("data/tables_split.db")
df_2023_h1_call.to_sql("df_2023_h1_call", conn, if_exists = "replace", index=False)
df_2023_h1_put.to_sql("df_2023_h1_put", conn, if_exists = "replace", index=False)
conn.close()

In [38]:
 # try to put '[QUOTE_DATE]' as timestamps and 'adj_call_target' as target see what happens. 
time_series_call = []
for date in df_2023_h1_call['[QUOTE_DATE]'].unique():
    time_series_call.append(df_2023_h1_call[df_2023_h1_call['[QUOTE_DATE]'] == date].reset_index().drop(['[EXPIRE_DATE]', 'index'], axis = 1))
time_series_call

[    [QUOTE_DATE]  [EXPIRE_UNIX]  [STRIKE]  [UNDERLYING_LAST]  [C_DELTA]  \
 0     2023-01-03       0.502270  0.201754           0.035337    0.92227   
 1     2023-01-03       0.137414  0.375439           0.035337    0.66618   
 2     2023-01-03       1.000000  0.482456           0.035337    0.39002   
 3     2023-01-03       0.071076  0.807018           0.035337    0.00156   
 4     2023-01-03       0.137414  0.692982           0.035337    0.00621   
 ..           ...            ...       ...                ...        ...   
 760   2023-01-03       0.071076  0.340351           0.035337    0.76705   
 761   2023-01-03       0.071076  0.401754           0.035337    0.58103   
 762   2023-01-03       0.137414  0.394737           0.035337    0.60443   
 763   2023-01-03       0.568608  0.184211           0.035337    0.92963   
 764   2023-01-03       0.071076  0.394737           0.035337    0.60618   
 
      [C_GAMMA]  [C_VEGA]  [C_THETA]   [C_RHO]    [C_IV]  [C_VOLUME]   [C_BID]  \
 0  

In [27]:
 # split target into two df
"""target = df_2023_h1['discounted_price']
df_2023_h1 = df_2023_h1.drop('discounted_price', axis=1)"""

In [28]:
"""target_call = df_2023_h1_call['adj_call_target']
df_2023_h1_call = df_2023_h1_call.drop('adj_call_target', axis=1)"""

In [30]:
# output the df_2023_h1 to a csv file
# df_2023_h1.to_csv(r'data/df_2023_h1.csv', index = False, header=True)
# target.to_csv(r'data/target.csv', index = False, header=True)

In [31]:
# output to sqlite database if anyone cares
"""conn = sqlite3.connect("data/tables.db")
df_2023_h1.to_sql("df_2023_h1_feature", conn, if_exists = "replace", index=False)
target.to_sql("df_2023_h1_target", conn, if_exists = "replace", index=False)
conn.close()"""

'conn = sqlite3.connect("data/tables.db")\ndf_2023_h1.to_sql("df_2023_h1_feature", conn, if_exists = "replace", index=False)\ntarget.to_sql("df_2023_h1_target", conn, if_exists = "replace", index=False)\nconn.close()'

In [32]:
"""conn = sqlite3.connect("data/tables_split.db")
df_2023_h1_call.to_sql("df_2023_h1_feature", conn, if_exists = "replace", index=False)
target_call.to_sql("df_2023_h1_target", conn, if_exists = "replace", index=False)
conn.close()"""

'conn = sqlite3.connect("data/tables_split.db")\ndf_2023_h1_call.to_sql("df_2023_h1_feature", conn, if_exists = "replace", index=False)\ntarget_call.to_sql("df_2023_h1_target", conn, if_exists = "replace", index=False)\nconn.close()'