In [10]:
import sqlite3
import pandas as pd
import numpy as np
import datetime
pd.options.display.max_columns = 1000

In [11]:
class SqliteCore(object):
    """Small convenience wrapper around one ``sqlite3`` connection.

    Provides helpers to create tables, bulk-insert pandas DataFrames and
    look up the current largest primary-key value of a table.
    """

    def __init__(self, db_path):
        """Open (or create) the SQLite database located at ``db_path``."""
        # Connection shared by every helper on this object.
        self.connection = sqlite3.connect(db_path)
        # table name -> value most recently returned by _get_primary_idx.
        self.primary_idx_dict = {}

    def creat_table(self, table_name, para_list):
        """Create ``table_name`` unless it already exists.

        Args:
            table_name: Name of the table to create.
            para_list: Sequence of column / constraint definitions, one SQL
                fragment per entry (e.g. ``'price real not null'``). The
                sequence is left untouched, so it can be reused by the caller.

        Note:
            Both arguments are interpolated into the SQL text (identifiers
            cannot be bound as parameters), so only pass trusted values.
        """
        # Join instead of appending ',' to the caller's list in place.
        sql_cmd = 'CREATE TABLE IF NOT EXISTS {}({})'.format(
            table_name, ','.join(para_list)
        )
        self.connection.execute(sql_cmd)

    def _add_data(self, table_name, data_df, exists_type='append'):
        """Write ``data_df`` into ``table_name`` via ``DataFrame.to_sql``.

        Args:
            table_name: Destination table.
            data_df: DataFrame whose columns are written (the index is not).
            exists_type: Forwarded as ``if_exists`` to ``to_sql``.
        """
        data_df.to_sql(table_name, con=self.connection, if_exists=exists_type, index=False)

    def _get_primary_idx(self, table_name, primary_key=None):
        """Return the largest ``primary_key`` value stored in ``table_name``.

        Args:
            table_name: Table to inspect.
            primary_key: Key column name; defaults to ``<table_name>_id``.

        Returns:
            The maximum key value, or ``0`` when the table has no rows. The
            value is also cached in ``self.primary_idx_dict[table_name]``.
        """
        if primary_key is None:
            primary_key = table_name + '_id'
        # Query through this object's own connection and let SQLite compute
        # the maximum instead of loading and sorting the whole table.
        row = self.connection.execute(
            'select max({}) from {}'.format(primary_key, table_name)
        ).fetchone()
        cur_primary_id = 0 if row[0] is None else row[0]
        self.primary_idx_dict[table_name] = cur_primary_id
        return cur_primary_id

In [12]:
# Open the working database and make sure every table used below exists.
# NOTE: SQLite only enforces the foreign keys declared here when
# `PRAGMA foreign_keys = ON` has been issued on the connection.
sql_core = SqliteCore('test.db')

# One row per tradable instrument.
sql_core.creat_table('instrument', [
    'instrument_id int primary key not null',
    'description text not null',
    'ticker text not null',
    'maturity datetime',
    'strike real',
    'option_type text'
])

# Quote / greeks rows, each linked to an instrument.
# The column name 'thela' is kept as-is because the loader code writes to it.
sql_core.creat_table('market', [
    'market_id int primary key not null',
    'instrument_id',
    'timestamp datetime not null',
    'bid real',
    'ask real',
    'mid real',
    'delta real',
    'IV real',
    'gamma real',
    'thela real',
    'vega real',
    'foreign key (instrument_id) references instrument (instrument_id)'
])

# Executed trades, each linked to an instrument.
sql_core.creat_table('trade', [
    'trade_id int primary key not null',
    'instrument_id',
    'timestamp datetime not null',
    'qty real not null',
    'price real not null',
    'reason text',
    'foreign key (instrument_id) references instrument (instrument_id)'
])

# Positions; replace_position_id points at another row of this same table.
# A foreign key must reference a PRIMARY KEY / UNIQUE column, so it targets
# position_id rather than the non-unique replace_position_id column.
sql_core.creat_table('position', [
    'position_id int primary key not null',
    'instrument_id',
    'replace_position_id',
    'startdate datetime not null',
    'enddate datetime not null',
    'foreign key (instrument_id) references instrument (instrument_id)',
    'foreign key (replace_position_id) references position (position_id)'
])

# ORDER is a reserved SQL keyword, so the table is named my_order instead.
sql_core.creat_table('my_order', [
    'order_id int primary key not null',
    'instrument_id',
    'timestamp datetime not null',
    'qty real not null',
    'order_type real not null',
    'order_price real not null',
    'foreign key (instrument_id) references instrument (instrument_id)'
])

In [13]:
# Read the raw CSV and keep only its first 20 rows for a quick look.
a = pd.read_csv('c3b5b352d23a34cfd555965199c8791c.csv').head(20)
print(a)

            symbol           T     K putCall  \
0   AAPL_112522C70  2022-11-25  70.0    CALL   
1   AAPL_120222C70  2022-12-02  70.0    CALL   
2   AAPL_120922C70  2022-12-09  70.0    CALL   
3   AAPL_121622C70  2022-12-16  70.0    CALL   
4   AAPL_122322C70  2022-12-23  70.0    CALL   
5   AAPL_123022C70  2022-12-30  70.0    CALL   
6   AAPL_010623C70  2023-01-06  70.0    CALL   
7   AAPL_012023C70  2023-01-20  70.0    CALL   
8   AAPL_021723C70  2023-02-17  70.0    CALL   
9   AAPL_031723C70  2023-03-17  70.0    CALL   
10  AAPL_042123C70  2023-04-21  70.0    CALL   
11  AAPL_051923C70  2023-05-19  70.0    CALL   
12  AAPL_061623C70  2023-06-16  70.0    CALL   
13  AAPL_072123C70  2023-07-21  70.0    CALL   
14  AAPL_091523C70  2023-09-15  70.0    CALL   
15  AAPL_011924C70  2024-01-19  70.0    CALL   
16  AAPL_031524C70  2024-03-15  70.0    CALL   
17  AAPL_062124C70  2024-06-21  70.0    CALL   
18  AAPL_011725C70  2025-01-17  70.0    CALL   
19  AAPL_112522C75  2022-11-25  75.0    

In [55]:
def market_data_entry_api(sql_core, csv_path):
    """Append the quotes found in a CSV file to the instrument/market tables.

    For every CSV row one ``market`` row is appended. An ``instrument`` row is
    appended only the first time a ``symbol`` is seen that is not yet stored
    as an instrument ``description``. New ids continue from the current
    maximum ids reported by ``sql_core._get_primary_idx``.

    Args:
        sql_core: Object exposing ``connection``, ``_get_primary_idx`` and
            ``_add_data`` (see ``SqliteCore``).
        csv_path: CSV file with the columns ``symbol``, ``T``, ``strikePrice``,
            ``putCall``, ``quoteTime``, ``bid``, ``ask``, ``delta``,
            ``volatility``, ``gamma``, ``theta`` and ``vega``.
    """
    instrument_columns = [
        'instrument_id', 'description', 'ticker', 'maturity', 'strike', 'option_type'
    ]
    market_columns = [
        'market_id', 'instrument_id', 'timestamp', 'bid', 'ask', 'mid',
        'delta', 'IV', 'gamma', 'thela', 'vega'
    ]

    # description -> instrument_id lookup. If the table already holds several
    # rows with the same description, the first one read wins instead of the
    # lookup failing on an ambiguous match.
    exist_instrument_df = pd.read_sql(
        'select instrument_id, description from instrument', sql_core.connection
    )
    instrument_id_by_description = {}
    for known_id, description in zip(
        exist_instrument_df['instrument_id'], exist_instrument_df['description']
    ):
        instrument_id_by_description.setdefault(description, int(known_id))

    cur_market_id = sql_core._get_primary_idx('market')
    cur_instrument_id = sql_core._get_primary_idx('instrument')

    # Collect plain records and build each DataFrame once at the end; growing
    # a DataFrame row by row inside the loop is quadratic.
    new_instrument_rows = []
    new_market_rows = []
    for row in pd.read_csv(csv_path).to_dict('records'):
        symbol = row['symbol']
        instrument_id = instrument_id_by_description.get(symbol)
        if instrument_id is None:
            cur_instrument_id += 1
            instrument_id = int(cur_instrument_id)
            instrument_id_by_description[symbol] = instrument_id
            new_instrument_rows.append({
                'instrument_id': instrument_id,
                'description': symbol,
                # Ticker is the part of the symbol before the first '_'.
                'ticker': symbol.split('_')[0],
                'maturity': row['T'],
                'strike': row['strikePrice'],
                'option_type': row['putCall']
            })
        cur_market_id += 1
        new_market_rows.append({
            'market_id': cur_market_id,
            'instrument_id': instrument_id,
            'timestamp': row['quoteTime'],
            'bid': row['bid'],
            'ask': row['ask'],
            'mid': (row['bid'] + row['ask']) / 2,
            'delta': row['delta'],
            'IV': row['volatility'],
            'gamma': row['gamma'],
            'thela': row['theta'],
            'vega': row['vega']
        })

    sql_core._add_data('instrument', pd.DataFrame(new_instrument_rows, columns=instrument_columns))
    sql_core._add_data('market', pd.DataFrame(new_market_rows, columns=market_columns))
# Load this CSV into the instrument and market tables through sql_core.
# NOTE: every run appends one market row per CSV row, so re-running this cell
# adds the same quotes again under new market ids.
market_data_entry_api(sql_core, 'c3b5b352d23a34cfd555965199c8791c.csv')

   instrument_id description ticker maturity strike option_type
0              1  AAPL_stock   AAPL     None   None        None


In [None]:
# Hand-built single instrument row (only id, description and ticker are set),
# appended to the instrument table.
instrument_data = pd.DataFrame(
    dict(instrument_id=2, description='AAPL_stock', ticker='AAPL'),
    index=[0],
)
sql_core._add_data('instrument', instrument_data)

In [50]:
# Show the current contents of the instrument table.
pd.read_sql('select * from instrument', sql_core.connection)

Unnamed: 0,instrument_id,description,ticker,maturity,strike,option_type
0,1,AAPL_stock,AAPL,,,
1,2,AAPL_stock,AAPL,,,
2,3,AAPL_112522C70,AAPL,2022-11-25,70.0,CALL
3,4,AAPL_120222C70,AAPL,2022-12-02,70.0,CALL
4,5,AAPL_120922C70,AAPL,2022-12-09,70.0,CALL
...,...,...,...,...,...,...
2063,2064,AAPL_011924P310,AAPL,2024-01-19,310.0,PUT
2064,2065,AAPL_062124P310,AAPL,2024-06-21,310.0,PUT
2065,2066,AAPL_011725P310,AAPL,2025-01-17,310.0,PUT
2066,2067,AAPL_011924P320,AAPL,2024-01-19,320.0,PUT


In [51]:
# Show the current contents of the market table.
pd.read_sql('select * from market', sql_core.connection)

Unnamed: 0,market_id,instrument_id,timestamp,bid,ask,mid,delta,IV,gamma,thela,vega
0,1,1,2022-01-22 16:00:00,11.00,11.00,11.000,11.000,11.000,11.0,11.000,11.0
1,2,1,2022-01-22 16:00:00,77.80,78.00,77.900,0.999,548.517,0.0,-0.046,0.0
2,3,1,2022-01-22 16:00:00,77.80,78.10,77.950,1.000,163.080,0.0,-0.020,0.0
3,4,3,2022-11-25 11:26:21,77.80,78.00,77.900,0.999,548.517,0.0,-0.046,0.0
4,5,4,2022-11-25 11:26:05,77.80,78.10,77.950,1.000,163.080,0.0,-0.020,0.0
...,...,...,...,...,...,...,...,...,...,...,...
2066,2067,2064,2022-11-25 11:26:26,161.25,162.95,162.100,-1.000,29.145,0.0,0.000,0.0
2067,2068,2065,2022-11-25 11:25:49,160.45,163.20,161.825,-1.000,28.598,0.0,0.000,0.0
2068,2069,2066,2022-11-25 11:26:21,159.95,164.30,162.125,-1.000,28.060,0.0,0.000,0.0
2069,2070,2067,2022-11-25 11:26:18,171.25,172.95,172.100,-1.000,29.400,0.0,0.000,0.0


In [7]:
# Hand-built single market row: ids 1/1, a fixed timestamp, and every numeric
# field set to 11; appended to the market table.
abc = pd.DataFrame(
    dict(
        market_id=1,
        instrument_id=1,
        timestamp=datetime.datetime(year=2022, month=1, day=22, hour=16, minute=0),
        **dict.fromkeys(
            ('bid', 'ask', 'mid', 'delta', 'IV', 'gamma', 'thela', 'vega'), 11
        )
    ),
    index=[0],
)
sql_core._add_data('market', abc)

In [17]:
# Show the current contents of the market table.
pd.read_sql('select * from market', sql_core.connection)

Unnamed: 0,market_id,instrument_id,timestamp,bid,ask,mid,delta,IV,gamma,thela,vega
0,1,1,2022-01-22 16:00:00,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0
1,2,1,2022-01-22 16:00:00,77.8,78.0,77.9,0.999,548.517,0.0,-0.046,0.0
2,3,1,2022-01-22 16:00:00,77.8,78.1,77.95,1.0,163.08,0.0,-0.02,0.0
