In [1]:
import spgci as ci
import sqlite3
import pandas as pd
from datetime import date

In [2]:
# Client object used by the data-fetching cells below (assessments and symbol reference data).
mdd = ci.MarketData()

## Backfill data
Assume 2023-01-01 is the date we start storing data in the database; everything up to that date is backfilled first.

In [3]:
# Cut-over date: history up to this date (`assess_date_lte`) is fetched in one backfill request.
initial_date = date(2023, 1, 1)
df = mdd.get_assessments_by_symbol_historical(
    symbol="PCAAS00",
    assess_date_lte=initial_date,
    paginate=True,
)
df

Fetching...: 100%|██████████| 3/3 [00:00<00:00,  2.74it/s]


Unnamed: 0,bate,value,assessDate,isCorrected,modDate,symbol
0,h,19.550,1987-07-08,N,1998-05-19 14:23:09,PCAAS00
1,l,19.450,1987-07-08,N,1998-05-19 14:23:54,PCAAS00
2,h,19.700,1987-07-09,N,1998-05-19 14:23:09,PCAAS00
3,l,19.650,1987-07-09,N,1998-05-19 14:23:54,PCAAS00
4,h,19.800,1987-07-10,N,1998-05-19 14:23:09,PCAAS00
...,...,...,...,...,...,...
20927,h,80.690,2022-12-29,N,2022-12-29 19:13:14,PCAAS00
20928,l,80.680,2022-12-29,N,2022-12-29 19:13:14,PCAAS00
20929,c,81.325,2022-12-30,N,2022-12-30 14:13:00,PCAAS00
20930,h,81.330,2022-12-30,N,2022-12-30 14:13:00,PCAAS00


Connect to the database and set up the schema for `spgci_assessments`.

In [4]:
# Open (or create) the SQLite database file. The detect_types flags ask sqlite3 to
# convert stored values back to Python types based on declared column types / column names.
conn = sqlite3.connect('db.db', detect_types=sqlite3.PARSE_COLNAMES|sqlite3.PARSE_DECLTYPES)

# get_schema() emits a bare `CREATE TABLE ...`, which fails with "table already exists"
# when this cell is re-run against the persistent db file. Adding IF NOT EXISTS keeps
# the cell repeatable (Restart & Run All) without touching an existing table.
create_assessments_sql = pd.io.sql.get_schema(
    df, 'spgci_assessments', con=conn, keys=['symbol', 'bate', 'assessDate']
).replace('CREATE TABLE', 'CREATE TABLE IF NOT EXISTS', 1)
conn.execute(create_assessments_sql)

<sqlite3.Cursor at 0x13410f0c0>

Function to generate a parameterized `INSERT OR REPLACE` statement

In [5]:
def prepare_stmt(table, columns):
    """Build a parameterized ``INSERT OR REPLACE`` statement for ``table``.

    Args:
        table: Name of the target table.
        columns: Iterable of column labels (for example ``df.columns``), in the
            order the values will later be bound. Labels are converted with
            ``str`` and the iterable is consumed only once, so generators and
            non-string labels are accepted.

    Returns:
        The SQL text, with one ``?`` placeholder per column, e.g.
        ``INSERT OR REPLACE INTO t (a,b) VALUES (?,?)``.

    Raises:
        ValueError: If ``columns`` is empty (the resulting SQL would be invalid).

    Note:
        Table and column names are interpolated into the SQL verbatim (they
        cannot be bound as parameters), so they must come from a trusted source.
    """
    # Materialize once: the names are needed both for the column list and for
    # counting placeholders, and a one-shot iterable would be empty the second time.
    names = [str(name) for name in columns]
    if not names:
        raise ValueError("columns must contain at least one column name")

    cols = f"({','.join(names)})"
    values = f"({','.join('?' * len(names))})"
    return f"INSERT OR REPLACE INTO {table} {cols} VALUES {values}"

In [6]:
# Insert statement for the assessments table: one `?` placeholder per DataFrame column.
ps = prepare_stmt("spgci_assessments", df.columns)
ps

'INSERT OR REPLACE INTO spgci_assessments (bate,value,assessDate,isCorrected,modDate,symbol) VALUES (?,?,?,?,?,?)'

Function to insert a single DataFrame row

In [7]:
def _to_sql_value(value):
    """Convert one cell value into something sqlite3 can bind as a parameter."""
    if isinstance(value, list):
        # Lists are stored as their Python repr, e.g. "['c', 'h', 'l']".
        return str(value)
    if value is pd.NaT or value is pd.NA:
        # pandas missing-value markers are not bindable; store them as NULL.
        return None
    if isinstance(value, pd.Timestamp):
        return value.to_pydatetime()
    # Everything else (str, float, int, None, float NaN, ...) is passed through unchanged.
    return value


def insert(conn, stmt, row):
    """Execute ``stmt`` on ``conn`` with the values of ``row`` bound as parameters.

    Args:
        conn: Object with an ``execute(sql, parameters)`` method, e.g. a
            ``sqlite3.Connection``.
        stmt: SQL text with one ``?`` placeholder per value in ``row``.
        row: Iterable of cell values (for example one DataFrame row).

    Each value is converted first: ``pd.Timestamp`` becomes a ``datetime``,
    lists become their string repr, and ``pd.NaT`` / ``pd.NA`` become ``None``.
    The function does not commit; the caller decides when to commit.
    """
    conn.execute(stmt, [_to_sql_value(value) for value in row])


In [8]:
# NOTE(review): looks like a leftover sanity check that a Timestamp never matches the
# `list` branch of insert() — consider removing it from the finished notebook.
isinstance(pd.Timestamp('2023-01-01'), list)

False

In [9]:
# Load the backfill inside one transaction: `with conn` commits when the block
# finishes and rolls back if any insert raises, so a failure cannot leave a
# half-written history behind. A plain loop replaces DataFrame.apply, which was
# used only for its side effects and built a throwaway Series of None.
with conn:
    for row in df.itertuples(index=False, name=None):
        insert(conn, ps, row)

Observe that the records are in the database; the backfill is complete.

In [10]:
# Most recent assessments first — a quick look at what the backfill stored.
latest_backfill_sql = """
    select * from spgci_assessments 
    order by assessDate desc
    limit 10;
"""
for record in conn.execute(latest_backfill_sql):
    print(record)

('c', 81.325, datetime.datetime(2022, 12, 30, 0, 0), 'N', datetime.datetime(2022, 12, 30, 14, 13), 'PCAAS00')
('h', 81.33, datetime.datetime(2022, 12, 30, 0, 0), 'N', datetime.datetime(2022, 12, 30, 14, 13), 'PCAAS00')
('l', 81.32, datetime.datetime(2022, 12, 30, 0, 0), 'N', datetime.datetime(2022, 12, 30, 14, 13), 'PCAAS00')
('c', 80.685, datetime.datetime(2022, 12, 29, 0, 0), 'N', datetime.datetime(2022, 12, 29, 19, 13, 14), 'PCAAS00')
('h', 80.69, datetime.datetime(2022, 12, 29, 0, 0), 'N', datetime.datetime(2022, 12, 29, 19, 13, 14), 'PCAAS00')
('l', 80.68, datetime.datetime(2022, 12, 29, 0, 0), 'N', datetime.datetime(2022, 12, 29, 19, 13, 14), 'PCAAS00')
('c', 81.14, datetime.datetime(2022, 12, 28, 0, 0), 'N', datetime.datetime(2022, 12, 28, 18, 43, 53), 'PCAAS00')
('h', 81.15, datetime.datetime(2022, 12, 28, 0, 0), 'N', datetime.datetime(2022, 12, 28, 18, 43, 53), 'PCAAS00')
('l', 81.13, datetime.datetime(2022, 12, 28, 0, 0), 'N', datetime.datetime(2022, 12, 28, 18, 43, 53), 'PCA

## Fetch Deltas
Now let's start getting newer data.

In reality, this would be called every day (or several times a day) to add more data as it becomes available.

In [11]:
# Same symbol, now from the cut-over date forward (`assess_date_gte`).
df_since = mdd.get_assessments_by_symbol_historical(
    symbol="PCAAS00",
    assess_date_gte=initial_date,
    paginate=True,
)
df_since.head()

Unnamed: 0,bate,value,assessDate,isCorrected,modDate,symbol
0,c,81.23,2023-01-03,N,2023-01-03 18:32:18,PCAAS00
1,h,81.24,2023-01-03,N,2023-01-03 18:32:18,PCAAS00
2,l,81.22,2023-01-03,N,2023-01-03 18:32:18,PCAAS00
3,c,75.52,2023-01-04,N,2023-01-04 18:20:35,PCAAS00
4,h,75.53,2023-01-04,N,2023-01-04 18:20:35,PCAAS00


In [12]:
stmt = prepare_stmt("spgci_assessments", df_since.columns)

# One transaction for the whole delta: `with conn` commits on success and rolls
# back if any insert raises. A plain loop replaces DataFrame.apply, which was used
# only for its side effects and built a throwaway Series of None.
with conn:
    for row in df_since.itertuples(index=False, name=None):
        insert(conn, stmt, row)

Observe new data has been added

In [13]:
# Most recent assessments first — the newly fetched dates should now appear at the top.
latest_delta_sql = """
    select * from spgci_assessments 
    order by assessDate desc
    limit 10;
"""
for record in conn.execute(latest_delta_sql):
    print(record)

('c', 77.84, datetime.datetime(2023, 3, 28, 0, 0), 'N', datetime.datetime(2023, 3, 28, 17, 1, 48), 'PCAAS00')
('h', 77.85, datetime.datetime(2023, 3, 28, 0, 0), 'N', datetime.datetime(2023, 3, 28, 17, 1, 48), 'PCAAS00')
('l', 77.83, datetime.datetime(2023, 3, 28, 0, 0), 'N', datetime.datetime(2023, 3, 28, 17, 1, 48), 'PCAAS00')
('c', 75.255, datetime.datetime(2023, 3, 27, 0, 0), 'N', datetime.datetime(2023, 3, 27, 18, 24, 7), 'PCAAS00')
('h', 75.26, datetime.datetime(2023, 3, 27, 0, 0), 'N', datetime.datetime(2023, 3, 27, 18, 24, 7), 'PCAAS00')
('l', 75.25, datetime.datetime(2023, 3, 27, 0, 0), 'N', datetime.datetime(2023, 3, 27, 18, 24, 7), 'PCAAS00')
('c', 73.245, datetime.datetime(2023, 3, 24, 0, 0), 'N', datetime.datetime(2023, 3, 24, 18, 37, 1), 'PCAAS00')
('h', 73.25, datetime.datetime(2023, 3, 24, 0, 0), 'N', datetime.datetime(2023, 3, 24, 18, 37, 1), 'PCAAS00')
('l', 73.24, datetime.datetime(2023, 3, 24, 0, 0), 'N', datetime.datetime(2023, 3, 24, 18, 37, 1), 'PCAAS00')
('c', 75

Now, let's add reference data so we know the description, currency, unit of measure (uom), etc.

In [14]:
# Symbol reference data (description, currency, uom, ...) for the symbols returned for mdc="RI".
# NOTE(review): the meaning of the "RI" code is not shown in this notebook — confirm.
ref = mdd.get_symbols(mdc="RI", paginate=True)
ref

Unnamed: 0,symbol,description,commodity,assessment_frequency,bate_code,benchmark,holiday_schedule,commodity_grade,contract_type,currency,...,kinematic_viscosity,pour_point,flash_point,originating_region,originating_region_basis,curve_code,curve_name,cargo_size,delivery_load,exchange
0,AJSVB00,Johan Sverdrup FOB North Sea vs North Sea Dtd ...,Crude oil,Daily (weekday),[c],Benchmark,CALLN00,Platts - Crude Oil - Johan Sverdrup,Spot,USD,...,,,,,,,,,,
1,AAGZU00,CPC Blend CIF Augusta,Crude oil,Daily (weekday),"[c, h, l]",Benchmark,CALLN00,Platts - Crude Oil - CPC Blend,Spot,USD,...,,,,,,,,,,
2,AALIN00,Urals Recombined (RCMB),Crude oil,Daily (weekday),"[c, h, l]",Benchmark,CALLN00,Platts - Crude Oil - Ural,Spot,USD,...,,,,,,,,,,
3,PCAAQ00,Brent Mo01 (NextGen MOC),Crude oil,Daily (weekday),"[c, h, l]",Benchmark,CALLN00,Platts - Crude Oil - Brent,Forward,USD,...,,,,,,,,,,
4,AWTID00,WTI Midland DAP Basis Rotterdam vs Fwd Dated B...,Crude oil,Daily (weekday),[c],Benchmark,CALLN00,Platts - Crude Oil - WTI Midland,Spot,USD,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
599,AAUFI03,BTC Dated Brent Strip MAvg,Crude oil,Monthly,"[c, h, l]",,#Not Applicable,Platts - Crude Oil - Azeri Light,Strip,USD,...,,,,,,,,,,
600,AALDG00,Med Dated Brent Strip 13-28 Day MAvg,Crude oil,Monthly,"[c, h, l]",,#Not Applicable,Platts - Crude Oil - Brent,Strip,USD,...,,,,,,,,,,
601,AAWXY03,Brent NX EFP Mo03 MAvg,Crude oil,Monthly,"[c, h, l]",,#Not Applicable,Platts - Crude Oil - Brent,EFP,USD,...,,,,,,,,,,
602,AAGXO00,Qua Iboe FOB Nigeria vs WAF Dtd Strip MAvg,Crude oil,Monthly,"[c, h, l]",,#Not Applicable,Platts - Crude Oil - Qua Iboe,Spot,USD,...,3.92,12.0,,,,,,,,


Same as before:

create the table, prepare the statement, insert the rows, and commit.

In [15]:
# get_schema() emits a bare `CREATE TABLE ...`, which fails with "table already exists"
# when this cell is re-run against the persistent db file. Adding IF NOT EXISTS keeps
# the cell repeatable without touching an existing table.
create_symbols_sql = pd.io.sql.get_schema(
    ref, 'spgci_symbols', con=conn, keys=['symbol']
).replace('CREATE TABLE', 'CREATE TABLE IF NOT EXISTS', 1)
conn.execute(create_symbols_sql)

<sqlite3.Cursor at 0x13410f9c0>

In [16]:
stmt = prepare_stmt("spgci_symbols", ref.columns)

# One transaction for the whole reference load: `with conn` commits on success and
# rolls back if any insert raises. A plain loop replaces DataFrame.apply, which was
# used only for its side effects and built a throwaway Series of None.
with conn:
    for row in ref.itertuples(index=False, name=None):
        insert(conn, stmt, row)

In [17]:
# Display the DDL pandas generates for `ref` — the same get_schema() call used to create the table.
pd.io.sql.get_schema(ref, 'spgci_symbols', con=conn, keys=['symbol'])

'CREATE TABLE "spgci_symbols" (\n"symbol" TEXT,\n  "description" TEXT,\n  "commodity" TEXT,\n  "assessment_frequency" TEXT,\n  "bate_code" TEXT,\n  "benchmark" TEXT,\n  "holiday_schedule" TEXT,\n  "commodity_grade" TEXT,\n  "contract_type" TEXT,\n  "currency" TEXT,\n  "decimal_places" REAL,\n  "publication_frequency_code" TEXT,\n  "mdc_description" TEXT,\n  "mdc" TEXT,\n  "day_of_publication" TEXT,\n  "quotation_style" TEXT,\n  "delivery_region" TEXT,\n  "delivery_region_basis" TEXT,\n  "settlement_type" TEXT,\n  "shipping_terms" TEXT,\n  "api_gravity" REAL,\n  "standard_lot_size" REAL,\n  "standard_lot_units" TEXT,\n  "active" TEXT,\n  "sulfur" REAL,\n  "timestamp" TEXT,\n  "uom" TEXT,\n  "delivery_region_port_id" TEXT,\n  "density" REAL,\n  "delivery_region_basis_port_id" TEXT,\n  "derivative_maturity_frequency" TEXT,\n  "derivative_position" TEXT,\n  "kinematic_viscosity" REAL,\n  "pour_point" REAL,\n  "flash_point" REAL,\n  "originating_region" TEXT,\n  "originating_region_basis" T

Now we have reference data

In [18]:
# Peek at a few stored reference rows.
sample_symbols_sql = """
    select * from spgci_symbols
    limit 3;
"""
for record in conn.execute(sample_symbols_sql):
    print(record)

('AJSVB00', 'Johan Sverdrup FOB North Sea vs North Sea Dtd Strip', 'Crude oil', 'Daily (weekday)', "['c']", 'Benchmark', 'CALLN00', 'Platts - Crude Oil - Johan Sverdrup', 'Spot', 'USD', 3.0, 'DW', "['MI Crude', 'Crudes: at 16:30 London']", "['CRU', 'RI']", 'Daily (weekdays)', 'Differential', 'North Sea', 'North Sea', 'Physical', 'FOB', 28.0, 600000.0, 'BBL', 'Active - Public', 0.8, '16:30 UK', 'BBL', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None)
('AAGZU00', 'CPC Blend CIF Augusta', 'Crude oil', 'Daily (weekday)', "['c', 'h', 'l']", 'Benchmark', 'CALLN00', 'Platts - Crude Oil - CPC Blend', 'Spot', 'USD', 3.0, 'DW', "['MI Crude', 'Crudes: at 16:30 London']", "['CRU', 'RI']", 'Daily (weekdays)', 'Flat', 'Mediterranean', 'Augusta', 'Physical', 'CIF', 43.5, None, None, 'Active - Public', 0.55, '16:30 UK', 'BBL', 'MED', 806.7, None, None, None, None, None, None, None, None, None, None, None, None, None)
('AALIN00', 'Urals Recombined (RCMB)', 'Crude

Example query joining the two datasets

In [19]:
# Join assessments to their reference data and keep only bate 'c' rows, oldest first.
# The literal is single-quoted: in SQL, double quotes denote identifiers, and SQLite
# only treats "c" as a string through a legacy fallback that can be disabled and
# would silently change meaning if a column named c ever existed.
r = conn.execute("""
  SELECT
    s.symbol,
    s.description,
    s.currency,
    s.uom,
    a.bate,
    a.value,
    a.assessDate
  FROM
    spgci_symbols s
    JOIN spgci_assessments a ON s.symbol = a.symbol
  WHERE
    a.bate = 'c'
  ORDER BY
    a.assessDate ASC
  LIMIT 10;
""")
for row in r:
    print(row)

('PCAAS00', 'Dated Brent', 'USD', 'BBL', 'c', 106.87, datetime.datetime(2011, 8, 5, 0, 0))
('PCAAS00', 'Dated Brent', 'USD', 'BBL', 'c', 106.66, datetime.datetime(2011, 11, 1, 0, 0))
('PCAAS00', 'Dated Brent', 'USD', 'BBL', 'c', 110.835, datetime.datetime(2011, 11, 2, 0, 0))
('PCAAS00', 'Dated Brent', 'USD', 'BBL', 'c', 110.94, datetime.datetime(2011, 11, 3, 0, 0))
('PCAAS00', 'Dated Brent', 'USD', 'BBL', 'c', 112.195, datetime.datetime(2011, 11, 4, 0, 0))
('PCAAS00', 'Dated Brent', 'USD', 'BBL', 'c', 115.155, datetime.datetime(2011, 11, 7, 0, 0))
('PCAAS00', 'Dated Brent', 'USD', 'BBL', 'c', 115.8, datetime.datetime(2011, 11, 8, 0, 0))
('PCAAS00', 'Dated Brent', 'USD', 'BBL', 'c', 115.23, datetime.datetime(2011, 11, 9, 0, 0))
('PCAAS00', 'Dated Brent', 'USD', 'BBL', 'c', 113.43, datetime.datetime(2011, 11, 10, 0, 0))
('PCAAS00', 'Dated Brent', 'USD', 'BBL', 'c', 114.345, datetime.datetime(2011, 11, 11, 0, 0))
