<a href="https://colab.research.google.com/github/jus-tinian/vix_calc/blob/master/SparkTD_SPXquotes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Create Working Env

In [0]:
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
!wget -q https://www-eu.apache.org/dist/spark/spark-2.4.5/spark-2.4.5-bin-hadoop2.7.tgz
!tar xf spark-2.4.5-bin-hadoop2.7.tgz

In [0]:
import os
# Tell Spark where Java and the unpacked Spark distribution live.
# NOTE(review): paths presume the Colab layout (openjdk-8 package location, tarball
# extracted under /content) — confirm when running elsewhere.
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"] = "/content/spark-2.4.5-bin-hadoop2.7"

In [3]:
!pip install -q findspark
!pip install pyspark



In [4]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem at /content/gdrive.
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


## Imports

In [0]:
import requests
import datetime

import pyspark
import pandas as pd

from pyspark.sql import SparkSession

from pyspark.sql.functions import udf
from pyspark.sql.types import TimestampType, DateType

## Create Spark Session

In [0]:
# Application name and master URL handed to the SparkSession builder.
APP_NAME = "VIXcalc"
SPARK_URL = "local[*]"  # local mode; "*" requests as many worker threads as logical cores

In [0]:
# Build the notebook's session, or reuse one that is already running.
spark = (
    SparkSession.builder
    .master(SPARK_URL)
    .appName(APP_NAME)
    .getOrCreate()
)

In [8]:
# Bare expression: displays the session object as the cell output.
spark

## Make Dates

In [0]:
# Local calendar date of this run; the expiration window is measured from it.
today = datetime.date.today()

In [0]:
# Upper bound (toDate) of the expiration window requested from the API: 37 calendar days out.
# NOTE(review): 37 presumably comes from the Cboe VIX next-term window — confirm.
farDate = today + datetime.timedelta(days=37)

In [0]:
# Lower bound (fromDate) of the expiration window requested from the API: 23 calendar days out.
# NOTE(review): 23 presumably comes from the Cboe VIX near-term window — confirm.
nearDate = today + datetime.timedelta(days=23)

## Get API response

In [0]:
# API key sent as the "apikey" query parameter. It is read from the TDA_API_KEY
# environment variable so a real key never has to be saved in the notebook; when
# the variable is unset the placeholder is used and must be replaced by hand.
KEY = os.environ.get('TDA_API_KEY', 'YOUR_KEY_HERE')

In [0]:
# Option-chain request for $SPX.X ("%24" is the URL-encoded "$"), restricted to
# expirations between nearDate and farDate. Date objects render as YYYY-MM-DD
# when interpolated into the f-string pieces.
url = (
    "https://api.tdameritrade.com/v1/marketdata/chains"
    f"?apikey={KEY}"
    "&symbol=%24SPX.X"
    "&includeQuotes=TRUE"
    "&strategy=SINGLE"
    "&range=ALL"
    f"&fromDate={nearDate}"
    f"&toDate={farDate}"
)

In [0]:
# Fetch the option chain. The timeout keeps a stalled connection from hanging the
# notebook, and raise_for_status() reports HTTP error responses here rather than
# as a confusing KeyError when the payload is indexed in later cells.
req = requests.get(url, timeout=30)
req.raise_for_status()

In [0]:
# Decode the response body into Python objects; later cells read from reqJSON.
reqJSON = req.json()

## Set Keys to access reqJSON

In [0]:
# Top-level reqJSON key for the put side of the chain (indexed as reqJSON[putKey][expiration][strike]).
putKey = 'putExpDateMap'

In [0]:
# Top-level reqJSON key for the call side of the chain (indexed as reqJSON[callKey][expiration][strike]).
callKey = 'callExpDateMap'

In [0]:
# Expiration keys that fall on a Friday (dayofweek == 4). Keys are read from the
# already-decoded reqJSON instead of decoding the response body a second time.
# The date is the text before ':' so parsing does not depend on the width of the
# suffix (a fixed [:-3] slice only works for a two-digit suffix).
# NOTE(review): assumes keys look like 'YYYY-MM-DD:<days>' — confirm against the API.
dateKeys = [
    expKey for expKey in reqJSON[putKey]
    if pd.to_datetime(expKey.split(':', 1)[0]).dayofweek == 4
]

In [0]:
# Pick the earliest and latest Friday expiration keys. Keys start with an ISO
# date, so plain string min/max orders them chronologically.
if not dateKeys:
    # min()/max() would raise ValueError anyway; this keeps the type but says why.
    raise ValueError('no Friday expiration keys available in dateKeys')
nearKey = min(dateKeys)
farKey = max(dateKeys)
# Parse the date part (text before ':') rather than slicing a fixed three
# characters, so the suffix may have any number of digits.
# NOTE(review): assumes keys look like 'YYYY-MM-DD:<days>' — confirm against the API.
nearExpiration = datetime.datetime.strptime(nearKey.split(':', 1)[0], '%Y-%m-%d').date()
farExpiration = datetime.datetime.strptime(farKey.split(':', 1)[0], '%Y-%m-%d').date()

## Set Interest Rate

In [0]:
# Interest rate exactly as reported in the API response.
# NOTE(review): units (percent vs. decimal fraction) are not visible here — confirm before use.
RATE = reqJSON['interestRate']

## Set Columns to Keep

In [0]:
# Quote fields retained from each option record; also the default column set of prep_options.
minKeepCols = [
    'putCall',
    'symbol',
    'bid',
    'ask',
    'expirationDate',
    'daysToExpiration',
    'inTheMoney',
    'strikePrice',
]

## UDFs

In [0]:
# Spark UDF: epoch milliseconds -> date. A null input yields null instead of
# raising TypeError inside the executor.
# NOTE(review): fromtimestamp() converts using the worker's local timezone — confirm that is intended.
convertEpochToDate = udf(
    lambda msEpoch: None if msEpoch is None else datetime.datetime.fromtimestamp(msEpoch / 1000).date(),
    DateType(),
)

In [0]:
# Spark UDF: epoch milliseconds -> timestamp. A null input yields null instead of
# raising TypeError inside the executor.
# NOTE(review): fromtimestamp() converts using the worker's local timezone — confirm that is intended.
convertEpochToTimeStamp = udf(
    lambda msEpoch: None if msEpoch is None else datetime.datetime.fromtimestamp(msEpoch / 1000),
    TimestampType(),
)

## Prep Pipe

In [0]:
def prep_options(source_df, jsonObj, strikesList, sideKey, termKey, minKeepColsParam=minKeepCols):
  """Assemble one side/expiration of the option chain into a cleaned Spark DataFrame.

  Args:
    source_df: Spark DataFrame that seeds the result; rows for the strikes in
      strikesList are appended to it. Its columns must line up positionally
      with minKeepColsParam because the rows are combined with union().
    jsonObj: Decoded API response, indexed as jsonObj[sideKey][termKey][strike].
    strikesList: Strike keys under jsonObj[sideKey][termKey] to append.
    sideKey: Top-level key selecting the side of the chain.
    termKey: Expiration key under sideKey.
    minKeepColsParam: Columns kept from each strike's records.

  Returns:
    Spark DataFrame with columns strikePrice, putCall, symbol, bid, ask,
    daysToExpiration, inTheMoney and expiration (a date derived from
    expirationDate). Only rows with a non-zero bid are kept, and rows whose
    symbol contains 'W' are dropped whenever at least one row without 'W' exists.
  """
  # Gather the qualifying strikes in pandas and hand them to Spark in one go;
  # creating and union-ing a separate Spark DataFrame per strike builds a very
  # deep query plan for chains with hundreds of strikes.
  keptFrames = []
  for strike in strikesList:
    pdf = pd.DataFrame(jsonObj[sideKey][termKey][strike])[minKeepColsParam]
    # Keep a strike only when every one of its quotes has a non-zero bid.
    # Empty frames are skipped: they carry no rows and no usable dtypes.
    if not pdf.empty and pdf['bid'].to_numpy().all():
      keptFrames.append(pdf)

  if keptFrames:
    source_df = source_df.union(
        spark.createDataFrame(pd.concat(keptFrames, ignore_index=True)))

  # The seed rows in source_df were never checked, so filter on bid here too.
  nonZeroBids = source_df.where('bid != 0')

  # Prefer rows whose symbol has no 'W'; fall back to all rows if none exist.
  nonWeekly = nonZeroBids.filter(~nonZeroBids.symbol.contains('W'))
  selected = nonWeekly if nonWeekly.count() != 0 else nonZeroBids

  return selected.withColumn(
      'expiration',
      convertEpochToDate('expirationDate')).select(
          'strikePrice',
          'putCall',
          'symbol',
          'bid',
          'ask',
          'daysToExpiration',
          'inTheMoney',
          'expiration')

## Near Puts

In [0]:
# First strike key (in dict order) of the near-term puts; it seeds the DataFrame.
# NOTE(review): "Min" presumes the API lists strikes in ascending order — confirm.
nearPutMinStrikeKey = list(reqJSON[putKey][nearKey])[0]

In [0]:
# Every near-term put strike key after the first one.
nearPutStrikes = list(reqJSON[putKey][nearKey])[1:]

In [0]:
# Seed Spark DataFrame built from the first near-term put strike, limited to minKeepCols.
rawNearPuts = spark.createDataFrame(
    pd.DataFrame(reqJSON[putKey][nearKey][nearPutMinStrikeKey])[minKeepCols]
)

In [0]:
# Append the remaining strikes to the seed frame and clean the result.
nearPuts = prep_options(
    source_df=rawNearPuts,
    jsonObj=reqJSON,
    strikesList=nearPutStrikes,
    sideKey=putKey,
    termKey=nearKey,
)

In [36]:
# Preview the first five prepared near-term put rows.
nearPuts.show(5)

+-----------+-------+----------------+----+----+----------------+----------+----------+
|strikePrice|putCall|          symbol| bid| ask|daysToExpiration|inTheMoney|expiration|
+-----------+-------+----------------+----+----+----------------+----------+----------+
|     1150.0|    PUT|SPXW_060520P1150|0.05| 0.1|              23|     false|2020-06-05|
|     1200.0|    PUT|SPXW_060520P1200|0.05| 0.1|              23|     false|2020-06-05|
|     1250.0|    PUT|SPXW_060520P1250|0.05|0.15|              23|     false|2020-06-05|
|     1300.0|    PUT|SPXW_060520P1300|0.05| 0.2|              23|     false|2020-06-05|
|     1350.0|    PUT|SPXW_060520P1350|0.05| 0.2|              23|     false|2020-06-05|
+-----------+-------+----------------+----+----+----------------+----------+----------+
only showing top 5 rows



## Far Puts

In [0]:
# First strike key (in dict order) of the far-term puts; it seeds the DataFrame.
# NOTE(review): "Min" presumes the API lists strikes in ascending order — confirm.
farPutMinStrikeKey = list(reqJSON[putKey][farKey])[0]

In [0]:
# Every far-term put strike key after the first one.
farPutStrikes = list(reqJSON[putKey][farKey])[1:]

In [0]:
# Seed Spark DataFrame built from the first far-term put strike, limited to minKeepCols.
rawFarPuts = spark.createDataFrame(
    pd.DataFrame(reqJSON[putKey][farKey][farPutMinStrikeKey])[minKeepCols]
)

In [0]:
# Append the remaining strikes to the seed frame and clean the result.
farPuts = prep_options(
    source_df=rawFarPuts,
    jsonObj=reqJSON,
    strikesList=farPutStrikes,
    sideKey=putKey,
    termKey=farKey,
)

In [35]:
# Preview the first five prepared far-term put rows.
farPuts.show(5)

+-----------+-------+---------------+----+----+----------------+----------+----------+
|strikePrice|putCall|         symbol| bid| ask|daysToExpiration|inTheMoney|expiration|
+-----------+-------+---------------+----+----+----------------+----------+----------+
|      900.0|    PUT| SPX_061920P900|0.05| 0.1|              37|     false|2020-06-19|
|     1000.0|    PUT|SPX_061920P1000|0.05|0.15|              37|     false|2020-06-19|
|     1100.0|    PUT|SPX_061920P1100| 0.1| 0.2|              37|     false|2020-06-19|
|     1150.0|    PUT|SPX_061920P1150| 0.1| 0.2|              37|     false|2020-06-19|
|     1200.0|    PUT|SPX_061920P1200|0.15|0.25|              37|     false|2020-06-19|
+-----------+-------+---------------+----+----+----------------+----------+----------+
only showing top 5 rows



## Near Calls

In [0]:
# First strike key (in dict order) of the near-term calls; it seeds the DataFrame.
# NOTE(review): "Min" presumes the API lists strikes in ascending order — confirm.
nearCallMinStrikeKey = list(reqJSON[callKey][nearKey])[0]

In [0]:
# Every near-term call strike key after the first one.
nearCallStrikes = list(reqJSON[callKey][nearKey])[1:]

In [0]:
# Seed Spark DataFrame built from the first near-term call strike, limited to minKeepCols.
rawNearCalls = spark.createDataFrame(
    pd.DataFrame(reqJSON[callKey][nearKey][nearCallMinStrikeKey])[minKeepCols]
)

In [0]:
# Append the remaining strikes to the seed frame and clean the result.
nearCalls = prep_options(
    source_df=rawNearCalls,
    jsonObj=reqJSON,
    strikesList=nearCallStrikes,
    sideKey=callKey,
    termKey=nearKey,
)

In [41]:
# Preview the first five prepared near-term call rows.
nearCalls.show(5)

+-----------+-------+---------------+------+------+----------------+----------+----------+
|strikePrice|putCall|         symbol|   bid|   ask|daysToExpiration|inTheMoney|expiration|
+-----------+-------+---------------+------+------+----------------+----------+----------+
|      500.0|   CALL|SPXW_060520C500|2311.6|2317.7|              23|      true|2020-06-05|
|      600.0|   CALL|SPXW_060520C600|2211.5|2217.8|              23|      true|2020-06-05|
|      700.0|   CALL|SPXW_060520C700|2111.7|2117.8|              23|      true|2020-06-05|
|      800.0|   CALL|SPXW_060520C800|2011.6|2017.8|              23|      true|2020-06-05|
|      900.0|   CALL|SPXW_060520C900|1911.8|1917.9|              23|      true|2020-06-05|
+-----------+-------+---------------+------+------+----------------+----------+----------+
only showing top 5 rows



## Far Calls

In [0]:
# First strike key (in dict order) of the far-term calls; it seeds the DataFrame.
# NOTE(review): "Min" presumes the API lists strikes in ascending order — confirm.
farCallMinStrikeKey = list(reqJSON[callKey][farKey])[0]

In [0]:
# Every far-term call strike key after the first one.
farCallStrikes = list(reqJSON[callKey][farKey])[1:]

In [0]:
# Seed Spark DataFrame built from the first far-term call strike, limited to minKeepCols.
rawFarCalls = spark.createDataFrame(
    pd.DataFrame(reqJSON[callKey][farKey][farCallMinStrikeKey])[minKeepCols]
)

In [0]:
# Append the remaining strikes to the seed frame and clean the result.
farCalls = prep_options(
    source_df=rawFarCalls,
    jsonObj=reqJSON,
    strikesList=farCallStrikes,
    sideKey=callKey,
    termKey=farKey,
)

In [46]:
# Preview the first five prepared far-term call rows.
farCalls.show(5)

+-----------+-------+--------------+------+------+----------------+----------+----------+
|strikePrice|putCall|        symbol|   bid|   ask|daysToExpiration|inTheMoney|expiration|
+-----------+-------+--------------+------+------+----------------+----------+----------+
|      100.0|   CALL|SPX_061920C100|2706.7|2718.0|              37|      true|2020-06-19|
|      200.0|   CALL|SPX_061920C200|2606.8|2618.0|              37|      true|2020-06-19|
|      300.0|   CALL|SPX_061920C300|2506.7|2518.0|              37|      true|2020-06-19|
|      400.0|   CALL|SPX_061920C400|2406.8|2418.1|              37|      true|2020-06-19|
|      500.0|   CALL|SPX_061920C500|2306.8|2318.1|              37|      true|2020-06-19|
+-----------+-------+--------------+------+------+----------------+----------+----------+
only showing top 5 rows

