In [1]:
import math
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import statistics
import datetime, time
import yfinance as yf
import pandas as pd
from numpy import cumsum, log, polyfit, sqrt, std, subtract
from numpy.random import randn
import random

In [2]:
def sign(x):
    return 1 if x>0 else -1

In [3]:
def BSM_call(S0, strike, y_frac, sigma, rate):
    nd = statistics.NormalDist(0,1)
    d1 = (np.log(S0/strike)+(rate+sigma*sigma/2)*y_frac)/sigma/math.sqrt(y_frac)
    d2 = d1 - sigma * math.sqrt(y_frac)
    return S0*nd.cdf(d1) -  strike*math.exp((-1)*rate*y_frac)*nd.cdf(d2)

def BSM_put(S0, strike, y_frac, sigma, rate):
    nd = statistics.NormalDist(0,1)
    d1 = (np.log(S0/strike)+(rate+sigma*sigma/2)*y_frac)/sigma/math.sqrt(y_frac)
    d2 = d1 - sigma * math.sqrt(y_frac)
    return -S0*nd.cdf(-d1) +  strike*math.exp((-1)*rate*y_frac)*nd.cdf(-d2)

In [4]:
def hurst(ts):
    npts = np.array(ts)
    logs = log(npts[:-1]/npts[1:])
    groups = np.arange(int(len(logs) / 10), 1, -1)
    RS = np.zeros(len(groups)+1)
    line = np.zeros(len(groups)+1)
    for group in groups:
        sub_RS = np.zeros(group)
        stdev = np.zeros(group)
        line[group-1] = len(logs)/group
        for sub in range(group):
            RSmax, RSmin = 0, 0
            sub_group = np.array(logs[int(len(logs)/group)*sub : int(len(logs)/group*(sub+1))])
            for in_sub in range(1, int(len(logs)/group)):
                sub_RS[sub] += sub_group[in_sub]
                RSmax = max(sub_RS[sub], RSmax)
                RSmin = min(sub_RS[sub], RSmin)
            sub_RS[sub] = RSmax - RSmin
            stdev[sub] = std(sub_group)
        a = sub_RS / stdev
        RS[group-1] = a.mean()
    poly = polyfit(log(line[1:]), log(RS[1:]), 1)
    H = poly[0] 
    return H 

In [5]:
def getparams(ticker, strt, strt1, endt, endt1, intrvl):
    format = '%Y-%m-%d'
    dtstrt = datetime.datetime.strptime(strt, format)
    dtendt = datetime.datetime.strptime(endt, format)
    time_delta = dtendt-dtstrt
    time_marshall_as_year = time_delta.total_seconds() / 365.25 / 86400
    
    try:
        
        data = yf.download(ticker, start=strt1, end=endt1, interval=intrvl)
        zero = 1 / len(data)

        s1 = data['Adj Close'][len(data['Adj Close'])-1]

        data = yf.download(ticker, start=strt, end=endt, interval=intrvl)
        zero = 1 / len(data)
        if math.isnan(data['Adj Close'][0]):
            data = data.iloc[:][1:]
        zero = 1 / len(data)

        tau = time_marshall_as_year / len(data['Adj Close'])
        #print("year frac = ", time_marshall_as_year)
        #print("tau = ", tau)
        ticks = np.arange(1,len(data['Adj Close'])+1)
        obs = len(ticks)
        #plt.plot(ticks, data['Adj Close'])
        #plt.show()
        np_data = data['Adj Close']
        log_data = np.zeros(len(np_data))
        div = np_data[0]
        log_data = np.log(np_data / div)
        XT = log_data[len(log_data)-1]
        mean = XT / len(log_data)
        var = 0
        for i in range(1, len(log_data)):
            var += (log_data[i] - log_data[i-1]) * (log_data[i] - log_data[i-1])
        var /= len(log_data) * tau
        mu = XT/time_marshall_as_year
        sigma = math.sqrt(var)
        hrst = hurst(data['Adj Close'])
        call = "Call" if mu > 0 else "Put"
        r = 0.025
        s0 = np_data[0]
        s01 = np_data[len(ticks)-1]
        tmy = time_marshall_as_year
        bsm = BSM_call(s01, s01, tmy, sigma, r) if mu > 0 else BSM_put(s01, s01, tmy, sigma, r)
        res = (sign(mu)*(-s01 + s1) - bsm) / bsm
        res = res if res > -1 else -1
        sgn = "+" if mu>0 else "-"
        
        cbsm = BSM_call(s01, s01, tmy, sigma, r)
        cr = (s1 - s01 - cbsm) / cbsm
        cr = cr if cr > -1 else -1
        
        pbsm = BSM_put(s01, s01, tmy, sigma, r)
        pr = (s01 - s1 - pbsm) / pbsm
        pr = pr if pr > -1 else -1
        
        rsgn = sign(random.uniform(-3, 3))
        rcall = "Call" if rsgn > 0 else "Put"
        rbsm = BSM_call(s01, s01, tmy, sigma, r) if rsgn > 0 else BSM_put(s01, s01, tmy, sigma, r)
        rr = (rsgn*(-s01 + s1) - rbsm) / rbsm
        rr = rr if rr > -1 else -1
        
        print (sgn, "s0 = ", np_data[0],"s01 = ", s01, "s1 = ", s1, 'bsm = ', bsm, 'res = ', res)
        #print('Hurst =',hrst,'Sigma =',sigma,'Mu =',mu,'BSM_res = ',res)
        print()
        return obs, mu, sigma, hrst, call, res, cr, pr, rcall, rr
    except (ZeroDivisionError, TypeError, IndexError, NameError, KeyError):
        print("Error")
        return 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [16]:
time_list = []
for y in range(19, 22):
    for m in range (1, 13):
        time_list.append("20"+str(y)+"-"+str(m))
time_list=time_list[5:]

In [17]:
results = []
litera = 'C'
index = 0

In [18]:
ticker_list = pd.read_csv('C:/Users/prof-/OneDrive/Рабочий стол/ПНИПУ/FinanceMath/tickers/'+litera+'_ticks.csv', index_col=0)

In [19]:
a = len(ticker_list['Символ'])
b = len(time_list[:-7])
c = a * b

In [20]:
print(c)
print(index)

5328
0


In [21]:
for i in range(index+1, len(ticker_list['Символ'])-1):
    try:
        tick = ticker_list['Символ'][i]
        index = i
        tr = yf.download(tick, start=time_list[0]+"-16", end=time_list[6]+"-16", interval="1h")
        zero = 1 / len(tr)
        for j in range(len(time_list)-6):
            results.append(getparams(tick, time_list[j]+"-15", time_list[j+5]+"-16", time_list[j+3]+"-15", time_list[j+6]+"-16", '1h'))
        print("Processing "+str(i/a)+"% ...")
    except (ZeroDivisionError, TypeError, IndexError, NameError, KeyError):
        print("Error")

[*********************100%***********************]  1 of 1 completed

1 Failed download:
- CJES: No data found for this date range, symbol may be delisted
Error
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- PFH: Data doesn't exist for startDate = 1560625200, endDate = 1576436400
Error
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- CAB: No data found for this date range, symbol may be delisted
Error
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
+ s0 =  45.63999938964844 s01 =  47.2400016784668 s1 =  47.41999816894531 bsm =  3.4866014951419295 res =  -0.9483748025894801

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
- s0 =  45.689998626708984 s01 =  43

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
+ s0 =  16.649999618530273 s01 =  18.190000534057617 s1 =  19.405000686645508 bsm =  1.9564267779254347 res =  -0.3789697798574101

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
+ s0 =  17.389999389648438 s01 =  19.530000686645508 s1 =  17.6299991607666 bsm =  3.0593522079665174 res =  -1

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
+ s0 =  14.829999923706055 s01 =  18.600000381469727 s1 =  20.030000686645508 bsm =  3.04158373413836 res =  -0.5298500944999033

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
+ s0 =  18.540000915527344 s01 =  20.059999465942383 s1 =  18.79999923706

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
+ s0 =  223.07000732421875 s01 =  243.40499877929688 s1 =  249.77999877929688 bsm =  15.270119670978517 res =  -0.5825180065801353

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
+ s0 =  217.4499969482422 s01 =  245.11000061035156 s1 =  257.4599914550781 bsm =  14.623159931874952 res =  -0.1554499231177408

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
- s0 =  237.17999267578125 s01 =  236.8800048828125 s1 =  259.9800109863281 bsm =  11.08550085784772 res =  -1

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
- s0 =  243.08999633789062 s01 =  241.4499969482422 s1 =  256.410003662109

[*********************100%***********************]  1 of 1 completed

1 Failed download:
- CAP: Data doesn't exist for startDate = 1560625200, endDate = 1576436400
Error
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- DVR: No data found for this date range, symbol may be delisted
Error
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- CCC: No data found, symbol may be delisted
Error
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
+ s0 =  51.150001525878906 s01 =  54.099998474121094 s1 =  49.33000183105469 bsm =  2.2246865988356497 res =  -1

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
+ s0 =  51.0099983215332 s01 =  54.78499984741211 s1 =  50.25 bsm =  2.

LinAlgError: SVD did not converge in Linear Least Squares

In [None]:
dtfrm = pd.DataFrame(results[1:],columns=['Observations', 'Trend', 'Vol', 'Hurst', 'Option', 'Res', 'CallOnly', 'PutOnly', 'RandDec', 'rRes'])
print(index)
dtfrm = dtfrm[dtfrm['Observations'] >250 ]
dtfrm = dtfrm[dtfrm['Vol'] < 1]

In [None]:
dtfrm.describe()

In [None]:
writer = pd.ExcelWriter('C:/Users/prof-/OneDrive/Рабочий стол/ПНИПУ/FinanceMath/tickers/'+litera+'_specs.xlsx', engine='xlsxwriter')
dtfrm.to_excel(writer, 'Sheet1')
writer.save()