In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.io import loadmat
from scipy.stats import norm
import powerlaw
%matplotlib inline

# Global figure styling: STIX text font at size 20, Computer Modern ("cm") for math text.
plt.rcParams["font.family"] = "STIXGeneral"
plt.rcParams["font.size"] = 20
plt.rcParams["mathtext.fontset"] = "cm"

In [2]:
# Load the MATLAB data file located next to the notebook. The result is indexed
# by variable name; the following cell reads its 'p' entry.
matdata = loadmat('./SSEC_min.mat')

In [3]:
# Minute-level price data of the Shanghai Composite Index: first column of 'p'.
p_min = matdata['p'][:, 0]

# Log returns outside [-RETURN_OUTLIER_BOUND, RETURN_OUTLIER_BOUND] are treated
# as outliers and removed from every series.
RETURN_OUTLIER_BOUND = 0.1


def _log_returns(prices, bound=RETURN_OUTLIER_BOUND):
    """Return the log returns of ``prices`` with outliers removed.

    The return at position ``i`` is ``log(prices[i + 1]) - log(prices[i])``.
    Only values inside the closed interval ``[-bound, bound]`` are kept, so
    the result can be shorter than ``len(prices) - 1``.
    """
    log_prices = np.log(prices)
    returns = log_prices[1:] - log_prices[:-1]
    return returns[(returns >= -bound) & (returns <= bound)]


# Down-sample the minute prices by keeping every k-th observation.
p_min5 = p_min[::5]      # one price every 5 minutes
p_min10 = p_min[::10]    # one price every 10 minutes
p_min30 = p_min[::30]    # one price every 30 minutes
p_min60 = p_min[::60]    # one price every 60 minutes
p_min120 = p_min[::120]  # one price every 120 minutes
p_min240 = p_min[::240]  # one price every 240 minutes

# Outlier-filtered log-return series for the 1-, 5-, 10-, 30-, 60-, 120- and
# 240-minute scales.
r_min1 = _log_returns(p_min)
r_min5 = _log_returns(p_min5)
r_min10 = _log_returns(p_min10)
r_min30 = _log_returns(p_min30)
r_min60 = _log_returns(p_min60)
r_min120 = _log_returns(p_min120)
r_min240 = _log_returns(p_min240)

  """
  """
  
  # This is added back by InteractiveShellApp.init_path()
  


In [81]:
# Helper that estimates the empirical probability density of a sample
def myfun_emp_pdf(data_sample, num_bin=93):
    """Estimate the empirical probability density of a one-dimensional sample.

    The range ``[min(data_sample), max(data_sample)]`` is split by ``num_bin``
    equally spaced edges, i.e. into ``num_bin - 1`` bins. Each bin is
    half-open ``[left, right)`` except the last one, which is closed so that
    the sample maximum is counted as well. (Previously the maximum was
    dropped and the density integrated to slightly less than 1.)

    Parameters
    ----------
    data_sample : array_like
        Sample values; must be non-empty and should span a non-zero range,
        otherwise the bin widths are zero.
    num_bin : int, optional
        Number of bin *edges* (not bins). Defaults to 93.

    Returns
    -------
    x_emp : numpy.ndarray
        Bin centres, length ``num_bin - 1``.
    y_emp : numpy.ndarray
        Density per bin: count / sample size / bin width.

    Raises
    ------
    ValueError
        If ``data_sample`` is empty (raised by ``np.min``).
    """
    samples = np.asarray(data_sample)
    edges = np.linspace(np.min(samples), np.max(samples), num_bin)
    # With explicit edges np.histogram uses [left, right) bins and a closed
    # final bin, which is exactly the convention needed here.
    counts, _ = np.histogram(samples, bins=edges)
    widths = np.diff(edges)
    x_emp = (edges[:-1] + edges[1:]) / 2
    y_emp = counts / samples.size / widths
    return x_emp, y_emp

In [82]:
# Empirical PDF (x values, density values) of every return series, from the
# 1-minute scale up to the 240-minute scale, all with the default bin setting.
(
    (x_emp_min1, y_emp_min1),
    (x_emp_min5, y_emp_min5),
    (x_emp_min10, y_emp_min10),
    (x_emp_min30, y_emp_min30),
    (x_emp_min60, y_emp_min60),
    (x_emp_min120, y_emp_min120),
    (x_emp_min240, y_emp_min240),
) = (
    myfun_emp_pdf(series)
    for series in (r_min1, r_min5, r_min10, r_min30, r_min60, r_min120, r_min240)
)

In [119]:
def powerlawTest(data, step=100):
    """Fit a power law to a subsample of ``data`` and print the diagnostics.

    Every ``step``-th element of ``data`` is passed to ``powerlaw.Fit``. The
    fitted exponent and lower cut-off are printed, followed by the
    log-likelihood ratio ``R`` and p-value of the comparison between the
    'power_law' and 'lognormal' candidates.

    Parameters
    ----------
    data : sequence or numpy.ndarray
        Sliceable series to analyse. The recorded notebook output shows that
        ``powerlaw`` discards values <= 0, so for signed returns only the
        positive values enter the fit.
    step : int, optional
        Subsampling stride. The default of 100 reproduces the previously
        hard-coded ``data[::100]``; use a smaller value for short series so
        that enough observations remain for the fit.

    Raises
    ------
    ValueError
        If ``step`` is smaller than 1.
    """
    if step < 1:
        raise ValueError("step must be a positive integer")

    fit = powerlaw.Fit(data[::step])
    print('alpha:', fit.alpha)
    print('xmin:', fit.xmin)
    R, p = fit.distribution_compare('power_law', 'lognormal')
    print('log-likelihood ratio test:', R)
    print('p-value:', p)
    # No figure is created here, so the former plt.show() call was dropped.

In [120]:
# Power-law fit and power-law vs. lognormal comparison for the 1-minute returns.
powerlawTest(r_min1)

Values less than or equal to 0 in data. Throwing out 0 or negative values


Calculating best minimal value for power law fit
alpha: 3.2686782360085647
xmin: 0.0007267867063465161
log-likelihood ratio test: -3.515878967208298
p-value: 0.15422223839840396


In [122]:
# Same test for the 240-minute returns (run here ahead of the intermediate scales).
# NOTE(review): powerlawTest keeps only every 100th return by default, which may
# leave few points at this coarse scale — possibly related to the nan warnings; confirm.
powerlawTest(r_min240)

Calculating best minimal value for power law fit
xmin progress: 00%xmin progress: 03%xmin progress: 06%xmin progress: 10%xmin progress: 13%xmin progress: 17%xmin progress: 20%xmin progress: 24%xmin progress: 27%xmin progress: 31%xmin progress: 34%xmin progress: 37%xmin progress: 41%xmin progress: 44%xmin progress: 48%xmin progress: 51%xmin progress: 55%xmin progress: 58%xmin progress: 62%xmin progress: 65%xmin progress: 68%xmin progress: 72%xmin progress: 75%xmin progress: 79%xmin progress: 82%xmin progress: 86%xmin progress: 89%xmin progress: 93%xmin progress: 96%alpha: 3.7610901613229477
xmin: 0.017267565339982305
log-likelihood ratio test: -0.5826431087100146
p-value: 1.9784162487662318e-05


  self.alpha = 1 + (self.n / sum(log(data/self.xmin)))
  return 1 + len(data)/sum(log(data / (self.xmin)))
  CDF = CDF/norm
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
'nan' in fit cumulative distribution values.
Likely underflow or overflow error: the optimal fit for this distribution gives values that are so extreme that we lack the numerical precision to calculate them.
  return umr_maximum(a, axis, None, out, keepdims, initial)
  return umr_minimum(a, axis, None, out, keepdims, initial)


In [123]:
# Power-law fit and power-law vs. lognormal comparison for the 5-minute returns.
powerlawTest(r_min5)

Values less than or equal to 0 in data. Throwing out 0 or negative values


Calculating best minimal value for power law fit


xmin progress: 00%xmin progress: 00%xmin progress: 00%xmin progress: 00%xmin progress: 00%xmin progress: 00%xmin progress: 00%xmin progress: 00%xmin progress: 01%xmin progress: 01%xmin progress: 01%xmin progress: 01%xmin progress: 01%xmin progress: 01%xmin progress: 01%xmin progress: 02%xmin progress: 02%xmin progress: 02%xmin progress: 02%xmin progress: 02%xmin progress: 02%xmin progress: 02%xmin progress: 03%xmin progress: 03%xmin progress: 03%xmin progress: 03%xmin progress: 03%xmin progress: 03%xmin progress: 03%xmin progress: 03%xmin progress: 04%xmin progress: 04%xmin progress: 04%xmin progress: 04%xmin progress: 04%xmin progress: 04%xmin progress: 04%xmin progress: 05%xmin progress: 05%xmin progress: 05%xmin progress: 05%xmin progress: 05%xmin progress: 05%xmin progress: 05%xmin progress: 06%xmin progress: 06%xmin progress: 06%xmin progress: 06%xmin progress: 06%xmin progress: 06%xmin progress: 06%xmin progress: 07%xmin progres

In [124]:
# Power-law fit and power-law vs. lognormal comparison for the 10-minute returns.
powerlawTest(r_min10)

Calculating best minimal value for power law fit
xmin progress: 00%xmin progress: 00%xmin progress: 00%xmin progress: 00%xmin progress: 01%xmin progress: 01%xmin progress: 01%xmin progress: 01%xmin progress: 02%xmin progress: 02%xmin progress: 02%xmin progress: 02%xmin progress: 03%xmin progress: 03%xmin progress: 03%xmin progress: 04%xmin progress: 04%xmin progress: 04%xmin progress: 04%xmin progress: 05%xmin progress: 05%xmin progress: 05%xmin progress: 05%xmin progress: 06%xmin progress: 06%xmin progress: 06%xmin progress: 07%xmin progress: 07%xmin progress: 07%xmin progress: 07%xmin progress: 08%xmin progress: 08%xmin progress: 08%xmin progress: 08%xmin progress: 09%xmin progress: 09%xmin progress: 09%xmin progress: 10%xmin progress: 10%xmin progress: 10%xmin progress: 10%xmin progress: 11%xmin progress: 11%xmin progress: 11%xmin progress: 11%xmin progress: 12%xmin progress: 12%xmin progress: 12%xmin progress: 13%xmin progress: 13%x

Values less than or equal to 0 in data. Throwing out 0 or negative values
  CDF = CDF/norm
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
'nan' in fit cumulative distribution values.
Likely underflow or overflow error: the optimal fit for this distribution gives values that are so extreme that we lack the numerical precision to calculate them.
  return umr_maximum(a, axis, None, out, keepdims, initial)
  return umr_minimum(a, axis, None, out, keepdims, initial)


In [125]:
# Power-law fit and power-law vs. lognormal comparison for the 30-minute returns.
powerlawTest(r_min30)

Calculating best minimal value for power law fit
xmin progress: 00%xmin progress: 00%xmin progress: 00%xmin progress: 01%xmin progress: 01%xmin progress: 02%xmin progress: 02%xmin progress: 02%xmin progress: 03%xmin progress: 03%xmin progress: 04%xmin progress: 04%xmin progress: 05%xmin progress: 05%

  self.alpha = 1 + (self.n / sum(log(data/self.xmin)))
  return 1 + len(data)/sum(log(data / (self.xmin)))


alpha: 2.6507442606955944
xmin: 0.003622808835492819
log-likelihood ratio test: -0.07813555533856792
p-value: 0.11787100097638763


  CDF = CDF/norm
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
'nan' in fit cumulative distribution values.
Likely underflow or overflow error: the optimal fit for this distribution gives values that are so extreme that we lack the numerical precision to calculate them.
  return umr_maximum(a, axis, None, out, keepdims, initial)
  return umr_minimum(a, axis, None, out, keepdims, initial)


In [126]:
# Power-law fit and power-law vs. lognormal comparison for the 60-minute returns.
powerlawTest(r_min60)

Calculating best minimal value for power law fit
xmin progress: 00%xmin progress: 00%xmin progress: 01%xmin progress: 02%xmin progress: 03%xmin progress: 04%xmin progress: 05%xmin progress: 05%xmin progress: 06%xmin progress: 07%xmin progress: 08%xmin progress: 09%xmin progress: 10%xmin progress: 10%xmin progress: 11%xmin progress: 12%xmin progress: 13%

  self.alpha = 1 + (self.n / sum(log(data/self.xmin)))
  return 1 + len(data)/sum(log(data / (self.xmin)))


alpha: 2.737534069630976
xmin: 0.005582460623935859
log-likelihood ratio test: -1.3356840505113488
p-value: 0.28604227547586014


In [127]:
# Power-law fit and power-law vs. lognormal comparison for the 120-minute returns.
powerlawTest(r_min120)

Calculating best minimal value for power law fit
xmin progress: 00%xmin progress: 01%xmin progress: 03%xmin progress: 05%xmin progress: 06%xmin progress: 08%xmin progress: 10%xmin progress: 11%xmin progress: 13%xmin progress: 15%xmin progress: 16%xmin progress: 18%xmin progress: 20%xmin progress: 22%xmin progress: 23%xmin progress: 25%xmin progress: 27%xmin progress: 28%

  self.alpha = 1 + (self.n / sum(log(data/self.xmin)))
  return 1 + len(data)/sum(log(data / (self.xmin)))


xmin progress: 30%xmin progress: 32%xmin progress: 33%xmin progress: 35%xmin progress: 37%xmin progress: 38%xmin progress: 40%xmin progress: 42%xmin progress: 44%xmin progress: 45%xmin progress: 47%xmin progress: 49%xmin progress: 50%xmin progress: 52%xmin progress: 54%xmin progress: 55%xmin progress: 57%xmin progress: 59%xmin progress: 61%xmin progress: 62%xmin progress: 64%xmin progress: 66%xmin progress: 67%xmin progress: 69%xmin progress: 71%xmin progress: 72%xmin progress: 74%xmin progress: 76%xmin progress: 77%xmin progress: 79%xmin progress: 81%xmin progress: 83%xmin progress: 84%xmin progress: 86%xmin progress: 88%xmin progress: 89%xmin progress: 91%xmin progress: 93%xmin progress: 94%xmin progress: 96%xmin progress: 98%alpha: 2.2476185847265686
xmin: 0.004732272734910836
log-likelihood ratio test: -0.30104123804028116
p-value: 0.6582362086812933
