In [28]:
import numpy as np
from numba import jit, prange


@jit(nopython=True)
def compute_returns(prices, horizon):
    # Adjusted to compute future returns
    num_points = len(prices) - horizon
    returns = np.empty(num_points)
    for i in range(num_points):
        returns[i] = np.log(prices[i + horizon] / prices[i]) * 10000
    # Extend the returns array with NaNs to match the original prices length
    full_returns = np.full_like(prices, np.nan, dtype=np.float64)
    full_returns[:num_points] = returns
    return full_returns


@jit(nopython=True)
def compute_moments_and_volatility(returns, start, end):
    # Compute moments and volatility for a given sub-range
    if end <= len(returns) and not np.isnan(returns[start:end]).all():
        ret_window = returns[start:end]
        mean = np.mean(ret_window)
        variance = np.var(ret_window)
        rv = np.sqrt(np.sum(ret_window**2))
        return mean, variance, rv
    return None, None, None


@jit(nopython=True, parallel=True)
def analyze_prices(prices, horizons, feature_intervals):
    # Initialize arrays to store the results
    means = np.full((len(prices), len(horizons), len(feature_intervals)), np.nan)
    variances = np.full((len(prices), len(horizons), len(feature_intervals)), np.nan)
    rvs = np.full((len(prices), len(horizons), len(feature_intervals)), np.nan)

    for idx, horizon in enumerate(horizons):
        all_returns = compute_returns(prices, horizon)
        for start in prange(len(prices) - horizon):
            for f_idx, (start_interval, end_interval) in enumerate(feature_intervals):
                adjusted_start = start + start_interval
                adjusted_end = start + end_interval
                if adjusted_end <= len(all_returns):
                    mean, variance, rv = compute_moments_and_volatility(
                        all_returns, adjusted_start, adjusted_end
                    )
                    if mean is not None:
                        means[start, idx, f_idx] = mean
                        variances[start, idx, f_idx] = variance
                        rvs[start, idx, f_idx] = rv

    return means, variances, rvs


# Example usage:
prices = np.random.normal(loc=100, scale=5, size=2000)  # Example prices with more data
horizons = [20, 60, 120]  # Calculate future returns over 1 min, 3 min, 6 min
feature_intervals = [(0, 60), (60, 120), (120, 180)]  # Different intervals
means, variances, rvs = analyze_prices(prices, horizons, feature_intervals)
print("Means shape:", means.shape)
print("Variances shape:", variances.shape)
print("RVs shape:", rvs.shape)

Means shape: (2000, 3, 3)
Variances shape: (2000, 3, 3)
RVs shape: (2000, 3, 3)


In [29]:
import nutils
import common as cm
import pandas as pd

In [50]:
for code in cm.SELECTED_CODES:
    print(code)
    datas = cm.get_snapshot(code)
    df = datas['tickData']
    df = pd.DataFrame(df,columns=cm.COLS_SNAPSHOTS)
    mid_price = (df.AskPrice1 + df.AskPrice2)/2
    mid_price = mid_price.values
    horizons = [10, 30, 60]  # Calculate future returns over 1 min, 3 min, 6 min
    feature_intervals = [(0, 60), (60, 120), (60, 180)]  # Different intervals
    means, variances, rvs = analyze_prices(mid_price, horizons, feature_intervals)
    # fp_mean = np.memmap(f"/mnt/disk1/snapshot_dataset/labels/mean_{code}",dtype=np.float32,mode="w+",shape=means.shape)
    # fp_mean[:] = means[:]
    # fp_mean.flush()
    # fp_mean.flags['WRITEABLE'] = False
    
    # fp_rv = np.memmap(f"/mnt/disk1/snapshot_dataset/labels/rv_{code}",dtype=np.float32,mode="w+",shape=rvs.shape)
    # fp_rv[:] = rvs[:]
    # fp_rv.flush()
    # fp_rv.flags['WRITEABLE'] = False
    
    # fp_var = np.memmap(f"/mnt/disk1/snapshot_dataset/labels/var_{code}",dtype=np.float32,mode="w+",shape=variances.shape)
    # fp_var[:] = variances[:]
    # fp_var.flush()
    # fp_var.flags['WRITEABLE'] = False

000537
000627
000925
000950
002058
002166
002308
002399
002498
002557
002577
002594
002901
002941
002946
300053
300137
300141
300215
300225
300241
300252
300366
300498
300564
300605
300640
300688
300713
300867
300870
300908
300913
600006
600012
600107
600123
600127
600163
600176
600218
600232
600267
600302
600395
600426
600428
600493
600557
600578
600644
600647
600665
600704
600740
600797
600817
600834
600859
600862
600893
600984
601019
601330
601881
603006
603017
603018
603037
603192
603212
603269
603357
603368
603388
603390
603559
603595
603693
603712
603777
603818
603856
603878
603939
603990
605128
605166
688057
688165
688215
688286
688309
688313
688366
688386
688668
688678
688777
689009


In [42]:
mid_price = mid_price.values

In [None]:
mid_price.sahpe

Means shape: (1828412, 3, 3)
Variances shape: (1828412, 3, 3)
RVs shape: (1828412, 3, 3)


In [45]:
ret = np.memmap('A:/data/factors/label_000537.npy',dtype=np.float32,mode='r')

In [48]:
from scipy.stats import pearsonr
for i in range(3):
    for j in range(3):
        print(i,j)
        rvs_ij = rvs[:,i,j]
        mean_ij = means[:,i,j]
        var_ij = variances[:,i,j]
        print(pearsonr(np.nan_to_num(rvs_ij), np.nan_to_num(ret)))
        print(pearsonr(np.nan_to_num(rvs_ij), np.nan_to_num(mean_ij)))
        print(pearsonr(np.nan_to_num(rvs_ij), np.nan_to_num(var_ij)))

0 0
PearsonRResult(statistic=0.056276941578786085, pvalue=0.0)
PearsonRResult(statistic=0.7000510958987833, pvalue=0.0)
PearsonRResult(statistic=0.963906367999515, pvalue=0.0)
0 1
PearsonRResult(statistic=0.06835711380887606, pvalue=0.0)
PearsonRResult(statistic=0.7000521941136941, pvalue=0.0)
PearsonRResult(statistic=0.9639065435792427, pvalue=0.0)
0 2
PearsonRResult(statistic=0.04878679705530662, pvalue=0.0)
PearsonRResult(statistic=0.700052456790514, pvalue=0.0)
PearsonRResult(statistic=0.9639064305841797, pvalue=0.0)
1 0
PearsonRResult(statistic=0.05831562861633175, pvalue=0.0)
PearsonRResult(statistic=0.6354073332908983, pvalue=0.0)
PearsonRResult(statistic=0.923722365622294, pvalue=0.0)
1 1
PearsonRResult(statistic=0.0642842576214014, pvalue=0.0)
PearsonRResult(statistic=0.6354093798375037, pvalue=0.0)
PearsonRResult(statistic=0.9237224630752968, pvalue=0.0)
1 2
PearsonRResult(statistic=0.04923948471443939, pvalue=0.0)
PearsonRResult(statistic=0.6354094185126427, pvalue=0.0)
Pear