In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
from sklearn.linear_model import LinearRegression
import task1 as t1 
import task2 as t2 
from scipy.stats import spearmanr

In [2]:

# Locations of the raw inputs and of the output folder.
# NOTE(review): these are absolute paths on one user's machine; anyone else
# running the notebook has to edit them before the loading cell will work.
data_root_min = '/Users/huayuzhu/Desktop/exam/raw_data/minute'  # minute-frequency CSVs
data_root = '/Users/huayuzhu/Desktop/exam/raw_data/daily'  # daily-frequency CSVs
output_dir = '/Users/huayuzhu/Desktop/exam/'  # not used in the cells shown here
def get_min_data_from_csv(file_path):
    """
    Reads a CSV file into a DataFrame, using the first column as the row
    index and converting that index to datetime. The frame is NOT transposed.

    Parameters
    ----------
    file_path : str
        The file path of the CSV file to be read.

    Returns
    -------
    pd.DataFrame
        DataFrame in the file's own orientation, with a datetime index.

    Raises
    ------
    FileNotFoundError
        If ``file_path`` does not exist (a message is printed first).
    Exception
        Any other read/parse error is re-raised after its message is printed.
    """
    try:
        data = pd.read_csv(file_path, index_col=0)
        data.index = pd.to_datetime(data.index)
    except FileNotFoundError:
        print(f"File '{file_path}' not found.")
        # Re-raise: falling through to `return data` would hide the real
        # problem behind an UnboundLocalError, because `data` was never bound.
        raise
    except Exception as e:
        print(f"An error occurred: {e}")
        raise
    return data
def get_data_from_csv(file_path):
    """
    Reads a CSV file into a DataFrame, with the first column as row indices,
    transposes the result, and converts the (post-transpose) row indices,
    i.e. the file's original column headers, to datetime.

    Parameters
    ----------
    file_path : str
        The file path of the CSV file to be read.

    Returns
    -------
    pd.DataFrame
        Transposed DataFrame with datetime as index.

    Raises
    ------
    FileNotFoundError
        If ``file_path`` does not exist (a message is printed first).
    Exception
        Any other read/parse error is re-raised after its message is printed.
    """
    try:
        data = pd.read_csv(file_path, index_col=0).T
        data.index = pd.to_datetime(data.index)
    except FileNotFoundError:
        print(f"File '{file_path}' not found.")
        # Re-raise: falling through to `return data` would hide the real
        # problem behind an UnboundLocalError, because `data` was never bound.
        raise
    except Exception as e:
        print(f"An error occurred: {e}")
        raise
    return data
# Minute-frequency tables, loaded as stored (first CSV column -> datetime index).
amount = get_min_data_from_csv(f'{data_root_min}/amount.csv')
volume = get_min_data_from_csv(f'{data_root_min}/volume.csv')
close  = get_min_data_from_csv(f'{data_root_min}/close.csv')
# NOTE: this rebinds the name `open`, shadowing Python's builtin open() for
# every later cell of the notebook; later cells pass this frame by that name.
open  = get_min_data_from_csv(f'{data_root_min}/open.csv')

# Daily tables: each file is transposed on load, so the file's column headers
# become the datetime index.
S_DQ_RET = get_data_from_csv(f'{data_root}/S_DQ_RET.csv')
S_905_DQ_RET =  get_data_from_csv(f'{data_root}/905S_DQ_RET.csv')
S_DQ_MV = get_data_from_csv(f'{data_root}/S_DQ_MV.csv')
S_RESTRICT = get_data_from_csv(f'{data_root}/S_RESTRICT.csv')
S_DQ_OPEN = get_data_from_csv(f'{data_root}/S_DQ_OPEN.csv')
S_DQ_ADJ_FACTOR = get_data_from_csv(f'{data_root}/S_DQ_ADJFACTOR.csv')
S_DQ_CLOSE = get_data_from_csv(f'{data_root}/S_DQ_CLOSE.csv')
S_DQ_VOLUME = get_data_from_csv(f'{data_root}/S_DQ_VOLUME.csv')
FLOAT_A_SHR_TODAY = get_data_from_csv(f'{data_root}/FLOAT_A_SHR_TODAY.csv')

In [3]:
# Rescale the adjustment factor by 1/100, then build adjusted daily prices.
# NOTE(review): presumably the raw factor is stored as a percentage - confirm.
# WARNING: this cell is not idempotent. It overwrites S_DQ_ADJ_FACTOR, so
# running it a second time divides by 100 again; reload the data first.
S_DQ_ADJ_FACTOR = S_DQ_ADJ_FACTOR.div(100)
S_ADJ_CLOSE = S_DQ_CLOSE.mul(S_DQ_ADJ_FACTOR)
S_ADJ_OPEN = S_DQ_OPEN.mul(S_DQ_ADJ_FACTOR)

In [41]:
def calc_large_order_corr(volume, open, close, large_quantile=2/3,
                          morning_start='9:31', morning_end='11:30'):
    """
    Per-day correlation between return and volume, restricted to the bars
    whose volume ranks in the top part of that day's bars.

    For every calendar date and every column, the bars of that date are
    percentile-ranked by volume; bars whose rank exceeds ``large_quantile``
    are kept and the correlation (``DataFrame.corrwith`` default) between
    their returns and their volumes is computed. This is done once over all
    bars of the day and once over the morning window only, in which case the
    ranking is also done within the morning bars only.

    Parameters
    ----------
    volume : pd.DataFrame
        Bar volumes with a DatetimeIndex; one column per instrument.
    open : pd.DataFrame
        Bar open prices, same layout as ``volume``. (The parameter name
        shadows the builtin inside this function; kept for compatibility.)
    close : pd.DataFrame
        Bar close prices, same layout as ``volume``.
    large_quantile : float, default 2/3
        Percentile-rank threshold above which a bar counts as "large".
    morning_start, morning_end : str, default '9:31' and '11:30'
        Bounds of the morning window, passed to ``between_time``.

    Returns
    -------
    tuple of (pd.DataFrame, pd.DataFrame)
        ``(full_day_correlation, morning_correlation)``, each indexed by date
        with one column per instrument.

    Notes
    -----
    NOTE(review): the return is ``close / open.shift(1) - 1``, i.e. each bar's
    close over the PREVIOUS row's open. The very first row is therefore NaN
    and the first bar of each day is measured against the last bar of the
    prior day. Confirm this is intended rather than ``close / open - 1``.
    """
    ret = close / open.shift(1) - 1

    def _large_order_corr(group):
        # Rank within this day's bars, column by column; keep the top slice.
        is_large = group.rank(pct=True) > large_quantile
        # group.index already limits the bars to this day (and to the morning
        # window when called on the morning subset), so no extra time filter
        # is needed on the returns.
        large_ret = ret.loc[group.index].where(is_large)
        large_vol = group.where(is_large)
        return large_ret.corrwith(large_vol)

    def _per_day(vol):
        # One row of correlations per calendar date.
        return vol.groupby(vol.index.date).apply(_large_order_corr)

    full_day_correlation = _per_day(volume)
    morning_correlation = _per_day(volume.between_time(morning_start, morning_end))

    return full_day_correlation, morning_correlation

In [42]:
# Compute the large-order return/volume correlations on the minute data, for
# the full session and for the morning window. `open` here is the minute
# open-price DataFrame loaded earlier, not the builtin function.
day_cor,morning_cor = calc_large_order_corr(volume,open,close)

In [43]:
# Display the morning-window correlations (rows: dates, columns: instruments).
morning_cor

Unnamed: 0,000001,000002,000009,000027,000063,000066,000069,000100,000157,000166,...,688122,688126,688180,688188,688363,688390,688561,688599,688777,688981
2021-01-04,-0.379202,-0.598692,0.574670,0.196515,-0.590203,0.418385,-0.227693,0.144734,0.191250,-0.287319,...,0.412252,0.298939,-0.046390,0.389353,0.468628,-0.034385,0.530033,0.416965,-0.056247,0.247992
2021-01-05,-0.378985,-0.673460,-0.313473,-0.648667,0.396867,0.241189,-0.145144,0.473572,0.631487,-0.375585,...,0.124234,0.436126,0.104300,-0.157722,-0.039864,-0.084531,0.019551,-0.169499,0.400659,0.331183
2021-01-06,0.394191,0.557308,-0.457070,0.032386,-0.475601,-0.202274,0.482256,-0.510133,-0.331664,-0.012629,...,-0.060963,0.356351,0.496784,-0.096960,-0.434035,0.295598,0.083000,-0.242750,-0.586482,0.474151
2021-01-07,0.226625,0.490035,0.311505,0.107141,-0.316286,-0.283986,-0.270829,0.300045,0.593152,-0.022950,...,-0.249184,-0.427048,-0.361797,0.356574,0.113673,0.406520,-0.470183,0.368268,-0.483961,0.381093
2021-01-08,-0.522274,0.215632,-0.269574,0.042000,0.523554,0.487444,-0.021505,0.395952,-0.472546,0.294256,...,-0.062172,0.087518,0.256944,0.339232,-0.491221,-0.240393,0.297438,-0.314221,0.462531,0.562664
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-26,-0.346396,-0.037940,0.204877,0.446972,-0.080491,-0.071532,0.307078,0.201286,-0.116383,-0.265404,...,0.200761,-0.087527,0.314449,0.282583,0.492548,0.362700,0.395955,0.268857,0.020010,-0.112484
2022-12-27,0.456209,0.413539,-0.726715,0.430495,0.282863,0.332555,-0.397687,-0.056148,-0.050456,-0.017102,...,0.001999,-0.276455,-0.712384,-0.009464,0.320799,0.390707,0.003150,0.096123,0.348952,0.005961
2022-12-28,0.363917,0.629091,-0.330095,0.597302,-0.229320,-0.209802,0.075083,0.283524,-0.246508,0.235775,...,0.147514,0.176842,0.533891,0.215580,0.043415,0.594781,-0.009746,0.038400,-0.275493,-0.280632
2022-12-29,-0.596167,-0.261693,-0.287102,0.770451,0.529221,0.440243,-0.455731,0.363081,-0.155038,-0.066118,...,0.019010,0.285001,-0.063679,0.526482,0.282080,0.044203,0.206615,0.347292,0.172838,0.225778


In [44]:
# Display the full-session correlations (rows: dates, columns: instruments).
day_cor

Unnamed: 0,000001,000002,000009,000027,000063,000066,000069,000100,000157,000166,...,688122,688126,688180,688188,688363,688390,688561,688599,688777,688981
2021-01-04,-0.327224,-0.555101,0.531900,0.301387,0.351667,0.344849,-0.333210,0.168635,0.243753,-0.012840,...,0.322522,0.202412,-0.014754,0.326352,0.485528,0.039258,0.563508,0.422482,0.325976,0.215671
2021-01-05,-0.311909,-0.624122,-0.166841,-0.616699,0.393797,0.103684,-0.096274,0.460883,0.501025,-0.138274,...,0.076585,0.333775,-0.054076,-0.104857,0.062112,-0.142718,0.205283,-0.347443,0.292342,0.195254
2021-01-06,0.397444,0.513055,-0.400054,0.456413,-0.524403,-0.288022,0.180786,-0.412032,-0.100799,-0.071341,...,-0.048721,0.159435,0.412283,-0.146718,-0.398985,0.192866,-0.067573,-0.204029,-0.419826,0.478954
2021-01-07,0.129480,0.208718,0.270795,-0.034009,-0.291070,-0.245637,-0.120809,0.288800,0.459694,0.003380,...,-0.303452,-0.351338,-0.458833,0.079021,-0.018774,0.272091,-0.296527,0.385706,-0.179985,0.110758
2021-01-08,-0.389859,0.276109,-0.337289,0.098120,0.530902,0.380847,-0.069470,0.283693,-0.353185,0.148409,...,-0.159348,0.207230,0.253299,0.553652,-0.483761,-0.222592,0.294056,-0.273532,0.133139,0.351733
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-26,-0.367198,-0.204407,0.309013,0.156453,-0.200063,0.083006,0.176314,0.170657,-0.110041,-0.167524,...,0.136319,-0.005672,0.046063,0.201307,0.277185,0.329915,0.312332,0.470643,0.088215,-0.048996
2022-12-27,0.389928,0.435110,-0.659510,0.501714,0.474480,0.191309,0.270014,-0.105913,-0.113978,-0.096518,...,0.099300,-0.016801,-0.569184,0.026589,0.483760,0.372349,-0.036199,0.122290,-0.338128,0.073497
2022-12-28,0.305535,0.529098,-0.301019,0.460728,-0.270576,-0.186261,0.107803,0.178387,-0.192081,0.095403,...,0.114818,0.162686,0.504461,0.212542,-0.067559,0.446927,-0.087814,0.017010,-0.324494,-0.249521
2022-12-29,-0.472747,-0.333919,-0.290985,0.617986,0.374248,0.458060,-0.287545,0.191152,-0.002749,0.122645,...,-0.117768,0.199060,0.137802,0.342833,0.278697,0.022903,0.259133,0.296264,0.213146,0.231307
