In [1]:
import json
import math
from scripts_stock.cfg.out_file_name import OutFileName
from scripts_stock.cfg.set_dir import ProjectDir
from scripts_stock.cfg.stock_list import *
from scripts_stock.utils.analysis.stock_stat_index import df_to_stock_df, stock_kdj
import os
from scripts_stock.utils.common import CommonScript
from scripts_stock.utils.string_process import StringProcess

import plotly.figure_factory as ff
import numpy as np
from scripts_stock.data_base.get_table_info import GetDataFromDB
from scripts_stock.utils.datetime.date_function import date_today_yesterday



In [2]:
# Derive the analysis window from the project date helper; the second returned value
# is discarded.
# NOTE(review): the third value is bound to `two_month_ago` although the argument is
# 150 — presumably a look-back expressed in days (~5 months); confirm against
# date_today_yesterday and rename if so.
today_date, _, two_month_ago = date_today_yesterday(150)

In [17]:

# NOTE(review): `stock_index_in` is assigned here but is not referenced by the query
# below, which has no per-stock filter.
stock_index_in = 601816
start_date = two_month_ago
end_date = today_date
# Select the distinct (stock_index, date) rows with the net inflow amounts of the four
# order-size buckets (small / medium / large / super-large) for
# start_date <= date <= end_date. Both bounds are interpolated into the SQL text.
sql_str = f"""
   select distinct stock_index,date,
   small_order_net_inflow_amount,medium_order_net_inflow_amount,
     large_order_net_inflow_amount,
     super_large_order_net_inflow_amount
     from r_t_cash_flow_stocks 
     where date>='{start_date}' and date<='{end_date}'
        
"""

# NOTE(review): `pd` is not imported by the notebook's first cell; `import pandas as pd`
# only appears in a later cell, so on a fresh kernel this presumably relies on the star
# import or on out-of-order execution — confirm.
# NOTE(review): the connection is never closed in this cell.
conn = CommonScript.connect_to_db("test.db")
df1 = pd.read_sql_query(sql_str, conn)
df1.tail(4)

Unnamed: 0,stock_index,date,small_order_net_inflow_amount,medium_order_net_inflow_amount,large_order_net_inflow_amount,super_large_order_net_inflow_amount
29098,600029,2024-12-25,3648382.0,28199292.0,21910677.0,-53758351.0
29099,601288,2024-12-25,15651520.0,42619408.0,38743216.0,-97014160.0
29100,600085,2024-12-25,4218893.0,3675895.0,-6378369.0,-1516419.0
29101,792,2024-12-25,3712419.0,29096132.0,22771523.0,-55580074.0


In [18]:
def normalize_row(row):
    """Rescale one row so positives sum to +1 and negatives sum to -1.

    Every positive entry is divided by the sum of the row's positive entries and
    every negative entry by the absolute sum of the row's negative entries, so each
    value becomes its signed share of the inflow (or outflow) side. Zeros and NaNs
    are left as they are.

    Parameters
    ----------
    row : pandas.Series
        Numeric values of a single row (e.g. the four order-size net inflow amounts).

    Returns
    -------
    pandas.Series
        A new float Series with the same index. The input is never modified:
        ``DataFrame.apply`` does not support functions that mutate the object they
        are handed, and writing fractional shares into an integer row would
        otherwise depend on implicit upcasting.
    """
    # astype returns a copy, so all writes below stay local to this function.
    values = row.astype("float64")

    # Masks are computed once, before any value is overwritten.
    positive_mask = values > 0
    negative_mask = values < 0

    sum_positive = values[positive_mask].sum()
    sum_negative = values[negative_mask].sum()

    # An empty side sums to 0; skip it to avoid dividing by zero.
    if sum_positive != 0:
        values[positive_mask] = values[positive_mask] / sum_positive
    if sum_negative != 0:
        # negative / negative is positive, so flip the sign back.
        values[negative_mask] = (values[negative_mask] / sum_negative) * -1

    return values


# Apply the normalisation to every row (axis=1) of the four net-inflow columns, so each
# row's positive entries sum to +1 and its negative entries sum to -1.
df_v2 = df1[["small_order_net_inflow_amount", "medium_order_net_inflow_amount",
    "large_order_net_inflow_amount", "super_large_order_net_inflow_amount"]]
df_normalized2 = df_v2.apply(normalize_row, axis=1)
df_normalized2

Unnamed: 0,small_order_net_inflow_amount,medium_order_net_inflow_amount,large_order_net_inflow_amount,super_large_order_net_inflow_amount
0,0.057474,0.942526,-0.990696,-0.009304
1,-0.071794,0.957971,-0.928206,0.042029
2,-0.146593,-0.420401,-0.433006,1.000000
3,0.702143,0.212972,0.084885,-1.000000
4,0.214109,-1.000000,0.366144,0.419747
...,...,...,...,...
29097,-0.440979,-0.411677,1.000000,-0.147344
29098,0.067866,0.524556,0.407577,-1.000000
29099,0.161332,0.439311,0.399356,-1.000000
29100,0.534390,0.465610,-0.807922,-0.192078


In [19]:
# Put the raw amounts and their per-row normalised shares side by side, joined on the
# shared row index. Both frames carry the same four column names, so pandas applies its
# default suffixes: `_x` for the raw amounts (left) and `_y` for the shares (right).
df_merged = df1.merge(df_normalized2, left_index=True, right_index=True)
df_merged.round(3)

Unnamed: 0,stock_index,date,small_order_net_inflow_amount_x,medium_order_net_inflow_amount_x,large_order_net_inflow_amount_x,super_large_order_net_inflow_amount_x,small_order_net_inflow_amount_y,medium_order_net_inflow_amount_y,large_order_net_inflow_amount_y,super_large_order_net_inflow_amount_y
0,69,2024-07-29,281670.0,4619186.0,-4855257.0,-45599.0,0.057,0.943,-0.991,-0.009
1,69,2024-07-30,-181497.0,2421767.0,-2346518.0,106249.0,-0.072,0.958,-0.928,0.042
2,69,2024-07-31,-1482745.0,-4252232.0,-4379730.0,10114708.0,-0.147,-0.420,-0.433,1.000
3,69,2024-08-01,5326910.0,1615740.0,643995.0,-7586645.0,0.702,0.213,0.085,-1.000
4,69,2024-08-02,878548.0,-4103274.0,1502390.0,1722336.0,0.214,-1.000,0.366,0.420
...,...,...,...,...,...,...,...,...,...,...
29097,601857,2024-12-25,-56334384.0,-52591104.0,127748448.0,-18822944.0,-0.441,-0.412,1.000,-0.147
29098,600029,2024-12-25,3648382.0,28199292.0,21910677.0,-53758351.0,0.068,0.525,0.408,-1.000
29099,601288,2024-12-25,15651520.0,42619408.0,38743216.0,-97014160.0,0.161,0.439,0.399,-1.000
29100,600085,2024-12-25,4218893.0,3675895.0,-6378369.0,-1516419.0,0.534,0.466,-0.808,-0.192


In [29]:
# A normalised share of exactly -1 means the super-large bucket was the only negative
# one in that row (it carries the whole outflow side by itself).
mask = df_merged["super_large_order_net_inflow_amount_y"] == -1

# Boolean indexing keeps only the flagged rows.
# NOTE(review): despite its name, this frame holds only the -1 rows; +1 is not matched.
rows_with_1_or_neg1 = df_merged.loc[mask]
rows_with_1_or_neg1.sort_values(by="date", ascending=False).head(60)

Unnamed: 0,stock_index,date,small_order_net_inflow_amount_x,medium_order_net_inflow_amount_x,large_order_net_inflow_amount_x,super_large_order_net_inflow_amount_x,small_order_net_inflow_amount_y,medium_order_net_inflow_amount_y,large_order_net_inflow_amount_y,super_large_order_net_inflow_amount_y
29101,792,2024-12-25,3712419.0,29096132.0,22771523.0,-55580074.0,0.066794,0.523499,0.409707,-1.0
28998,600061,2024-12-25,14323046.0,7585606.0,12871448.0,-34780100.0,0.411817,0.218102,0.370081,-1.0
28860,600760,2024-12-25,51508026.0,729211.0,7566707.0,-59803944.0,0.861281,0.012193,0.126525,-1.0
28862,601211,2024-12-25,3408457.0,19276163.0,18562144.0,-41246764.0,0.082636,0.467338,0.450027,-1.0
28867,617,2024-12-25,901963.0,4817847.0,7031161.0,-12750970.0,0.070737,0.377842,0.551422,-1.0
28897,601138,2024-12-25,175336400.0,103724832.0,2742912.0,-281804160.0,0.622192,0.368074,0.009733,-1.0
28937,601818,2024-12-25,10065803.0,6023162.0,6620612.0,-22709576.0,0.44324,0.265226,0.291534,-1.0
28955,600031,2024-12-25,279436.0,188626.0,501968.0,-970029.0,0.288069,0.194454,0.517477,-1.0
28960,601006,2024-12-25,122475910.0,36166940.0,19697168.0,-178340021.0,0.686755,0.202798,0.110447,-1.0
28964,977,2024-12-25,189653344.0,101828528.0,57023104.0,-348504960.0,0.544191,0.292187,0.163622,-1.0


In [25]:
# Flagged rows of a single stock (code 792).
rows_with_1_or_neg1.loc[rows_with_1_or_neg1["stock_index"] == 792]

Unnamed: 0,stock_index,date,small_order_net_inflow_amount_x,medium_order_net_inflow_amount_x,large_order_net_inflow_amount_x,super_large_order_net_inflow_amount_x,small_order_net_inflow_amount_y,medium_order_net_inflow_amount_y,large_order_net_inflow_amount_y,super_large_order_net_inflow_amount_y
28351,792,2024-12-20,-52727970.0,-32323701.0,-18687564.0,103739236.0,-0.508274,-0.311586,-0.18014,1.0


In [11]:
# Flagged rows of a single trading day.
rows_with_1_or_neg1.loc[rows_with_1_or_neg1["date"] == '2024-11-01']

Unnamed: 0,stock_index,date,small_order_net_inflow_amount_x,medium_order_net_inflow_amount_x,large_order_net_inflow_amount_x,super_large_order_net_inflow_amount_x,small_order_net_inflow_amount_y,medium_order_net_inflow_amount_y,large_order_net_inflow_amount_y,super_large_order_net_inflow_amount_y
18568,601117,2024-11-01,-9744165.0,-9136335.0,-4927314.0,23807813.0,-0.409284,-0.383754,-0.206962,1.0
18595,600111,2024-11-01,-363630727.0,-344495264.0,-174254240.0,882380224.0,-0.412102,-0.390416,-0.197482,1.0
18602,601319,2024-11-01,-2563461.0,-3825407.0,-15489352.0,21878220.0,-0.11717,-0.17485,-0.70798,1.0
18609,600941,2024-11-01,-71345789.0,-4926208.0,-19659456.0,95931445.0,-0.743716,-0.051351,-0.204932,1.0
18617,600900,2024-11-01,-72923840.0,-102235552.0,-34285936.0,209445328.0,-0.348176,-0.488125,-0.163699,1.0
18618,601888,2024-11-01,-53728576.0,-32502896.0,-503408.0,86734874.0,-0.619458,-0.374738,-0.005804,1.0
18623,601390,2024-11-01,-85650256.0,-37974976.0,-20718848.0,144344067.0,-0.593376,-0.263086,-0.143538,1.0
18635,600050,2024-11-01,-41107040.0,-124090816.0,-17807328.0,183005184.0,-0.224622,-0.678073,-0.097305,1.0
18671,2460,2024-11-01,-105220384.0,-55662144.0,-8137520.0,169020044.0,-0.622532,-0.329323,-0.048145,1.0
18678,601838,2024-11-01,-16335646.0,-14356284.0,-34690967.0,65382897.0,-0.249846,-0.219572,-0.530582,1.0


In [78]:
def large_bigger_super(df1):
    """Find rows where the large-order net inflow exceeds the super-large one.

    A row qualifies when both net inflow amounts are strictly positive and the
    large-order amount is strictly greater than the super-large-order amount.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Must contain the columns ``stock_index``, ``date``,
        ``large_order_net_inflow_amount`` and ``super_large_order_net_inflow_amount``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``stock_index``, ``date`` and ``rr``, where
        ``rr`` is large / super-large for each qualifying row. The original row
        index is preserved and ``df1`` is left unmodified.
    """
    large = df1["large_order_net_inflow_amount"]
    super_large = df1["super_large_order_net_inflow_amount"]
    qualifies = (large > super_large) & (large > 0) & (super_large > 0)

    picked = df1.loc[qualifies]
    ratio = picked["large_order_net_inflow_amount"] / \
        picked["super_large_order_net_inflow_amount"]
    # assign() builds a new frame, so nothing is written into a filtered slice of
    # the caller's data (the source of SettingWithCopyWarning).
    return picked[["stock_index", "date"]].assign(rr=ratio)

In [91]:
def mid_bigger_large(df1):
    """Find rows where the medium-order net inflow exceeds the large-order one.

    A row qualifies when both net inflow amounts are strictly positive and the
    medium-order amount is strictly greater than the large-order amount.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Must contain the columns ``stock_index``, ``date``,
        ``medium_order_net_inflow_amount`` and ``large_order_net_inflow_amount``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``stock_index``, ``date`` and ``rr``, where
        ``rr`` is medium / large for each qualifying row. The original row index is
        preserved and ``df1`` is left unmodified.
    """
    medium = df1["medium_order_net_inflow_amount"]
    large = df1["large_order_net_inflow_amount"]
    qualifies = (medium > large) & (medium > 0) & (large > 0)

    picked = df1.loc[qualifies]
    ratio = picked["medium_order_net_inflow_amount"] / \
        picked["large_order_net_inflow_amount"]
    # assign() builds a new frame, so nothing is written into a filtered slice of
    # the caller's data (the source of SettingWithCopyWarning).
    return picked[["stock_index", "date"]].assign(rr=ratio)

In [92]:
# Run the medium-vs-large screen stock by stock and stack the per-stock results.
df_list = []
failed_stocks = []  # stocks whose screen raised; kept so failures stay visible
for stock_in in df1["stock_index"].unique():
    df2 = df1[df1["stock_index"] == stock_in]
    try:
        df_v1 = mid_bigger_large(df2)
    except Exception as exc:
        # Best effort: skip this stock, but report it instead of silently swallowing
        # the error (a bare `except:` would also trap KeyboardInterrupt).
        failed_stocks.append(stock_in)
        print(f"mid_bigger_large failed for stock {stock_in}: {exc!r}")
        continue
    df_list.append(df_v1)

# pd.concat raises ValueError on an empty list, so fall back to an empty result frame.
df_out = pd.concat(df_list) if df_list else pd.DataFrame(
    columns=["stock_index", "date", "rr"])

In [93]:
# Keep the rows whose ratio exceeds 10 and show the 30 largest (ascending by ratio).
df_out1 = df_out.loc[df_out["rr"] > 10]
df_out1.sort_values(by="rr").tail(30)

Unnamed: 0,stock_index,date,rr
9969,300347,2024-08-16,167.815248
8009,300661,2024-08-16,167.815248
11859,300223,2024-08-16,167.815248
7514,300408,2024-08-16,167.815248
3159,300979,2024-08-16,167.815248
7829,300769,2024-08-16,167.815248
7469,300124,2024-08-16,167.815248
3384,300014,2024-08-16,167.815248
3339,300760,2024-08-16,167.815248
5624,300274,2024-08-16,167.815248


In [45]:
def get_stock_ind(df1, start_date, end_date, ratio_threshold=3):
    """Find days whose super-large net inflow is far above the window average.

    Rows are first restricted to ``start_date <= date <= end_date`` with a strictly
    positive super-large-order net inflow. Each remaining amount is divided by the
    mean of those remaining amounts, and only rows whose ratio is strictly greater
    than ``ratio_threshold`` are returned.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Must contain ``date`` and ``super_large_order_net_inflow_amount``.
    start_date, end_date :
        Inclusive bounds, compared against the ``date`` column with ``>=`` / ``<=``.
    ratio_threshold : float, default 3
        Minimum (exclusive) multiple of the window mean a row must reach.

    Returns
    -------
    pandas.DataFrame
        The qualifying rows with all input columns plus ``super_averge_ratio``
        (the column name keeps its original spelling because other cells select
        it by that name). ``df1`` is left unmodified.
    """
    amount_col = "super_large_order_net_inflow_amount"
    in_window = (df1["date"] >= start_date) & (df1["date"] <= end_date)
    # Explicit copy: the ratio column is added to this frame, not to a slice of df1
    # (which would raise SettingWithCopyWarning).
    inflow_days = df1.loc[in_window & (df1[amount_col] > 0)].copy()

    # With no qualifying rows the mean is NaN and the result is simply empty.
    inflow_days["super_averge_ratio"] = inflow_days[amount_col] / \
        inflow_days[amount_col].mean()
    return inflow_days[inflow_days["super_averge_ratio"] > ratio_threshold]

In [40]:
# Peek at the first two distinct stock codes, in order of first appearance.
df1["stock_index"].unique()[:2]

array([601100,   2241])

In [48]:
# Run the super-large inflow spike screen stock by stock, so each stock is compared
# against its own mean, then stack the per-stock results.
df_list = []
failed_stocks = []  # stocks whose screen raised; kept so failures stay visible
for stock_in in df1["stock_index"].unique():
    df2 = df1[df1["stock_index"] == stock_in]
    try:
        df_v1 = get_stock_ind(df2, start_date, end_date)
    except Exception as exc:
        # Best effort: skip this stock, but report it instead of silently swallowing
        # the error (a bare `except:` would also trap KeyboardInterrupt).
        failed_stocks.append(stock_in)
        print(f"get_stock_ind failed for stock {stock_in}: {exc!r}")
        continue
    df_list.append(df_v1)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame that
# still has the ratio column the following cells select.
df_out = pd.concat(df_list) if df_list else df1.iloc[0:0].assign(
    super_averge_ratio=pd.Series(dtype="float64"))
df_out
    

Unnamed: 0,stock_index,date,super_large_order_net_inflow_amount,super_averge_ratio
5096,601100,2024-11-20,80258020.0,4.265283
8506,601100,2024-12-09,131981011.0,7.014083
3297,2241,2024-11-12,793780800.0,3.681632
2398,601901,2024-11-07,152591008.0,3.045570
7755,601901,2024-12-04,178411580.0,3.560924
...,...,...,...,...
8502,600029,2024-12-06,75128644.0,3.357195
10009,601288,2024-12-16,659553952.0,4.131366
4493,300496,2024-11-15,111910315.0,3.486302
3294,600085,2024-11-11,42291124.0,3.003173


In [53]:
# Keep only the rows above 5x the per-stock mean, newest first, and preview three.
df_out1 = (
    df_out
    .loc[df_out["super_averge_ratio"] > 5]
    .sort_values(by="date", ascending=False)
)
df_out1.head(3)

Unnamed: 0,stock_index,date,super_large_order_net_inflow_amount,super_averge_ratio
11054,601985,2024-12-23,393845600.0,5.365327
10626,977,2024-12-19,2335988000.0,5.920987
10320,601998,2024-12-18,85967220.0,5.214074


In [61]:
# Ad-hoc arithmetic check: these four signed amounts sum to exactly 0.
# NOTE(review): presumably the four order-size net inflows of one stock on one day;
# their source is not shown in this notebook — confirm.
-4658390-10171734+14685301+144823

0

In [60]:
# NOTE(review): absolute, machine-specific path; the notebook cannot be re-run
# elsewhere until this is made configurable.
df_kdj = pd.read_csv("/home/davidyu/vscode/data/analysis_data/stock_kdj_daily_last60.csv")
df_kdj1 = df_kdj[["stock_index", "date", "kdjj"]]
# Attach the KDJ "J" value of the same stock and day. The join keys are spelled out
# instead of relying on whichever column names the two frames happen to share; rows
# without a KDJ record are dropped (inner join).
df_out2 = df_out1.merge(df_kdj1, on=["stock_index", "date"], how="inner")
df_out2.sort_values("kdjj")

Unnamed: 0,stock_index,date,super_large_order_net_inflow_amount,super_averge_ratio,kdjj
2,601998,2024-12-18,85967220.0,5.214074,-3.666
17,69,2024-11-18,92785580.0,5.374613,9.484
57,600039,2024-10-31,78933100.0,5.195172,12.732
9,601021,2024-11-29,45964600.0,7.169596,16.257
19,600015,2024-11-18,116796100.0,5.399487,36.853
14,2756,2024-11-19,72449040.0,5.460868,37.279
18,601618,2024-11-18,121658600.0,5.305949,40.424
21,601816,2024-11-15,303961200.0,5.333845,44.746
12,2074,2024-11-25,188489400.0,5.475909,45.965
16,600884,2024-11-18,226270600.0,9.606358,48.67


In [25]:
# Show the date bounds.
# NOTE(review): `one_month_ago` is not assigned anywhere in the visible notebook, so this
# presumably relies on a deleted or out-of-order cell and would raise NameError on a
# fresh kernel — confirm.
print(today_date)
print(one_month_ago)

2024-12-25
2024-10-26


In [26]:
# Inline version of the spike screen: days in [start_date, today_date] with a positive
# super-large net inflow, each amount expressed as a multiple of the mean over those
# days; keep the days above 3x the mean.
# .copy() makes df2 an independent frame so the new column is not written into a
# filtered slice of df1 (SettingWithCopyWarning).
df2 = df1[(df1["date"] >= start_date) & (df1["date"] <= today_date)
          & (df1["super_large_order_net_inflow_amount"] > 0)].copy()

df2["super_averge_ratio"] = df2["super_large_order_net_inflow_amount"] / \
    df2["super_large_order_net_inflow_amount"].mean()
df3 = df2[df2["super_averge_ratio"] > 3]

56987253.722222224

In [29]:

# Display the rows whose super-large net inflow exceeded 3x the mean.
df3

Unnamed: 0,stock_index,date,super_large_order_net_inflow_amount,super_averge_ratio
147,601816,2024-11-15,303961168.0,5.333845
163,601816,2024-12-09,177251472.0,3.11037
167,601816,2024-12-13,183839136.0,3.225969


In [9]:
# Column used when the requested order-size label is not in the lookup table.
default_col_name = "super_large_order_net_inflow_amount"
print(default_col_name)
# Order-size label (as written in Chinese) -> net inflow column name.
cash_flow_columns = {
    '小单': "small_order_net_inflow_amount",        # small orders
    '超大单': "super_large_order_net_inflow_amount",  # super-large orders
    '大单': "large_order_net_inflow_amount",         # large orders
    "中单": "medium_order_net_inflow_amount",        # medium orders
}
# Fall back to the default column instead of None when the label is unknown.
selected_column = cash_flow_columns.get("中单", default_col_name)

# The lookup must leave the default untouched.
print(default_col_name)
print(selected_column)

super_large_order_net_inflow_amount
super_large_order_net_inflow_amount
medium_order_net_inflow_amount


In [3]:
import pandas as pd
# (raw field name, friendly column name) pairs, in the order the raw fields are listed.
# English glosses of the Chinese names are given as comments.
_column_pairs = [
    ("SECURITY_CODE", "stock_index"),
    ("SECURITY_NAME_ABBR", "stock_name"),
    ("TRADE_MARKET_CODE", "TRADE_MARKET_CODE"),
    ("TRADE_MARKET", "TRADE_MARKET"),
    ("SECURITY_TYPE_CODE", "SECURITY_TYPE_CODE"),
    ("SECURITY_TYPE", "SECURITY_TYPE"),
    ("UPDATE_DATE", "UPDATE_DATE"),
    ("REPORTDATE", "REPORTDATE"),
    ("BASIC_EPS", "每股收益"),                    # earnings per share
    ("DEDUCT_BASIC_EPS", "DEDUCT_BASIC_EPS"),
    ("TOTAL_OPERATE_INCOME", "营业总收入"),       # total operating revenue
    ("PARENT_NETPROFIT", "净利润"),               # net profit
    ("WEIGHTAVG_ROE", "净资产收益率"),            # return on net assets
    ("YSTZ", "营业总收入同比增长率"),             # revenue growth, year over year
    ("SJLTZ", "净利润同比增长率"),                # net profit growth, year over year
    ("BPS", "每股净资产"),                        # net assets per share
    ("MGJYXJJE", "每股经营现金流"),               # operating cash flow per share
    ("XSMLL", "销售毛利率"),                      # gross sales margin
    ("YSHZ", "营业总收入季度环比增长"),           # revenue growth, quarter over quarter
    ("SJLHZ", "净利润季度环比增长"),              # net profit growth, quarter over quarter
    ("ASSIGNDSCRPT", "分红"),                     # dividend
    ("PAYYEAR", "PAYYEAR"),
    ("PUBLISHNAME", "行业"),                      # industry
    ("ZXGXL", "ZXGXL"),
    ("NOTICE_DATE", "NOTICE_DATE"),
    ("ORG_CODE", "ORG_CODE"),
    ("TRADE_MARKET_ZJG", "TRADE_MARKET_ZJG"),
    ("ISNEW", "ISNEW"),
    ("QDATE", "QDATE"),
    ("DATATYPE", "DATATYPE"),
    ("DATAYEAR", "DATAYEAR"),
    ("DATEMMDD", "DATEMMDD"),
    ("EITIME", "EITIME"),
    ("SECUCODE", "SECUCODE"),
]

raw_col = [raw for raw, _friendly in _column_pairs]
col_name = [friendly for _raw, friendly in _column_pairs]

# Two-column lookup table: column 0 is the raw name, column 1 the friendly name.
pd.DataFrame(list(zip(raw_col, col_name)))

Unnamed: 0,0,1
0,SECURITY_CODE,stock_index
1,SECURITY_NAME_ABBR,stock_name
2,TRADE_MARKET_CODE,TRADE_MARKET_CODE
3,TRADE_MARKET,TRADE_MARKET
4,SECURITY_TYPE_CODE,SECURITY_TYPE_CODE
5,SECURITY_TYPE,SECURITY_TYPE
6,UPDATE_DATE,UPDATE_DATE
7,REPORTDATE,REPORTDATE
8,BASIC_EPS,每股收益
9,DEDUCT_BASIC_EPS,DEDUCT_BASIC_EPS
