In [18]:
from types import SimpleNamespace 
from datetime import datetime, timedelta
import akshare as ak
import pandas as pd
import multiprocessing
import threading
import queue
import psutil
import time
import talib
from concurrent.futures  import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
import numpy as np

In [19]:
# Run parameters. `datedelta` is the look-back window in calendar days;
# `start_epochs` is not read by any cell visible in this notebook.
args = SimpleNamespace(datedelta=50, start_epochs=8)

today = datetime.now()
test_day = today - timedelta(days=1)
start_day = today - timedelta(days=args.datedelta)

# Render each date in YYYYMMDD form.
formatted_today, formatted_test_day, formatted_start_day = (
    day.strftime('%Y%m%d') for day in (today, test_day, start_day)
)

# 'w' starts today's suggestion file from scratch; 'a' adds a dated header to
# the running history file.
with open('today_suggestions.txt', 'w', encoding='utf-8') as today_suggestions:
    with open('history_suggestions.txt', 'a', encoding='utf-8') as history_suggestions:
        today_suggestions.write(formatted_today + "!!!!!!!!~~~~~~~~~~~~~~\n")
        history_suggestions.write(formatted_today + "!!!!!!!!!!!~~~~~~~~~~~~\n")

# Read the stock list; `code` is forced to str so it is not parsed as a number.
df = pd.read_csv('mainboard_stocks.csv', dtype={'code': str})

In [20]:
# Deduplicate while keeping the CSV's row order. list(set(...)) would order the
# string codes differently in every kernel session (string hashing is
# randomised per interpreter), which makes the per-process chunks and the log
# output irreproducible between runs.
stock_codes = list(dict.fromkeys(df['code']))
# True when the CSV contains no duplicate codes.
print(len(df) == len(stock_codes))
df.head()

True


Unnamed: 0,code,name
0,1,平安银行
1,2,万 科Ａ
2,4,*ST国华
3,6,深振业Ａ
4,7,全新好


In [21]:
total_len = len(df)

# psutil.cpu_count() returns None when the count cannot be determined, which
# would make the division below raise TypeError. Fall back to a usable value.
logical_cpus = psutil.cpu_count(logical=True) or 1
physical_cpus = psutil.cpu_count(logical=False) or logical_cpus

# Logical CPUs per physical core; used later as the number of download threads
# started inside each worker process. Never below 1.
log_phy_ratio = max(1, logical_cpus // physical_cpus)


In [22]:
# Show the logical-to-physical CPU ratio; collect_and_analyze_data only starts a thread pool when it is > 1.
print(log_phy_ratio)

1


In [23]:
def stock_data_getter(stock_codes, formatted_start_day, formatted_today, data_queue):
    """Fetch daily history for each code and enqueue every non-empty result.

    Args:
        stock_codes: iterable of symbols, passed one at a time to
            ``ak.stock_zh_a_hist`` as ``symbol``.
        formatted_start_day: value forwarded as ``start_date``.
        formatted_today: value forwarded as ``end_date``.
        data_queue: object with ``put()``; receives ``(stock_code, DataFrame)``
            tuples for codes that returned data.

    Returns:
        None. Results are delivered through ``data_queue``.

    A failure while fetching one code is reported and skipped so that the
    remaining codes in the chunk are still downloaded.
    """
    for stock_code in stock_codes:
        try:
            stock_zh_a_hist_df = ak.stock_zh_a_hist(symbol=stock_code, period="daily", start_date=formatted_start_day, end_date=formatted_today, adjust="")
        except Exception as e:
            # One bad request must not discard the rest of the chunk.
            print("fetch failed:", stock_code, e)
            continue
        if stock_zh_a_hist_df is None or stock_zh_a_hist_df.empty:
            print("wrong code:",stock_code)
        else:
            data_queue.put((stock_code, stock_zh_a_hist_df))


def collect_and_analyze_data(stock_codes, log_phy_ratio, formatted_start_day, formatted_today, lock, run_analysis):
    """Download a chunk of stocks, screen each one, and append the hits to the suggestion files.

    Args:
        stock_codes: sequence of symbols handled by this call.
        log_phy_ratio: when > 1, the chunk is split into this many parts and
            downloaded by a thread pool; otherwise it is downloaded inline.
        formatted_start_day: start date forwarded to ``stock_data_getter``.
        formatted_today: end date forwarded to ``stock_data_getter``.
        lock: context manager held while the two output files are written.
        run_analysis: callable taking one stock's DataFrame and returning
            ``(suggested, label)``.

    Returns:
        None. Matches are appended to ``today_suggestions.txt`` (codes separated
        by spaces) and ``history_suggestions.txt`` (code plus label).
    """
    if len(stock_codes) == 0:
        # Nothing to do; also avoids indexing stock_codes[0] in the log lines.
        return

    data_queue = queue.Queue()

    if log_phy_ratio > 1:
        # array_split returns numpy arrays; convert back to plain lists and
        # drop parts that came out empty.
        chunked_list = [arr.tolist() for arr in np.array_split(stock_codes, log_phy_ratio) if len(arr)]
        with ThreadPoolExecutor(max_workers=len(chunked_list)) as executor:
            futures = [executor.submit(stock_data_getter, chunk, formatted_start_day, formatted_today, data_queue)
                for chunk in chunked_list]
            for future in futures:
                try:
                    future.result()
                except Exception as e:
                    print("Error in thread:", e)
    else:
        try:
            stock_data_getter(stock_codes, formatted_start_day, formatted_today, data_queue)
        except Exception as e:
            # Same policy as the threaded branch: report, then still analyse
            # whatever was queued before the failure.
            print("Error in data collection:", e)

    print("stock code:",stock_codes[0],"data collection finished:", time.time())

    # All producers have finished, so draining until Empty sees every item.
    res_ls = []
    while True:
        try:
            stock_code, stock_data = data_queue.get_nowait()
        except queue.Empty:
            break
        print("got code:",stock_code)
        try:
            suggested, res = run_analysis(stock_data)
        except Exception as e:
            print("the exception is:",e)
            continue
        if suggested:
            res_ls.append((stock_code, res))
    print("stock code:",stock_codes[0],"data calculation finished:", time.time())

    # Hold the lock for the whole write so entries from concurrent callers
    # are not interleaved.
    with lock:
        with open('today_suggestions.txt', 'a', encoding='utf-8') as today_suggestions,open('history_suggestions.txt', 'a', encoding='utf-8') as history_suggestions:
            for stock_code, result in res_ls:
                today_suggestions.write(str(stock_code) + " ")
                # Write the entry as-is; joining its characters with spaces
                # produced unreadable "s t o c k _ c o d e" text.
                history_suggestions.write(f"stock_code: {stock_code}\nresult: {result}\n\n")
    return

In [24]:
def run_analysis(stock_zh_df):
    """Screen one stock's daily bars against two entry patterns.

    Args:
        stock_zh_df: DataFrame with the columns "开盘", "收盘", "最高", "最低"
            and "成交量" (read below as open, close, high, low and volume),
            ordered oldest to newest.

    Returns:
        ``(True, label)`` when the latest bar matches a pattern, otherwise
        ``(False, None)``. Labels:
          * "趋势动量+量能突破型" - close > MA5 > MA10, volume above 1.8x its
            5-bar average, and MACD crossing above its signal line on the
            latest bar with the histogram exceeding the previous bar's
            magnitude.
          * "超跌反转+资金异动型" - RSI(14) < 30, CCI(20) < -150, today's candle
            body greater than half of yesterday's, and MFI(9) above the
            maximum of the preceding four bars.

    Indicators that are still NaN (too little history) make the corresponding
    comparison False, so short inputs simply do not match.
    """
    # Both patterns compare the latest bar with the previous one.
    if len(stock_zh_df) < 2:
        return False, None

    open_ = stock_zh_df["开盘"]  # trailing underscore: do not shadow builtin open()
    close = stock_zh_df["收盘"]
    high = stock_zh_df["最高"]
    low = stock_zh_df["最低"]
    volume = stock_zh_df["成交量"]

    # Only the latest average is needed, so each SMA is computed on just the
    # trailing window; every element except the last is NaN.
    ma5 = talib.SMA(close.iloc[-5:], timeperiod=5)
    ma10 = talib.SMA(close.iloc[-10:], timeperiod=10)
    vol_ma5 = talib.SMA(volume.iloc[-5:], timeperiod=5)

    macd, macdsignal, macdhist = talib.MACDEXT(close, fastperiod=12, fastmatype=0, slowperiod=20, slowmatype=0, signalperiod=9, signalmatype=0)

    rsi = talib.RSI(close, timeperiod=14)  # relative strength index
    cci = talib.CCI(high, low, close, timeperiod=20)  # commodity channel index
    money_flow = talib.MFI(high, low, close, volume, timeperiod=9)
    # Highest MFI over the four bars before each bar (the bar itself excluded).
    money_flow_max = money_flow.shift(1).rolling(4).max()
    today_candlestick = close.iloc[-1] - open_.iloc[-1]
    yesterday_candlestick = close.iloc[-2] - open_.iloc[-2]

    # Pattern 1. ma5 must be read at its last position: every other element of
    # the windowed SMA is NaN, so any other index makes this always False.
    trend_up = close.iloc[-1] > ma5.iloc[-1] and ma5.iloc[-1] > ma10.iloc[-1]
    volume_surge = volume.iloc[-1] > vol_ma5.iloc[-1] * 1.8
    macd_cross = (
        macd.iloc[-1] > macdsignal.iloc[-1]
        and macd.iloc[-2] < macdsignal.iloc[-2]
        and macdhist.iloc[-1] > abs(macdhist.iloc[-2])
    )
    if trend_up and volume_surge and macd_cross:
        return True, "趋势动量+量能突破型"

    # Pattern 2. Compare against the latest rolling maximum (a scalar);
    # comparing with the whole Series raises "truth value is ambiguous".
    oversold = rsi.iloc[-1] < 30 and cci.iloc[-1] < -150
    candle_recovering = today_candlestick > 0.5 * yesterday_candlestick
    inflow_breakout = money_flow.iloc[-1] > money_flow_max.iloc[-1]
    if oversold and candle_recovering and inflow_breakout:
        return True, "超跌反转+资金异动型"

    return False, None

In [None]:
# One chunk of codes per physical core. array_split yields numpy arrays, so
# convert each back to a plain list; chunks that came out empty (more cores
# than codes) are dropped.
aspls = np.array_split(stock_codes,  physical_cpus)
chunked_list = [arr.tolist() for arr in aspls if len(arr)]

# A plain multiprocessing.Lock() cannot be pickled into a task submitted to a
# process pool ("Lock objects should only be shared between processes through
# inheritance"). A Manager-backed lock is a picklable proxy and still works as
# a context manager. The manager process stays alive so `lock` remains usable
# in later cells.
manager = multiprocessing.Manager()
lock = manager.Lock()

# NOTE(review): worker processes must be able to resolve the notebook-defined
# functions; this works with the "fork" start method - confirm on platforms
# that default to "spawn".
with ProcessPoolExecutor(max_workers=physical_cpus) as executor:
    # submit() forwards positional arguments to the callable; an `args=`
    # keyword would be passed on as a keyword argument named "args" and every
    # task would fail with TypeError.
    futures = [executor.submit(collect_and_analyze_data, stock_codes_ls, log_phy_ratio, formatted_start_day, formatted_today, lock, run_analysis)
        for stock_codes_ls in chunked_list]
    for future in as_completed(futures):
        try:
            future.result()
        except Exception as e:
            print("Error in thread:", e)


In [None]:
# collect_and_analyze_data(["000001","000002"], log_phy_ratio, formatted_start_day, formatted_today, lock, run_analysis)

stock code: 000001 data collection finished: 1757068636.1165752
got code: 000001
got code: 000002


In [27]:
# # ak.stock_zh_a_hist(symbol="000001", period="daily", start_date=formatted_start_day, end_date=formatted_today, adjust="")
# df_shanghai = ak.index_zh_a_hist( 
#     symbol="000001",      # 上证指数代码（固定为000001）
#     period="daily",       # 数据周期：daily（日线）、weekly（周线）、monthly（月线）
#     start_date="20200101", # 起始日期（格式：YYYYMMDD）
#     end_date="20250904",   # 结束日期（默认为当前日期）
# )

# # 查看前5行数据 
# print(df_shanghai.head()) 

In [28]:
# def get_mainboard_stocks_ak():
#     """使用akshare获取主板股票列表"""
#     # 获取所有上市公司的基本信息
#     stock_info = ak.stock_info_a_code_name()
    
#     # 筛选主板股票
#     mainboard_stocks = stock_info[stock_info['code'].str.startswith(('600', '601', '603', '605', '000', '002'))]
    
#     return mainboard_stocks

# # 获取主板股票
# mainboard_stocks_ak = get_mainboard_stocks_ak()
# print(mainboard_stocks_ak.head())
# mainboard_stocks_ak.to_csv('mainboard_stocks.csv', index=False, encoding='utf-8-sig')

In [29]:
# # date="20200331"; choice of {"XXXX0331", "XXXX0630", "XXXX0930", "XXXX1231"}; 从 20081231 开始
# stock_yjyg_em_df = ak.stock_yjyg_em(date="20250630")
# stock_yjyg_em_df_sorted_desc = stock_yjyg_em_df.sort_values(by=stock_yjyg_em_df.columns[6], ascending=False) #'业绩变动幅度'
# # print(stock_yjyg_em_df_sorted_desc.head(10))
# print(stock_yjyg_em_df_sorted_desc.iloc[:, [1,6]].head(10))