In [1]:
from types import SimpleNamespace 
from datetime import datetime, timedelta
import akshare as ak
import pandas as pd
import multiprocessing
# import threading
import threading
import queue
import psutil
import time
import talib
from concurrent.futures  import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
import numpy as np
# import kline_daily
import requests
# import cloudscraper
import logging
import random

In [2]:
def stock_zh_a_hist_tx(
        symbol: str = "sz000001",
        start_date: str = "19000101",
        end_date: str = "20500101",
        adjust: str = "",
        timeout: float = None,
) -> pd.DataFrame:
    url = "https://proxy.finance.qq.com/ifzqgtimg/appstock/app/newfqkline/get"
    big_df = pd.DataFrame()
    params = {
        "_var": f"kline_day{adjust}{int(start_date[:4])}",
        "param": f"{symbol},day,{start_date},{end_date},640,{adjust}",
        "r": str(random.random()),
    }
    r = requests.get(url, params=params, timeout=timeout)
    data_text = r.text
    data_json = ak.utils.demjson.decode(data_text[data_text.find("={") + 1:])["data"][
        symbol
    ]
    if "day" in data_json.keys():
        temp_df = pd.DataFrame(data_json["day"])
    elif "hfqday" in data_json.keys():
        temp_df = pd.DataFrame(data_json["hfqday"])
    else:
        temp_df = pd.DataFrame(data_json["qfqday"])
    big_df = pd.concat([big_df, temp_df], ignore_index=True)
    
    big_df = big_df.iloc[:, [0,1,2,3,4,5,7]]
    big_df.columns = ["date", "open", "close", "high", "low", "volume","turnover_rate"]
    big_df["date"] = pd.to_datetime(big_df["date"], errors="coerce").dt.date
    big_df["open"] = pd.to_numeric(big_df["open"], errors="coerce")
    big_df["close"] = pd.to_numeric(big_df["close"], errors="coerce")
    big_df["high"] = pd.to_numeric(big_df["high"], errors="coerce")
    big_df["low"] = pd.to_numeric(big_df["low"], errors="coerce")
    big_df["volume"] = pd.to_numeric(big_df["volume"], errors="coerce")
    big_df["turnover_rate"] = pd.to_numeric(big_df["turnover_rate"], errors="coerce")
    big_df.drop_duplicates(inplace=True, ignore_index=True)
    big_df.index = pd.to_datetime(big_df["date"])
    big_df = big_df[start_date:end_date]
    big_df.reset_index(inplace=True, drop=True)
    return big_df

In [3]:
# 配置 root logger：记录 DEBUG 及以上到 all.log 
logging.basicConfig( 
    filename='all.log', 
    filemode='a',
    level=logging.DEBUG,
    encoding='utf-8',
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
 
# 创建 error logger
error_logger = logging.getLogger('error_logger') 
error_logger.setLevel(logging.ERROR)   # 设置 error_logger 只处理 ERROR 及以上级别 
 
# 创建 error.log  的 handler
error_handler = logging.FileHandler('error.log',  encoding='utf-8') 
error_handler.setLevel(logging.ERROR) 
 
# 设置 error 日志格式
error_formatter = logging.Formatter('%(asctime)s - %(message)s')
error_handler.setFormatter(error_formatter) 
 
# 添加 handler 到 error_logger 
error_logger.addHandler(error_handler) 
 
# 防止日志重复传播到 root logger（避免 error 日志出现在 all.log  中两次）
error_logger.propagate  = False
 
# 测试日志
logging.debug(' 这是 root logger 的 DEBUG 日志，写入 all.log') 
logging.info(' 这是 root logger 的 INFO 日志，写入 all.log') 
logging.critical(' 这是 root logger 的 Critical 日志，写入 all.log') 
error_logger.error(' 这是一个 ERROR 日志，写入 error.log  和 all.log （除非 propagate=False）')

In [4]:
logging.error("dwdw")

In [5]:
# total_len = len(df)
physical_cpus = psutil.cpu_count(logical=False)
logical_cpus = psutil.cpu_count(logical=True)

log_phy_ratio = int(logical_cpus/physical_cpus)
# phy_cpu_length = int(total_len/physical_cpus)
args = SimpleNamespace(
    datedelta = 50,
    start_epochs = 8
    )
today = datetime.now() - timedelta(days=1)
start_day = today - timedelta(days=args.datedelta)
# test_day = today - timedelta(days=1)

# 格式化日期为YYYYMMDD格式
# formatted_today = today.strftime('%Y%m%d')
# formatted_test_day = test_day.strftime('%Y%m%d')
# formatted_start_day = start_day.strftime('%Y%m%d')

# 格式化日期为YYYY-MM-DD格式
formatted_today = today.strftime('%Y-%m-%d')
# formatted_test_day = test_day.strftime('%Y-%m-%d')
formatted_start_day = start_day.strftime('%Y-%m-%d')

In [6]:
with open('today_suggestions.txt', 'w', encoding='utf-8') as today_suggestions,open('history_suggestions.txt', 'a', encoding='utf-8') as history_suggestions:
    today_suggestions.write(formatted_today + "!!!!!!!!~~~~~~~~~~~~~~\n")
    history_suggestions.write(formatted_today + "!!!!!!!!!!!~~~~~~~~~~~~\n")
# df = pd.read_csv('data.csv',  dtype={0: str})
df = pd.read_csv('mainboard_stocks_with_prefix.csv', dtype={'code': str})

In [7]:
stock_codes = list(set(df['code']))
print(len(df) == len(stock_codes))
df.head()

True


Unnamed: 0,code,name
0,sz000001,平安银行
1,sz000002,万 科Ａ
2,sz000004,*ST国华
3,sz000006,深振业Ａ
4,sz000007,全新好


In [8]:
def stock_data_getter(stock_codes, formatted_start_day, formatted_today):
    res_ls = []
    for stock_code in stock_codes:
        # time.sleep(0.8) #avoid abandon from remote
        try:
            stock_zh_a_hist_df = stock_zh_a_hist_tx(symbol=stock_code, start_date=formatted_start_day, end_date=formatted_today)
            if stock_zh_a_hist_df.empty:
                print("wrong code:",stock_code)
            else:
                res_ls.append((stock_code, stock_zh_a_hist_df))
        except Exception as e:
            print("exception:",e)
            logging.error(e)
    print("stock code:",stock_codes[-1],"data collection finished:", time.time())
    return res_ls

In [9]:
aspls = np.array_split(stock_codes, logical_cpus)
# 然后每个子数组是numpy数组，可以转成列表
chunked_list = [arr.tolist() for arr in aspls]

process_lock = multiprocessing.Lock()

stock_data = []


with ThreadPoolExecutor(max_workers=logical_cpus) as executor:
    futures = [executor.submit(stock_data_getter, stock_codes_ls, formatted_start_day, formatted_today)
        for stock_codes_ls in chunked_list]
    for future in as_completed(futures):
        try:
            result = future.result() 
            stock_data += result
        except Exception as e:
            print("Error in got results thread:", e)

stock code: sh603458 data collection finished: 1758416844.0196476
stock code: sz000676 data collection finished: 1758416846.3742354
stock code: sh601166 data collection finished: 1758416846.4352329
stock code: sh605100 data collection finished: 1758416848.6409843
stock code: sz000550 data collection finished: 1758416848.692331
stock code: sz000779 data collection finished: 1758416849.1004617
stock code: sh600128 data collection finished: 1758416849.5760703
stock code: sh603958 data collection finished: 1758416850.5739007
stock code: sh603115 data collection finished: 1758416850.9257543
stock code: sz000151 data collection finished: 1758416851.1916835
stock code: sz002191 data collection finished: 1758416851.3470726
stock code: sz002932 data collection finished: 1758416851.634125
stock code: sh600469 data collection finished: 1758416851.8037398
stock code: sz002688 data collection finished: 1758416851.8308196
stock code: sz002881 data collection finished: 1758416851.9325323
stock code: 

In [10]:
print(len(stock_data),len(stock_codes))

3032 3032


In [11]:
print(stock_data[0][1])

          date  open  close  high   low     volume  turnover_rate
0   2025-08-01  3.18   3.20  3.22  3.17   138465.0           1.06
1   2025-08-04  3.19   3.21  3.21  3.16   115233.0           0.88
2   2025-08-05  3.22   3.22  3.24  3.20   171710.0           1.31
3   2025-08-06  3.21   3.19  3.22  3.17   143996.0           1.10
4   2025-08-07  3.18   3.18  3.21  3.17   148640.0           1.13
5   2025-08-08  3.18   3.22  3.23  3.17   230430.0           1.76
6   2025-08-11  3.22   3.26  3.27  3.19   164850.0           1.26
7   2025-08-12  3.25   3.24  3.28  3.22   177988.0           1.36
8   2025-08-13  3.25   3.22  3.27  3.21   158563.0           1.21
9   2025-08-14  3.21   3.13  3.22  3.12   201174.0           1.53
10  2025-08-15  3.13   3.14  3.16  3.12   177622.0           1.35
11  2025-08-18  3.16   3.19  3.30  3.15   369646.0           2.82
12  2025-08-19  3.20   3.28  3.30  3.18   423284.0           3.23
13  2025-08-20  3.28   3.34  3.34  3.25   346182.0           2.64
14  2025-0

In [12]:
# sdaspls = np.array_split(np.array(stock_data,  dtype=object), physical_cpus)
# sd_chunked_list = [arr.tolist() for arr in sdaspls]

In [13]:
def check_parameter(stock_codes_ls):
    count = 0
    for stock_code, stock_zh_df in stock_codes_ls:
        open = stock_zh_df["open"]
        close = stock_zh_df["close"]
        high = stock_zh_df["high"]
        low = stock_zh_df["low"]
        volume = stock_zh_df["volume"]
        turnover = stock_zh_df["turnover_rate"]

        ma5 = talib.EMA(close, timeperiod=5)
        ma10 = talib.EMA(close, timeperiod=10)
        vol_ma5 = talib.EMA(volume, timeperiod=5)
        atr = talib.NATR(high, low, close, timeperiod=8) #波动
        atr_ma = talib.EMA(atr, timeperiod=5)
        macd, macdsignal, macdhist = talib.MACD(close, fastperiod=7, slowperiod=18, signalperiod=6)

        rsi = talib.RSI(close, timeperiod=14)  # RSI相对强弱指标
        cci = talib.CCI(high, low, close, timeperiod=20)  # 顺势指标
        # money_flow = (2*close - low - high) / (high - low) * volume  # 简易资金流
        money_flow = talib.MFI(high, low, close, volume, timeperiod=9)
        # money_flow_max= money_flow.shift(3).rolling(4).max().iloc[-1]
        # turnover_mean= money_flow.shift(1).rolling(5).mean().iloc[-1]
        today_candlestick = abs(close.iloc[-3] - open.iloc[-3])
        yesterday_candlestick = abs(close.iloc[-4] - open.iloc[-4])
        
        if (close.iloc[-3] > ma5.iloc[-3] and (ma5.iloc[3] > ma10.iloc[-3]  or (ma5.iloc[-3] > ma5.iloc[-4] > ma5.iloc[-5]))) and \
            rsi.iloc[-3] < 60 and atr.iloc[-3]  > atr_ma.iloc[-3] and open.iloc[-3]  < close.iloc[-3]  * 1.03 and \
                volume.iloc[-3] >= vol_ma5.iloc[-3] * 1.5 and turnover.iloc[-3] > 3 and \
                (macd.iloc[-3] > macdsignal.iloc[-3] and macd.iloc[-4] < macdsignal.iloc[-4] and macdhist.iloc[-3] > abs(macdhist.iloc[-4])):
            if close.iloc[-1] > close.iloc[-2]:
                print(f"""stock_code: {stock_code} result: 趋势启动+量价齐升型 \n """)
            else:
                print(f"""XXXXXXstock_code: {stock_code} wrong result: 趋势启动+量价齐升型 \n """)
                count += 1
        elif (rsi.iloc[-3] < 35) and (cci.iloc[-3] < -100) and \
            today_candlestick > 0.5 * yesterday_candlestick and turnover.iloc[-1] > 3 and \
            money_flow.iloc[-3] > money_flow.iloc[-4]:
            if close.iloc[-1] > close.iloc[-2]:
                print(f"""stock_code: {stock_code} result: 超跌反转+资金异动型 \n """)
            else:
                print(f"""XXXXXXstock_code: {stock_code} wrong result: 超跌反转+资金异动型 \n """)
                count += 1
    print("wrong count", count)

check_parameter(stock_data)

stock_code: sz002453 result: 趋势启动+量价齐升型 
 
XXXXXXstock_code: sh600120 wrong result: 趋势启动+量价齐升型 
 
XXXXXXstock_code: sz000151 wrong result: 趋势启动+量价齐升型 
 
XXXXXXstock_code: sz002174 wrong result: 超跌反转+资金异动型 
 
stock_code: sz002319 result: 超跌反转+资金异动型 
 
stock_code: sh603007 result: 趋势启动+量价齐升型 
 
XXXXXXstock_code: sh600390 wrong result: 趋势启动+量价齐升型 
 
XXXXXXstock_code: sz002803 wrong result: 趋势启动+量价齐升型 
 
wrong count 5


In [14]:
# with ProcessPoolExecutor(max_workers=physical_cpus) as executor:
#     futures = [executor.submit(check_parameter, sd_ls)
#         for sd_ls in sd_chunked_list]
#     for future in as_completed(futures):
#         try:
#             future.result() 
#         except Exception as e:
#             print("Error in process:", e)


In [15]:
def run_analysis(stock_codes_ls):
    res_ls = []
    for stock_code, stock_zh_df in stock_codes_ls:
        open = stock_zh_df["open"]
        close = stock_zh_df["close"]
        high = stock_zh_df["high"]
        low = stock_zh_df["low"]
        volume = stock_zh_df["volume"]
        turnover = stock_zh_df["turnover_rate"]

        ma5 = talib.EMA(close, timeperiod=5)
        ma10 = talib.EMA(close, timeperiod=10)
        vol_ma5 = talib.EMA(volume, timeperiod=5)
        atr = talib.NATR(high, low, close, timeperiod=8)
        atr_ma = talib.EMA(atr, timeperiod=5)
        macd, macdsignal, macdhist = talib.MACD(close, fastperiod=7, slowperiod=18, signalperiod=6)#快速EMA：6~8，慢速EMA：15~20，信号线：5~7

        rsi = talib.RSI(close, timeperiod=14)  # RSI相对强弱指标
        cci = talib.CCI(high, low, close, timeperiod=20)  # 顺势指标
        # money_flow = (2*close - low - high) / (high - low) * volume  # 简易资金流
        money_flow = talib.MFI(high, low, close, volume, timeperiod=9)
        # money_flow_max= money_flow.shift(1).rolling(4).max().iloc[-1]
        # turnover_mean= money_flow.shift(1).rolling(5).mean().iloc[-1]
        today_candlestick = abs(close.iloc[-1] - open.iloc[-1])
        yesterday_candlestick = abs(close.iloc[-2] - open.iloc[-2])

        if close.iloc[-1] > ma5.iloc[-1] and (ma5.iloc[1] > ma10.iloc[-1] or (ma5.iloc[-1] > ma5.iloc[-2] > ma5.iloc[-3])) and \
                rsi.iloc[-1] < 60 and atr.iloc[-1]  > atr_ma.iloc[-1] and open.iloc[-1]  < close.iloc[-1]  * 1.03 and \
                volume.iloc[-1] >= vol_ma5.iloc[-1] * 1.5 and turnover.iloc[-1] > 3 and \
                macd.iloc[-1] > macdsignal.iloc[-1] and macd.iloc[-2] < macdsignal.iloc[-2] and macdhist.iloc[-1] > abs(macdhist.iloc[-2]):
            res_ls.append(f"""stock_code: {stock_code} result: 趋势启动+量价齐升型 \n """)
        elif (rsi.iloc[-1] < 35) and (cci.iloc[-1] < -100) and \
            today_candlestick > 0.5 * yesterday_candlestick and turnover.iloc[-1] > 3 and \
            money_flow.iloc[-1] > money_flow.iloc[-2]:
            res_ls.append(f"""stock_code: {stock_code} result: 超跌反转+资金异动型 \n """)

    for result in res_ls:
        print(result)
    # with open('today_suggestions.txt', 'a', encoding='utf-8') as today_suggestions,open('history_suggestions.txt', 'a', encoding='utf-8') as history_suggestions:
    #     for result in res_ls:
    #         today_suggestions.write(result)
    #         history_suggestions.write(result)


run_analysis(stock_data)

stock_code: sh600530 result: 趋势启动+量价齐升型 
 
stock_code: sz002370 result: 超跌反转+资金异动型 
 


In [16]:
# with ProcessPoolExecutor(max_workers=physical_cpus) as executor:
#     futures = [executor.submit(run_analysis, args=(stock_codes_ls, process_lock))
#         for stock_codes_ls in chunked_list]
#     for future in as_completed(futures):
#         try:
#             future.result() 
#         except Exception as e:
#             print("Error in thread:", e)


In [17]:
# def validate_proxy(proxies, result_queue):
#     """验证代理有效性 [6]()"""
#     test_url = "http://icanhazip.com"   # 测试网站 
#     for proxy in proxies:
#         # print("Testing proxy:", proxy)
#         try:
#             start_time = time.time() 
#             resp = requests.get(test_url,  proxies=proxy, timeout=5)
#             latency = int((time.time()  - start_time))  # 计算延迟 
#             # print(resp.status_code, type(resp.status_code),resp.text,proxy)
#             if resp.status_code  == 200:
#                 result_queue.put((latency, proxy))
#         except Exception:
#             pass

# proxies_map = []
# response = requests.get('https://api.proxyscrape.com/v4/free-proxy-list/get?request=display_proxies&proxy_format=protocolipport&format=text',  timeout=30)
# if response.status_code == 200:
#     proxies = response.text.replace("socks4",  "https")
#     proxies_ls = [proxy for proxy in proxies.split('\r\n')  if proxy != ""]

#     for url in proxies_ls:
#         try:
#             # 按 "://" 分割协议和地址（最多分割1次）
#             protocol, _address = url.split('://',  1)
#             if protocol == "http":
#                 proxies_map.append({protocol: url})
#         except ValueError:
#             # 处理无效格式（如缺少 ://）
#             print(f"跳过无效URL: {url}")
#     print(proxies_map)
# aspls = np.array_split(proxies_map, logical_cpus)
# # 然后每个子数组是numpy数组，可以转成列表
# chunked_list = [arr.tolist() for arr in aspls]

# validate_proxies_queue = queue.Queue()
# validate_proxies_ls = []

# process_lock = multiprocessing.Lock()

# with ThreadPoolExecutor(max_workers=logical_cpus) as executor:
#     futures = [executor.submit(validate_proxy, proxies, validate_proxies_queue)
#         for proxies in chunked_list]
#     for future in as_completed(futures):
#         try:
#             future.result()
#         except Exception as e:
#             print("Error in got results thread:", e)

# while not validate_proxies_queue.empty():
#     validate_proxies_ls.append(validate_proxies_queue.get())

# validate_proxies_ls =sorted(validate_proxies_ls, key=lambda x: x[0])
# for i in validate_proxies_ls:
#     validate_proxies_queue.put(i[1])

# print(validate_proxies_ls)
# print(len(validate_proxies_ls))
# print(validate_proxies_queue.empty())

In [18]:
# def stock_data_getter_with_proxies(stock_codes, formatted_start_day, formatted_today, validate_proxies_queue):
#     res_ls = []
#     if validate_proxies_queue.empty():
#         return []
#     proxy = validate_proxies_queue.get()
#     i = 0
#     while i < len(stock_codes):
#         time.sleep(0.8) #avoid abandon from remote
#         stock_code = stock_codes[i]
#         i += 1
#         stock_zh_a_hist_df = kline_daily.stock_zh_a_hist_with_proxy(symbol=stock_code, start_date=formatted_start_day, end_date=formatted_today, proxy={"http":proxy["http"]})
#         if stock_zh_a_hist_df is None:
#             if validate_proxies_queue.empty():
#                 print("proxy ran out")
#                 return []
#             proxy = validate_proxies_queue.get()
#             i -= 1
#         elif stock_zh_a_hist_df.empty:
#             print("wrong code:",stock_code)
#         else:
#             res_ls.append((stock_code, stock_zh_a_hist_df))
#     print("stock code:",stock_codes[-1],"data collection finished:", time.time())
#     return res_ls

In [19]:
# collect_and_analyze_data(["000001","000002"], log_phy_ratio, formatted_start_day, formatted_today, lock, run_analysis)

In [20]:
# sd = ak.stock_zh_a_hist(symbol="000001", period="daily", start_date=formatted_start_day, end_date=formatted_today, adjust="")
# print(sd)
# df_shanghai = ak.index_zh_a_hist( 
#     symbol="000001",      # 上证指数代码（固定为000001）
#     period="daily",       # 数据周期：daily（日线）、weekly（周线）、monthly（月线）
#     start_date="20200101", # 起始日期（格式：YYYYMMDD）
#     end_date="20250904",   # 结束日期（默认为当前日期）
# )

# # 查看前5行数据 
# print(df_shanghai.head()) 

In [21]:
# def get_mainboard_stocks_ak():
#     """使用akshare获取主板股票列表"""
#     # 获取所有上市公司的基本信息
#     stock_info = ak.stock_info_a_code_name()
    
#     # 筛选主板股票
#     mainboard_stocks = stock_info[stock_info['code'].str.startswith(('600', '601', '603', '605', '000', '002'))]
    
#     return mainboard_stocks

# # 获取主板股票
# mainboard_stocks_ak = get_mainboard_stocks_ak()
# print(mainboard_stocks_ak.head())
# mainboard_stocks_ak.to_csv('mainboard_stocks.csv', index=False, encoding='utf-8-sig')
# def add_stock_prefix(code):
#     code_str = str(code).zfill(6)  # 确保代码为6位字符串 
#     if code_str.startswith('6'):    # 上证
#         return 'sh' + code_str
#     elif code_str.startswith(('0',  '3')):  # 深证
#         return 'sz' + code_str 
#     return code_str  # 其他情况保留原格式 
# mainboard_stocks_ak['code'] = mainboard_stocks_ak['code'].apply(add_stock_prefix)
# mainboard_stocks_ak.to_csv('mainboard_stocks_with_prefix.csv', index=False, encoding='utf-8-sig')

In [22]:
# # date="20200331"; choice of {"XXXX0331", "XXXX0630", "XXXX0930", "XXXX1231"}; 从 20081231 开始
# stock_yjyg_em_df = ak.stock_yjyg_em(date="20250630")
# stock_yjyg_em_df_sorted_desc = stock_yjyg_em_df.sort_values(by=stock_yjyg_em_df.columns[6], ascending=False) #'业绩变动幅度'
# # print(stock_yjyg_em_df_sorted_desc.head(10))
# print(stock_yjyg_em_df_sorted_desc.iloc[:, [1,6]].head(10))

In [23]:
# class ProxyManager:
#     """代理IP管理器"""

#     def __init__(self, proxy_api_url, max_retry=3):
#         self.proxy_api_url = proxy_api_url
#         self.max_retry = max_retry

#     def get_valid_proxy(self):
#         """获取有效的代理IP"""
#         for attempt in range(self.max_retry):
#             try:
#                 resp = requests.get(self.proxy_api_url, timeout=5)
#                 proxy_json = resp.json()
#                 proxy_data = proxy_json["data"][0]
#                 server = proxy_data["server"]
#                 ip, port = server.split(":")
#                 proxy = {"http": f"http://{ip}:{port}", "https": f"http://{ip}:{port}"}

#                 # 验证代理可用性
#                 test_url = "http://quote.eastmoney.com"
#                 test = requests.get(test_url, proxies=proxy, timeout=5)
#                 if test.status_code == 200:
#                     print(f"代理可用: {ip}:{port}")
#                     return proxy
#             except Exception as e:
#                 print(f"获取代理失败，第{attempt + 1}次尝试: {e}")

#         print(f"未能获取有效代理")
#         return None