# In [40]:
import os
from datetime import date, datetime, timedelta
from glob import glob
from typing import List, Dict, Optional

import numpy as np
import pandas as pd

def _str_to_date(date_str: str) -> date:
    """Convert a ``YYYY-MM-DD`` string into a ``datetime.date``.

    Values that are already date-like are normalised instead of rejected, so
    the helper can be applied to columns that were stored as dates rather than
    as strings.

    :param date_str: a ``YYYY-MM-DD`` string, or a ``date`` / ``datetime``
        (``datetime`` subclasses included)
    :return: the corresponding calendar date
    :raises ValueError: if a string does not match ``%Y-%m-%d``
    """
    # ``datetime`` is a subclass of ``date``, so it has to be tested first to
    # drop its time-of-day part.
    if isinstance(date_str, datetime):
        return date_str.date()
    if isinstance(date_str, date):
        return date_str
    return datetime.strptime(date_str, "%Y-%m-%d").date()

class DataLoader:
    """Load the daily wide table and per-stock minute bars from parquet files.

    The daily table is read lazily on the first ``load_daily_data`` call and
    kept on ``self.daily_df``; minute bars are read per stock code on demand
    and kept in ``self.minutely_cache`` until ``clear_cache`` is called.
    """

    def __init__(self, 
                 daily_data_path: str = r"D:\workspace\xiaoyao\data\widetable.parquet",
                 minutely_data_root: str = r"D:\workspace\xiaoyao\data\stock_minutely_price\stock_minutely_price"):
        """Remember the data locations and discover the available stock codes.

        :param daily_data_path: parquet file holding the daily wide table
        :param minutely_data_root: directory containing one
            ``stock_code=<code>`` entry per stock
        """
        self.daily_data_path = daily_data_path
        self.minutely_data_root = minutely_data_root
        self.daily_df = None
        self.minutely_cache = {}

        # The stock code is whatever follows the last "=" of each matching path.
        self.all_stock_codes = [
            path.split("=")[-1] for path in glob(f"{minutely_data_root}/stock_code=*")
        ]
        print(f"[DataLoader] 初始化完成：检测到{len(self.all_stock_codes)}只股票")

    def load_daily_data(self, start_date: Optional[str] = None, end_date: Optional[str] = None) -> pd.DataFrame:
        """Return a copy of the daily table restricted to an inclusive date range.

        :param start_date: first date to keep (``YYYY-MM-DD``); no lower bound
            when omitted
        :param end_date: last date to keep (``YYYY-MM-DD``); no upper bound
            when omitted
        :return: an independent copy of the matching rows
        """
        if self.daily_df is None:
            # Publish the table only after the date conversion succeeded, so a
            # failed conversion cannot leave a half-prepared table cached.
            daily_df = pd.read_parquet(self.daily_data_path)
            daily_df['date'] = daily_df['date'].apply(_str_to_date)
            self.daily_df = daily_df
            print(f"[DataLoader] 宽表加载完成：{len(self.daily_df)}行数据")

        keep = None
        if start_date:
            keep = self.daily_df['date'] >= _str_to_date(start_date)
        if end_date:
            not_after_end = self.daily_df['date'] <= _str_to_date(end_date)
            keep = not_after_end if keep is None else keep & not_after_end

        # Copy only the rows that survive the filter instead of duplicating
        # the whole wide table first.
        filtered_df = self.daily_df.copy() if keep is None else self.daily_df[keep].copy()

        print(f"[DataLoader] 筛选后数据：{start_date or '开始'}至{end_date or '结束'}，共{len(filtered_df)}行")
        return filtered_df

    def load_minutely_data(self, stock_codes: List[str], date: Optional[str] = None) -> Dict[str, pd.DataFrame]:
        """Load minute bars for several stocks, optionally for a single day.

        :param stock_codes: codes to load; codes without a data file are
            reported and left out of the result
        :param date: ``YYYY-MM-DD`` day to keep; all days when omitted
        :return: mapping of stock code to an independent copy of its bars
        """
        result = {}
        target_date = _str_to_date(date) if date else None

        for code in stock_codes:
            code_path = os.path.join(self.minutely_data_root, f"stock_code={code}", "data.parquet")
            if not os.path.exists(code_path):
                print(f"[DataLoader] 警告：{code}的分钟K文件不存在（路径：{code_path}）")
                continue

            df = self.minutely_cache.get(code)
            if df is None:
                df = pd.read_parquet(code_path)
                df['date'] = df['date'].apply(_str_to_date)
                df['time'] = pd.to_datetime(df['time']).dt.time
                # Cached only once fully converted.
                self.minutely_cache[code] = df

            # Always hand out a copy so callers cannot modify the cached frame.
            if target_date:
                result[code] = df[df['date'] == target_date].copy()
            else:
                result[code] = df.copy()

        return result

    def clear_cache(self):
        """Drop every cached minute-bar frame."""
        self.minutely_cache = {}
        print(f"[DataLoader] 缓存已清空")

# In [41]:
import pandas as pd
import os
import numpy as np
from typing import List, Dict

class HotspotAnalyzer:
    """Rank industries and concepts by daily strength and pick candidate stocks.

    Works on a private copy of the daily wide table, enriched with a parsed
    ``concepts`` list column and a ``pct_change`` column
    (``close / pre_close - 1``).
    """

    def __init__(self, daily_df: pd.DataFrame):
        """Copy ``daily_df`` and derive the ``concepts`` and ``pct_change`` columns.

        ``pre_close`` is reconstructed from the previous row of the same stock
        (ordered by date) when the table does not provide it.
        """
        self.daily_df = daily_df.copy()
        self.daily_df['concepts'] = self.daily_df['concept_name_list'].apply(self._parse_concepts)
        if 'pre_close' not in self.daily_df.columns:
            self.daily_df = self.daily_df.sort_values(['stock_code', 'date'])
            self.daily_df['pre_close'] = self.daily_df.groupby('stock_code')['close'].shift(1)
        self.daily_df['pct_change'] = (self.daily_df['close'] / self.daily_df['pre_close']) - 1
        print(f"[HotspotAnalyzer] 初始化完成：日期范围{self.daily_df['date'].min()}至{self.daily_df['date'].max()}")

    def _parse_concepts(self, concept_data) -> List[str]:
        """Return the stripped, non-empty concept names of one table cell.

        Anything that is neither a list nor a numpy array yields an empty list.
        """
        if isinstance(concept_data, (np.ndarray, list)):
            stripped = (str(item).strip() for item in concept_data)
            return [name for name in stripped if name]
        return []

    @staticmethod
    def _up_ratio(returns: pd.Series) -> float:
        """Return the share of entries in ``returns`` that are strictly positive."""
        return (returns > 0).sum() / len(returns)

    @staticmethod
    def _top_hotspots(stats: pd.DataFrame, name_col: str, min_avg_pct: float,
                      min_up_ratio: float, top_n: int, min_stock_count: int = 3) -> List[str]:
        """Filter weak groups out of ``stats`` and return the ``top_n`` strongest names.

        ``stats`` needs the columns ``stock_count``, ``avg_pct_change`` and
        ``up_ratio``. The score is the sum of the descending ranks of average
        return and up-ratio, so a lower score is hotter.
        """
        # Work on a copy: adding a column to a filtered slice would trigger
        # pandas' chained-assignment warning.
        strong = stats[
            (stats['stock_count'] >= min_stock_count) &
            (stats['avg_pct_change'] >= min_avg_pct) &
            (stats['up_ratio'] >= min_up_ratio)
        ].copy()
        if strong.empty:
            return []
        strong['hot_score'] = (
            strong['avg_pct_change'].rank(ascending=False)
            + strong['up_ratio'].rank(ascending=False)
        )
        return strong.sort_values('hot_score').head(top_n)[name_col].tolist()

    @staticmethod
    def _concept_returns(df_day: pd.DataFrame) -> pd.DataFrame:
        """Return one ``(concept, pct_change)`` row per stock/concept pair.

        Rows whose ``paused`` value equals 1 (when that column exists) and
        rows without concepts contribute nothing.
        """
        tradable = df_day
        if 'paused' in df_day.columns:
            tradable = df_day[df_day['paused'] != 1]
        return (
            tradable[['concepts', 'pct_change']]
            .explode('concepts')
            # Empty concept lists explode into a missing value.
            .dropna(subset=['concepts'])
            .rename(columns={'concepts': 'concept'})
        )

    def get_hot_industries(self, date: str, top_n: int = 5) -> List[str]:
        """Return up to ``top_n`` hot industries for ``date`` (``YYYY-MM-DD``).

        An industry qualifies with at least 3 stocks, an average return of at
        least 1% and at least 50% of its stocks rising.
        """
        df_day = self.daily_df[self.daily_df['date'] == _str_to_date(date)]
        if df_day.empty:
            print(f"[HotspotAnalyzer] 警告：{date}无行业数据")
            return []

        industry_metrics = df_day.groupby('zjw_industry_name').agg(
            avg_pct_change=('pct_change', 'mean'),
            stock_count=('stock_code', 'nunique'),
            up_ratio=('pct_change', self._up_ratio)
        ).reset_index()
        return self._top_hotspots(industry_metrics, 'zjw_industry_name', 0.01, 0.5, top_n)

    def get_hot_concepts(self, date: str, top_n: int = 5) -> List[str]:
        """Return up to ``top_n`` hot concepts for ``date`` (``YYYY-MM-DD``).

        A concept qualifies with at least 3 member rows, an average return of
        at least 1.5% and at least 60% of its members rising.
        """
        df_day = self.daily_df[self.daily_df['date'] == _str_to_date(date)]
        if df_day.empty:
            print(f"[HotspotAnalyzer] 警告：{date}无概念数据")
            return []

        per_concept = self._concept_returns(df_day)
        if per_concept.empty:
            return []
        concept_stats = per_concept.groupby('concept').agg(
            avg_pct_change=('pct_change', 'mean'),
            # Row count per concept, i.e. the number of member stocks.
            stock_count=('pct_change', 'size'),
            up_ratio=('pct_change', self._up_ratio)
        ).reset_index()
        return self._top_hotspots(concept_stats, 'concept', 0.015, 0.6, top_n)

    def save_hot_candidates(self, date: str, save_path: str = ".") -> List[str]:
        """Write the stocks belonging to a hot industry or concept to a CSV file.

        The file is ``hot_candidates_<date>.csv`` inside ``save_path``, which
        is created when missing.

        :return: the unique stock codes of the candidates
        """
        date_obj = _str_to_date(date)
        hot_industries = self.get_hot_industries(date)
        hot_concepts = self.get_hot_concepts(date)
        df_day = self.daily_df[self.daily_df['date'] == date_obj]

        # Build the lookup set once rather than once per row.
        hot_concept_set = set(hot_concepts)
        industry_mask = df_day['zjw_industry_name'].isin(hot_industries)
        concept_mask = df_day['concepts'].apply(
            lambda names: not hot_concept_set.isdisjoint(names)
        ).astype(bool)
        candidate_df = df_day[industry_mask | concept_mask][[
            'stock_code', 'stock_name', 'zjw_industry_name', 'concepts', 'pct_change'
        ]].copy()

        candidate_df['t_date'] = date
        candidate_df['hot_industries'] = str(hot_industries)
        candidate_df['hot_concepts'] = str(hot_concepts)

        if save_path:
            os.makedirs(save_path, exist_ok=True)
        filename = os.path.join(save_path, f"hot_candidates_{date}.csv")
        candidate_df.to_csv(filename, index=False, encoding="utf-8-sig")
        print(f"[HotspotAnalyzer] 热点候选股已保存至：{filename}（共{len(candidate_df)}只）")
        return candidate_df['stock_code'].unique().tolist()

    def check_hotspot_persistence(self, hotspot_type: str, hotspot_name: str, t_date: str) -> bool:
        """Check whether a hotspot is still strong on the day after ``t_date``.

        NOTE(review): "the day after" is the next calendar day; when the table
        has no rows for it (e.g. a weekend) the result is ``False``. Confirm
        that "next trading day" is not what is wanted here.

        :param hotspot_type: ``'industry'``; any other value is treated as a
            concept
        :param hotspot_name: name of the industry or concept
        :param t_date: the T date (``YYYY-MM-DD``)
        :return: ``True`` when at least 3 rows belong to the hotspot on that
            day and their average morning return is at least 0.5%
        """
        t_plus_1_obj = (datetime.strptime(t_date, "%Y-%m-%d") + timedelta(days=1)).date()
        df_t1 = self.daily_df[self.daily_df['date'] == t_plus_1_obj]
        if df_t1.empty:
            return False

        if hotspot_type == 'industry':
            df_hotspot = df_t1[df_t1['zjw_industry_name'] == hotspot_name]
        else:  # concept
            df_hotspot = df_t1[df_t1['concepts'].apply(lambda names: hotspot_name in names)]

        if len(df_hotspot) < 3:
            return False

        # Prefer a dedicated morning-return column; otherwise fall back to the
        # opening gap against the previous close.
        if 'morning_pct' in df_hotspot.columns:
            avg_morning_pct = df_hotspot['morning_pct'].mean()
        else:
            avg_morning_pct = (df_hotspot['open'] / df_hotspot['pre_close'] - 1).mean()

        # bool(): hand back a plain Python bool rather than a numpy scalar.
        return bool(avg_morning_pct >= 0.005)

# In [42]:
import pandas as pd
import os
from typing import List, Tuple, Dict
from datetime import datetime, timedelta

class AuctionSelector:
    """Filter hot-spot candidates by their T+1 opening-auction behaviour.

    NOTE(review): T+1 / T+2 are obtained by adding calendar days, and a T date
    whose T+2 calendar day has no rows in the wide table is skipped. Confirm
    this is intended rather than "next trading day".
    NOTE(review): ``factor`` and ``auc_money`` are required by ``__init__`` but
    not read anywhere in this class; prices are compared unadjusted.
    """

    # Only the first MAX_CANDIDATES hot candidates are examined per T date.
    MAX_CANDIDATES = 200
    # Inclusive bounds for the T+1 open relative to the T close.
    AUCTION_PCT_RANGE = (0.01, 0.03)
    # Minimum ratio of T+1 auction volume to T auction volume.
    MIN_VOLUME_MULTIPLE = 2.5

    def __init__(self, data_loader: "DataLoader", hotspot_analyzer: "HotspotAnalyzer"):
        """Keep the collaborators and verify the wide table has the needed columns.

        :raises ValueError: if ``auc_volume``, ``auc_money`` or ``factor`` is
            missing from the analyzer's daily table
        """
        self.data_loader = data_loader
        self.hotspot_analyzer = hotspot_analyzer
        self.processed_daily_df = hotspot_analyzer.daily_df
        required_fields = ['auc_volume', 'auc_money', 'factor']
        missing_fields = [f for f in required_fields if f not in self.processed_daily_df.columns]
        if missing_fields:
            raise ValueError(f"宽表缺少必要字段：{missing_fields}（factor为复权因子）")
        print(f"[AuctionSelector] 初始化完成，已确认竞价字段和复权因子")

    def _get_t_plus_1_date(self, t_date: str) -> str:
        """Return the calendar day after ``t_date`` as ``YYYY-MM-DD``."""
        return (datetime.strptime(t_date, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")

    def _get_t_plus_2_date(self, t_date: str) -> str:
        """Return the calendar day two days after ``t_date`` as ``YYYY-MM-DD``."""
        return (datetime.strptime(t_date, "%Y-%m-%d") + timedelta(days=2)).strftime("%Y-%m-%d")

    def select_qualified_stocks(self, t_date: str, save_path: str = ".") -> List[Tuple[str, float, float]]:
        """Pick the hot candidates of ``t_date`` that pass the T+1 auction filters.

        A stock qualifies when its auction return lies within
        ``AUCTION_PCT_RANGE``, its auction volume multiple reaches
        ``MIN_VOLUME_MULTIPLE`` and one of its hot industry/concepts is still
        strong on T+1. The result is also written to
        ``auction_qualified_<T>_to_<T+1>.csv`` inside ``save_path`` (created
        when missing).

        :return: ``(stock_code, auction_pct, volume_multiple)`` tuples; empty
            when T+2 has no data or there are no candidates
        """
        t_plus_1_date = self._get_t_plus_1_date(t_date)
        t_plus_2_date = self._get_t_plus_2_date(t_date)
        if not self._is_trading_day(t_plus_2_date):
            print(f"[AuctionSelector] 警告：T+2日（{t_plus_2_date}）非交易日，跳过该T日")
            return []

        if save_path:
            os.makedirs(save_path, exist_ok=True)

        candidate_stocks = self.hotspot_analyzer.save_hot_candidates(t_date, save_path)
        if not candidate_stocks:
            return []

        hot_industries = set(self.hotspot_analyzer.get_hot_industries(t_date))
        hot_concepts = set(self.hotspot_analyzer.get_hot_concepts(t_date))

        # Persistence depends only on (type, name) for a given T date, so each
        # hotspot is evaluated once instead of once per candidate stock.
        persistence_cache: Dict[Tuple[str, str], bool] = {}

        def persists(kind: str, name: str) -> bool:
            key = (kind, name)
            if key not in persistence_cache:
                persistence_cache[key] = self.hotspot_analyzer.check_hotspot_persistence(
                    kind, name, t_date
                )
            return persistence_cache[key]

        min_pct, max_pct = self.AUCTION_PCT_RANGE
        qualified_list = []
        for code in candidate_stocks[:self.MAX_CANDIDATES]:
            indicators = self._calc_auc_indicators(code, t_date, t_plus_1_date)
            if not indicators:
                continue

            auction_pct = indicators['auction_pct']
            volume_multiple = indicators['volume_multiple']
            # Cheap numeric filters first; the negated comparisons also reject NaN.
            if not (min_pct <= auction_pct <= max_pct):
                continue
            if not (volume_multiple >= self.MIN_VOLUME_MULTIPLE):
                continue

            # The industry is tried first, then each hot concept of the stock.
            stock_industry = self._get_stock_industry(code, t_date)
            still_hot = stock_industry in hot_industries and persists('industry', stock_industry)
            if not still_hot:
                still_hot = any(
                    persists('concept', concept)
                    for concept in self._get_stock_concepts(code, t_date)
                    if concept in hot_concepts
                )

            if still_hot:
                qualified_list.append((code, auction_pct, volume_multiple))

        qualified_df = pd.DataFrame(qualified_list, columns=['stock_code', 'auction_pct', 'volume_multiple'])
        qualified_df['t_date'] = t_date
        qualified_df['t_plus_1_date'] = t_plus_1_date
        qualified_df['t_plus_2_date'] = t_plus_2_date
        qualified_df['auction_pct'] = qualified_df['auction_pct'].apply(lambda x: f"{x:.2%}")

        filename = os.path.join(save_path, f"auction_qualified_{t_date}_to_{t_plus_1_date}.csv")
        qualified_df.to_csv(filename, index=False, encoding="utf-8-sig")
        print(f"[AuctionSelector] 竞价合格股已保存至：{filename}（共{len(qualified_df)}只）")
        return qualified_list

    def _stock_rows(self, stock_code: str, date_str: str) -> pd.DataFrame:
        """Return the wide-table rows of ``stock_code`` on ``date_str``."""
        table = self.processed_daily_df
        return table[(table['stock_code'] == stock_code) & (table['date'] == _str_to_date(date_str))]

    def _calc_auc_indicators(self, stock_code: str, t_date: str, t_plus_1_date: str) -> Dict:
        """Compute the auction return and auction volume multiple of one stock.

        :return: ``{'auction_pct', 'volume_multiple'}``, or ``{}`` when either
            day has no row or the T-day auction volume is zero or missing
        """
        df_t = self._stock_rows(stock_code, t_date)
        if df_t.empty:
            return {}
        t_close = df_t['close'].iloc[0]
        t_auc_volume = df_t['auc_volume'].iloc[0]
        # A zero or missing baseline cannot produce a meaningful multiple.
        if pd.isna(t_auc_volume) or t_auc_volume == 0:
            return {}

        df_t1 = self._stock_rows(stock_code, t_plus_1_date)
        if df_t1.empty:
            return {}

        return {
            'auction_pct': (df_t1['open'].iloc[0] / t_close) - 1,
            'volume_multiple': df_t1['auc_volume'].iloc[0] / t_auc_volume
        }

    def _get_stock_industry(self, stock_code: str, t_date: str) -> str:
        """Return the stock's industry on the T date, or ``""`` without a row."""
        df = self._stock_rows(stock_code, t_date)
        return df['zjw_industry_name'].iloc[0] if not df.empty else ""

    def _get_stock_concepts(self, stock_code: str, t_date: str) -> List[str]:
        """Return the stock's concept list on the T date, or ``[]`` without a row."""
        df = self._stock_rows(stock_code, t_date)
        return df['concepts'].iloc[0] if not df.empty else []

    def _is_trading_day(self, date_str: str) -> bool:
        """Return whether the wide table has at least one row dated ``date_str``."""
        date_obj = _str_to_date(date_str)
        return bool((self.processed_daily_df['date'] == date_obj).any())

# In [None]:
import pandas as pd
import os
import numpy as np
from typing import List, Tuple, Dict

class MinuteTracker:
    """Turn auction-qualified stocks into buy signals using 09:30-09:40 minute bars."""

    def __init__(self, data_loader: "DataLoader", qualified_stocks: List[Tuple[str, float, float]]):
        """Store the loader and the stocks to track.

        :param data_loader: source of minute bars and of the daily table
        :param qualified_stocks: ``(stock_code, auction_pct, volume_multiple)``
            tuples
        """
        self.data_loader = data_loader
        self.qualified_stocks = qualified_stocks
        print(f"[MinuteTracker] 初始化完成，待跟踪股票：{len(qualified_stocks)}只")

    def generate_buy_signals(self, t_plus_1_date: str, save_path: str = ".") -> Dict[str, Dict]:
        """Evaluate every tracked stock on ``t_plus_1_date`` and collect buy signals.

        A signal requires a volume ratio of at least 2.2, a window close at
        least 0.8% above the window's first open, and mostly rising minute
        volume. When any signal exists they are also written to
        ``buy_signals_<date>.csv`` inside ``save_path`` (created when missing).

        :return: mapping of stock code to the signal details
        """
        buy_signals = {}
        for code, auc_pct, vol_mult in self.qualified_stocks:
            indicators = self._calc_minute_indicators(code, t_plus_1_date)
            if not indicators:
                continue

            meet_volume = indicators['volume_ratio'] >= 2.2
            meet_price = indicators['track_close'] >= indicators['auction_close'] * 1.008
            meet_volume_increase = indicators['volume_increase']
            if not (meet_volume and meet_price and meet_volume_increase):
                continue

            buy_signals[code] = {
                'auction_pct': f"{auc_pct:.2%}",
                'volume_multiple': vol_mult,
                'volume_ratio': round(indicators['volume_ratio'], 2),
                'price_strength': f"{(indicators['track_close']/indicators['auction_close']-1):.2%}",
                'buy_time': '9:40',
                'volume_increase': indicators['volume_increase']
            }

        if buy_signals:
            buy_df = pd.DataFrame.from_dict(buy_signals, orient='index').reset_index()
            buy_df = buy_df.rename(columns={'index': 'stock_code'})
            buy_df['t_plus_1_date'] = t_plus_1_date

            if save_path:
                os.makedirs(save_path, exist_ok=True)
            filename = os.path.join(save_path, f"buy_signals_{t_plus_1_date}.csv")
            buy_df.to_csv(filename, index=False, encoding="utf-8-sig")
            print(f"[MinuteTracker] 买入信号已保存至：{filename}（共{len(buy_df)}只）")
        return buy_signals

    @staticmethod
    def _opening_window(df_minute: pd.DataFrame) -> pd.DataFrame:
        """Return the 09:30-09:40 bars of ``df_minute``, ordered by time.

        The input is left untouched; the result carries an extra ``datetime``
        column built from the ``date`` and ``time`` columns.
        """
        bars = df_minute.copy()
        bars['datetime'] = pd.to_datetime(bars['date'].astype(str) + ' ' + bars['time'].astype(str))
        in_window = (bars['datetime'].dt.hour == 9) & bars['datetime'].dt.minute.between(30, 40)
        return bars[in_window].sort_values('datetime').reset_index(drop=True)

    @staticmethod
    def _volume_ratio(track_mean_volume: float, prev_volumes: List[float]):
        """Return today's mean volume relative to the median of ``prev_volumes``.

        ``None`` is returned when fewer than 3 reference days are available or
        when the median is zero, negative or not finite, since dividing by
        such a baseline would produce an infinite or meaningless ratio.
        """
        if len(prev_volumes) < 3:
            return None
        baseline = np.median(prev_volumes)
        if not np.isfinite(baseline) or baseline <= 0:
            return None
        return track_mean_volume / baseline

    def _calc_minute_indicators(self, stock_code: str, date: str) -> Dict:
        """Compute the opening-window indicators of one stock on ``date``.

        :return: ``{'volume_ratio', 'track_close', 'auction_close',
            'volume_increase'}``, or ``{}`` when the data is insufficient:
            no minute bars, fewer than 8 bars in the window, no daily row for
            ``date``, fewer than 5 earlier daily dates, or no usable volume
            baseline
        """
        minutely_data = self.data_loader.load_minutely_data([stock_code], date)
        df_minute = minutely_data.get(stock_code)
        if df_minute is None or df_minute.empty:
            return {}

        df_track = self._opening_window(df_minute)
        if len(df_track) < 8:  # too few bars to judge the volume trend
            return {}

        # 1. Volume ratio against the same window on the 5 previous daily dates.
        daily_df = self.data_loader.daily_df
        if daily_df is None:
            # The daily table is loaded lazily by the loader; trigger it here
            # instead of failing on a missing table.
            self.data_loader.load_daily_data()
            daily_df = self.data_loader.daily_df
        all_dates = sorted(daily_df.loc[daily_df['stock_code'] == stock_code, 'date'].unique())
        try:
            target_idx = all_dates.index(_str_to_date(date))
        except ValueError:
            # The stock has minute bars but no daily row for this date.
            return {}
        if target_idx < 5:
            return {}

        prev_volumes = []
        for prev_day in all_dates[target_idx - 5:target_idx]:
            prev_data = self.data_loader.load_minutely_data([stock_code], prev_day.strftime("%Y-%m-%d"))
            prev_min = prev_data.get(stock_code)
            if prev_min is None or prev_min.empty:
                continue
            prev_track = self._opening_window(prev_min)
            if len(prev_track) >= 5:
                prev_volumes.append(prev_track['volume'].mean())

        volume_ratio = self._volume_ratio(df_track['volume'].mean(), prev_volumes)
        if volume_ratio is None:
            return {}

        # 2. Price strength: last close of the window against its first open.
        track_close = df_track.iloc[-1]['close']
        auction_close = df_track.iloc[0]['open']

        # 3. Volume trend: share of bars whose volume exceeds the previous
        #    bar's; the first bar has no predecessor and is left out of the
        #    denominator.
        volumes = df_track['volume']
        rising_bars = (volumes > volumes.shift(1)).sum()
        increase_ratio = rising_bars / (len(df_track) - 1)
        volume_increase = bool(increase_ratio >= 0.8)

        return {
            'volume_ratio': volume_ratio,
            'track_close': track_close,
            'auction_close': auction_close,
            'volume_increase': volume_increase
        }