In [1]:
import pandas as pd
from sqlalchemy import create_engine

# 尝试导入您的数据库配置文件
try:
    import config
except ImportError:
    print("错误：无法找到配置文件 config.py。请确保该文件与本脚本在同一目录下。")
    exit()

# --- 配置信息 ---
# 从您的爬虫脚本中我们知道表名叫这个
TABLE_NAME = 'stock_balance_sheets' 

def check_table_health():
    """
    Print a health report for the table named by ``TABLE_NAME``.

    The function builds a PostgreSQL engine from the ``config`` module, checks
    that a connection can actually be opened, loads the whole table into a
    DataFrame, and then prints:

    * the row count and ``DataFrame.info()`` (dtypes and non-null counts);
    * for every column that has at least one missing value, the number and
      percentage of missing values, sorted by percentage in descending order.

    Returns:
        None. Connection and query failures are reported with a printed
        message followed by an early return; they are not re-raised.
    """
    # 1. Build the engine and verify that the database is reachable.
    try:
        db_uri = (
            f"postgresql+psycopg2://{config.DB_USER}:{config.DB_PASS}@"
            f"{config.DB_HOST}:{config.DB_PORT}/{config.DB_NAME}"
        )
        engine = create_engine(db_uri)
        # create_engine() is lazy and does not open a connection by itself, so
        # open (and immediately release) one before claiming success.
        with engine.connect():
            pass
        print("数据库连接成功！")
    except Exception as e:
        print(f"数据库连接失败: {e}")
        return

    # 2. Load the whole table into a pandas DataFrame.
    try:
        print(f"\n正在从表 '{TABLE_NAME}' 中加载数据，请稍候...")
        df = pd.read_sql(f"SELECT * FROM {TABLE_NAME}", engine)
        print("数据加载完成！")
    except Exception as e:
        print(f"从表 '{TABLE_NAME}' 读取数据时出错: {e}")
        return
    finally:
        # Everything needed is in memory (or the read failed); release the
        # pooled connections either way.
        engine.dispose()

    # 3. General overview of the DataFrame.
    print("\n--- 数据库体检报告 ---")
    print(f"数据总行数: {len(df)}")
    print("各字段信息概览 (数据类型 & 非空值统计):")
    # info() prints dtypes and non-null counts straight to stdout.
    df.info()

    # 4. Missing-value statistics.
    print("\n\n--- 缺失值统计报告 ---")
    # Number of missing values per column.
    missing_values = df.isnull().sum()

    # Share of missing values per column, in percent.
    missing_percentage = (missing_values / len(df)) * 100

    # Combine both series into one frame so they print side by side.
    missing_stats = pd.DataFrame({
        '缺失值数量': missing_values,
        '缺失值比例 (%)': missing_percentage
    })

    # Keep only columns that really have missing values, worst first.
    missing_stats = missing_stats[missing_stats['缺失值数量'] > 0].sort_values(
        by='缺失值比例 (%)', ascending=False
    )

    if missing_stats.empty:
        print("恭喜！数据中没有任何缺失值。")
    else:
        print("以下字段存在缺失值：")
        # to_string() avoids truncation of long column names.
        print(missing_stats.to_string())

if __name__ == '__main__':
    check_table_health()

数据库连接成功！

正在从表 'stock_balance_sheets' 中加载数据，请稍候...
数据加载完成！

--- 数据库体检报告 ---
数据总行数: 353349
各字段信息概览 (数据类型 & 非空值统计):
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 353349 entries, 0 to 353348
Data columns (total 11 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   ts_code                     353349 non-null  object 
 1   ann_date                    353349 non-null  object 
 2   end_date                    353349 non-null  object 
 3   report_type                 353349 non-null  object 
 4   comp_type                   353349 non-null  object 
 5   total_assets                353319 non-null  float64
 6   total_liab                  352912 non-null  float64
 7   total_hldr_eqy_inc_min_int  352413 non-null  float64
 8   cap_rese                    348767 non-null  float64
 9   undistr_porfit              351736 non-null  float64
 10  accounts_receiv             341766 non-null  float64
dtypes: float64(6), o

In [4]:
# file: patch_missing_data.py
# 功能：检查并补录缺失的资产负债表数据

import time
import pandas as pd
import tushare as ts
from sqlalchemy import create_engine
from tqdm import tqdm

# --- 配置 ---
try:
    import config
except ImportError:
    print("错误：无法找到配置文件 config.py。")
    exit()

STOCK_BASIC_TABLE = 'stock_basic_info'
BALANCE_SHEET_TABLE = 'stock_balance_sheets'
BALANCE_SHEET_FIELDS = [ # 从您原脚本复制过来的字段
    'ts_code', 'ann_date', 'end_date', 'report_type', 'comp_type',
    'total_assets', 'total_liab', 'total_hldr_eqy_inc_min_int', 
    'cap_rese', 'undistr_porfit', 'accounts_receiv', 'inventory',
]
START_DATE = '20190101'
END_DATE = '20241231'

# --- 主程序 ---
def patch_missing_balance_sheets():
    """
    Download and append balance-sheet rows for stocks that have none stored.

    A stock counts as missing when its ``ts_code`` appears in
    ``STOCK_BASIC_TABLE`` but not in ``BALANCE_SHEET_TABLE``. For each missing
    code the Tushare ``balancesheet`` endpoint is queried for the
    ``START_DATE``..``END_DATE`` window; all non-empty results are concatenated
    and appended to ``BALANCE_SHEET_TABLE`` in a single ``to_sql`` call.

    Returns:
        None. Per-stock download errors and the final save error are printed,
        not raised.
    """
    # 1. Database engine and Tushare client
    print("正在初始化连接...")
    engine = create_engine(
        f"postgresql+psycopg2://{config.DB_USER}:{config.DB_PASS}@"
        f"{config.DB_HOST}:{config.DB_PORT}/{config.DB_NAME}"
    )
    pro = ts.pro_api(config.TUSHARE_TOKEN)
    print("连接成功！")

    def codes_from(sql):
        """Run ``sql`` and return its ``ts_code`` column as a set."""
        return set(pd.read_sql(sql, engine)['ts_code'])

    # 2. Codes listed in the master table but absent from the balance-sheet table
    print("\n正在比对数据，查找缺失的股票...")
    known_codes = codes_from(f"SELECT ts_code FROM {STOCK_BASIC_TABLE}")
    stored_codes = codes_from(f"SELECT DISTINCT ts_code FROM {BALANCE_SHEET_TABLE}")
    missing_codes = sorted(known_codes - stored_codes)

    if len(missing_codes) == 0:
        print("恭喜！没有发现任何缺失的股票数据。")
        return

    print(f"发现 {len(missing_codes)} 只股票的数据需要补录。")

    # 3. Download one stock at a time
    print("\n开始补录数据...")
    frames = []
    for code in tqdm(missing_codes, desc="正在补录资产负债表"):
        try:
            sheet = pro.balancesheet(
                ts_code=code,
                start_date=START_DATE,
                end_date=END_DATE,
                fields=','.join(BALANCE_SHEET_FIELDS),
            )
            if not sheet.empty:
                frames.append(sheet)
            # Pause between successful API calls.
            time.sleep(0.6)
        except Exception as err:
            print(f"\n补录 {code} 时再次出错: {err}。本次将跳过。")

    # 4. Persist everything in one append
    if len(frames) == 0:
        print("\n没有成功获取到任何可补录的数据。")
        return

    try:
        print(f"\n成功获取了 {len(frames)} 只股票的数据，正在存入数据库...")
        combined = pd.concat(frames, ignore_index=True)
        combined.to_sql(BALANCE_SHEET_TABLE, engine, if_exists='append', index=False)
        print("数据补录成功！")
    except Exception as err:
        print(f"\n在最后保存数据时出错: {err}")

if __name__ == '__main__':
    patch_missing_balance_sheets()

正在初始化连接...
连接成功！

正在比对数据，查找缺失的股票...
发现 1 只股票的数据需要补录。

开始补录数据...


正在补录资产负债表: 100%|█████████████████████████| 1/1 [00:00<00:00,  1.05it/s]


成功获取了 1 只股票的数据，正在存入数据库...
数据补录成功！





In [6]:
# file: patch_financial_indicators.py
# 功能：检查并补录缺失的财务指标数据

import time
import pandas as pd
import tushare as ts
from sqlalchemy import create_engine
from tqdm import tqdm

# --- 配置 ---
try:
    import config
except ImportError:
    print("错误：无法找到配置文件 config.py。")
    exit()

STOCK_BASIC_TABLE = 'stock_basic_info'
INDICATOR_TABLE = 'stock_financial_indicators' # <--- 修改点 1: 表名
START_DATE = '20190101'
END_DATE = '20241231'

# ==============================================================================
#  重要！请根据您数据库中 'stock_financial_indicators' 表的实际列名，
#  检查并修改下面的字段列表。
# ==============================================================================
INDICATOR_FIELDS = [
    'ts_code',     # 股票代码
    'ann_date',    # 公告日期
    'end_date',    # 报告期
    
    # --- 1. 盈利能力 (Profitability) ---
    'roe_waa',                 # 加权平均净资产收益率(ROE)
    'netprofit_margin',        # 销售净利率
    'grossprofit_margin',      # 销售毛利率
    'profit_to_gr',            # 净利/营业总收入
    'op_of_gr',                # 营业利润/营业总收入
    'roa_yearly',              # 年化总资产报酬率(ROA)
    
    # --- 2. 成长性 (Growth) ---
    'netprofit_yoy',           # 归属母公司股东的净利润同比增长率(%)
    'or_yoy',                  # 营业收入同比增长率(%)
    'gr_yoy',                  # 营业总收入同比增长率(%)
    
    # --- 3. 安全性 (Safety) / 偿债能力 ---
    'debt_to_assets',          # 资产负债率
    'current_ratio',           # 流动比率
    'quick_ratio',             # 速动比率
]
# ==============================================================================


# --- 主程序 (逻辑与上次完全相同) ---
def patch_missing_indicators():
    """
    Download and append financial-indicator rows for stocks that have none stored.

    A stock counts as missing when its ``ts_code`` appears in
    ``STOCK_BASIC_TABLE`` but not in ``INDICATOR_TABLE``. Each missing code is
    fetched from the Tushare ``fina_indicator`` endpoint for the
    ``START_DATE``..``END_DATE`` window with the columns in
    ``INDICATOR_FIELDS``; the non-empty results are concatenated and appended
    to ``INDICATOR_TABLE`` in one ``to_sql`` call.

    Returns:
        None. Download and save errors are printed, not raised.
    """
    # 1. Database engine and Tushare client
    print("正在初始化连接...")
    engine = create_engine(
        f"postgresql+psycopg2://{config.DB_USER}:{config.DB_PASS}@"
        f"{config.DB_HOST}:{config.DB_PORT}/{config.DB_NAME}"
    )
    pro = ts.pro_api(config.TUSHARE_TOKEN)
    print("连接成功！")

    def codes_from(sql):
        """Run ``sql`` and return its ``ts_code`` column as a set."""
        return set(pd.read_sql(sql, engine)['ts_code'])

    # 2. Codes listed in the master table but absent from the indicator table
    print(f"\n正在比对 '{INDICATOR_TABLE}'，查找缺失的股票...")
    known_codes = codes_from(f"SELECT ts_code FROM {STOCK_BASIC_TABLE}")
    stored_codes = codes_from(f"SELECT DISTINCT ts_code FROM {INDICATOR_TABLE}")
    missing_codes = sorted(known_codes - stored_codes)

    if len(missing_codes) == 0:
        print("恭喜！没有发现任何缺失的股票数据。")
        return

    print(f"发现 {len(missing_codes)} 只股票的数据需要补录。")

    # 3. Download one stock at a time
    print("\n开始补录数据...")
    frames = []
    for code in tqdm(missing_codes, desc="正在补录财务指标"):
        try:
            indicators = pro.fina_indicator(
                ts_code=code,
                start_date=START_DATE,
                end_date=END_DATE,
                fields=','.join(INDICATOR_FIELDS),
            )
            if not indicators.empty:
                frames.append(indicators)
            # Pause between successful API calls.
            time.sleep(0.6)
        except Exception as err:
            print(f"\n补录 {code} 时再次出错: {err}。本次将跳过。")

    # 4. Persist everything in one append
    if len(frames) == 0:
        print("\n没有成功获取到任何可补录的数据。")
        return

    try:
        print(f"\n成功获取了 {len(frames)} 只股票的数据，正在存入数据库...")
        combined = pd.concat(frames, ignore_index=True)
        combined.to_sql(INDICATOR_TABLE, engine, if_exists='append', index=False)
        print("数据补录成功！")
    except Exception as err:
        print(f"\n在最后保存数据时出错: {err}")

if __name__ == '__main__':
    patch_missing_indicators()

正在初始化连接...
连接成功！

正在比对 'stock_financial_indicators'，查找缺失的股票...
发现 1 只股票的数据需要补录。

开始补录数据...


正在补录财务指标: 100%|███████████████████████████| 1/1 [00:00<00:00,  1.34it/s]


成功获取了 1 只股票的数据，正在存入数据库...
数据补录成功！





In [7]:
# file: patch_financial_indicators.py
# 功能：检查并补录缺失的财务指标数据

import time
import pandas as pd
import tushare as ts
from sqlalchemy import create_engine
from tqdm import tqdm

# --- 配置 ---
try:
    import config
except ImportError:
    print("错误：无法找到配置文件 config.py。")
    exit()

STOCK_BASIC_TABLE = 'stock_basic_info'
INDICATOR_TABLE = 'stock_financial_indicators' # <--- 修改点 1: 表名
START_DATE = '20190101'
END_DATE = '20241231'

# ==============================================================================
#  重要！请根据您数据库中 'stock_financial_indicators' 表的实际列名，
#  检查并修改下面的字段列表。
# ==============================================================================
INDICATOR_FIELDS = [
    'ts_code',     # stock code
    'ann_date',    # announcement date
    'end_date',    # reporting period

    # --- 1. Profitability ---
    'roe_waa',                 # weighted-average return on equity (ROE)
    'netprofit_margin',        # net profit margin on sales
    'grossprofit_margin',      # gross profit margin on sales
    'profit_to_gr',            # net profit / total operating revenue
    'op_of_gr',                # operating profit / total operating revenue
    'roa_yearly',              # annualized return on total assets (ROA)
    # The trailing comma matters: without it Python concatenates this literal
    # with the next one into the single bogus field 'q_roenetprofit_yoy'.
    'q_roe',
    # --- 2. Growth ---
    'netprofit_yoy',           # YoY growth of net profit attributable to parent-company shareholders (%)
    'or_yoy',                  # YoY growth of operating revenue (%)
    'gr_yoy',                  # YoY growth of total operating revenue (%)

    # --- 3. Safety / solvency ---
    'debt_to_assets',          # debt-to-assets ratio
    'current_ratio',           # current ratio
    'quick_ratio',             # quick ratio
]
# ==============================================================================


# --- 主程序 (逻辑与上次完全相同) ---
def patch_missing_indicators():
    """
    Back-fill ``INDICATOR_TABLE`` for stocks that have no rows in it yet.

    The set of ``ts_code`` values in ``STOCK_BASIC_TABLE`` is compared with the
    distinct codes already present in ``INDICATOR_TABLE``. Every code found
    only in the former is requested from Tushare's ``fina_indicator`` endpoint
    (window ``START_DATE``..``END_DATE``, columns ``INDICATOR_FIELDS``), and
    the collected frames are appended to the table in a single write.

    Returns:
        None. Errors are reported via ``print`` and never propagated.
    """
    # 1. Connections
    print("正在初始化连接...")
    connection_url = (
        f"postgresql+psycopg2://{config.DB_USER}:{config.DB_PASS}@"
        f"{config.DB_HOST}:{config.DB_PORT}/{config.DB_NAME}"
    )
    engine = create_engine(connection_url)
    pro = ts.pro_api(config.TUSHARE_TOKEN)
    print("连接成功！")

    # 2. Work out which stock codes are missing
    print(f"\n正在比对 '{INDICATOR_TABLE}'，查找缺失的股票...")
    master_frame = pd.read_sql(f"SELECT ts_code FROM {STOCK_BASIC_TABLE}", engine)
    stored_frame = pd.read_sql(f"SELECT DISTINCT ts_code FROM {INDICATOR_TABLE}", engine)
    missing_codes = sorted(set(master_frame['ts_code']).difference(stored_frame['ts_code']))

    if len(missing_codes) == 0:
        print("恭喜！没有发现任何缺失的股票数据。")
        return

    print(f"发现 {len(missing_codes)} 只股票的数据需要补录。")

    # 3. Fetch each missing stock
    print("\n开始补录数据...")
    downloaded = []
    for stock_code in tqdm(missing_codes, desc="正在补录财务指标"):
        try:
            frame = pro.fina_indicator(
                ts_code=stock_code,
                start_date=START_DATE,
                end_date=END_DATE,
                fields=','.join(INDICATOR_FIELDS),
            )
            if not frame.empty:
                downloaded.append(frame)
            # Pause between successful API calls.
            time.sleep(0.6)
        except Exception as exc:
            print(f"\n补录 {stock_code} 时再次出错: {exc}。本次将跳过。")

    # 4. Write to the database
    if len(downloaded) == 0:
        print("\n没有成功获取到任何可补录的数据。")
        return

    try:
        print(f"\n成功获取了 {len(downloaded)} 只股票的数据，正在存入数据库...")
        merged = pd.concat(downloaded, ignore_index=True)
        merged.to_sql(INDICATOR_TABLE, engine, if_exists='append', index=False)
        print("数据补录成功！")
    except Exception as exc:
        print(f"\n在最后保存数据时出错: {exc}")

if __name__ == '__main__':
    patch_missing_indicators()

正在初始化连接...
连接成功！

正在比对 'stock_financial_indicators'，查找缺失的股票...
恭喜！没有发现任何缺失的股票数据。


In [8]:
# file: patch_financial_indicators.py (最终版)
# 功能：根据您提供的完整字段列表，检查并补录缺失的财务指标数据

import time
import pandas as pd
import tushare as ts
from sqlalchemy import create_engine
from tqdm import tqdm

# --- 配置 ---
try:
    import config
except ImportError:
    print("错误：无法找到配置文件 config.py。")
    exit()

STOCK_BASIC_TABLE = 'stock_basic_info'
INDICATOR_TABLE = 'stock_financial_indicators'
START_DATE = '20190101'
END_DATE = '20241231'

# --- 字段列表 (根据您的输入生成) ---
INDICATOR_FIELDS = [
    # 核心识别字段
    'ts_code',
    'ann_date',
    'end_date',
    
    # 您提供的完整字段列表
    'dt_eps', 'total_revenue_ps', 'revenue_ps', 'capital_rese_ps', 'surplus_rese_ps',
    'undist_profit_ps', 'extra_item', 'profit_dedt', 'gross_margin', 'current_ratio',
    'quick_ratio', 'cash_ratio', 'invturn_days', 'arturn_days', 'inv_turn', 'ar_turn',
    'ca_turn', 'fa_turn', 'assets_turn', 'op_income', 'valuechange_income',
    'interst_income', 'daa', 'ebit', 'ebitda', 'fcff', 'fcfe', 'current_exint',
    'noncurrent_exint', 'interestdebt', 'netdebt', 'tangible_asset', 'working_capital',
    'networking_capital', 'invest_capital', 'retained_earnings', 'diluted2_eps', 'bps',
    'ocfps', 'retainedps', 'cfps', 'ebit_ps', 'fcff_ps', 'fcfe_ps', 'netprofit_margin',
    'grossprofit_margin', 'cogs_of_sales', 'expense_of_sales', 'profit_to_gr',
    'saleexp_to_gr', 'adminexp_of_gr', 'finaexp_of_gr', 'impai_ttm', 'gc_of_gr',
    'op_of_gr', 'ebit_of_gr', 'roe', 'roe_waa', 'roe_dt', 'roa', 'nptafloat', 'roic',
    'roe_yearly', 'roa2_yearly', 'roe_avg', 'opincome_of_ebt', 'investincome_of_ebt',
    'n_op_profit_of_ebt', 'tax_to_ebt', 'dtprofit_to_profit', 'salescash_to_or',
    'ocf_to_or', 'ocf_to_opincome', 'capitalized_to_da', 'debt_to_assets',
    'assets_to_eqt', 'dp_assets_to_eqt', 'ca_to_assets', 'nca_to_assets',
    'tbassets_to_totalassets', 'int_to_talcap', 'eqt_to_talcapital', 'currentdebt_to_debt',
    'longdeb_to_debt', 'ocf_to_shortdebt', 'debt_to_eqt', 'eqt_to_debt',

    'eqt_to_interestdebt', 'tangibleasset_to_debt', 'tangasset_to_intdebt',
    'tangibleasset_to_netdebt', 'ocf_to_debt', 'ocf_to_interestdebt', 'ocf_to_netdebt',
    'ebit_to_interest', 'longdebt_to_workingcapital', 'ebitda_to_debt', 'turn_days',
    'roa_yearly', 'roa_dp', 'fixed_assets', 'profit_prefin_exp', 'non_op_profit',
    'op_to_ebt', 'nop_to_ebt', 'ocf_to_profit', 'cash_to_liqdebt',
    'cash_to_liqdebt_withinterest', 'op_to_liqdebt', 'op_to_debt', 'roic_yearly',
    'total_fa_trun', 'profit_to_op', 'q_opincome', 'q_investincome', 'q_dtprofit', 'q_eps',
    'q_netprofit_margin', 'q_gsprofit_margin', 'q_exp_to_sales', 'q_profit_to_gr',
    'q_saleexp_to_gr', 'q_adminexp_to_gr', 'q_finaexp_to_gr', 'q_impair_to_gr_ttm',
    'q_gc_to_gr', 'q_op_to_gr', 'q_roe', 'q_dt_roe', 'q_nptafloat', 'q_opincome_to_ebt',
    'q_investincome_to_ebt', 'q_dtprofit_to_profit', 'q_salescash_to_or',
    'q_ocf_to_sales', 'q_ocf_to_or', 'basic_eps_yoy', 'dt_eps_yoy', 'cfps_yoy', 'op_yoy',
    'ebt_yoy', 'netprofit_yoy', 'dt_netprofit_yoy', 'ocf_yoy', 'roe_yoy', 'bps_yoy',
    'assets_yoy', 'eqt_yoy', 'tr_yoy', 'or_yoy', 'q_gr_yoy', 'q_gr_qoq', 'q_sales_yoy',
    'q_sales_qoq', 'q_op_yoy', 'q_op_qoq', 'q_profit_yoy', 'q_profit_qoq',
    'q_netprofit_yoy', 'q_netprofit_qoq', 'equity_yoy', 'rd_exp', 'update_flag'
]

# --- 主程序 (无需修改) ---
def patch_missing_indicators():
    """
    Append financial indicators for every stock absent from ``INDICATOR_TABLE``.

    Missing stocks are those whose ``ts_code`` is in ``STOCK_BASIC_TABLE`` but
    not among the distinct codes of ``INDICATOR_TABLE``. Their data is pulled
    from Tushare's ``fina_indicator`` endpoint using ``START_DATE``,
    ``END_DATE`` and ``INDICATOR_FIELDS``, then written with one
    ``to_sql(..., if_exists='append')`` call.

    Returns:
        None. Failures are printed rather than raised.
    """
    # Connections
    print("正在初始化连接...")
    engine = create_engine(
        f"postgresql+psycopg2://{config.DB_USER}:{config.DB_PASS}@"
        f"{config.DB_HOST}:{config.DB_PORT}/{config.DB_NAME}"
    )
    pro = ts.pro_api(config.TUSHARE_TOKEN)
    print("连接成功！")

    # Reconcile the master list against what is already stored
    print(f"\n正在比对 '{INDICATOR_TABLE}'，查找缺失的股票...")
    expected = set(pd.read_sql(f"SELECT ts_code FROM {STOCK_BASIC_TABLE}", engine)['ts_code'])
    present = set(pd.read_sql(f"SELECT DISTINCT ts_code FROM {INDICATOR_TABLE}", engine)['ts_code'])
    missing_codes = sorted(expected - present)

    if len(missing_codes) == 0:
        print("恭喜！没有发现任何缺失的股票数据。")
        return

    print(f"发现 {len(missing_codes)} 只股票的数据需要补录。")

    # Download phase
    print("\n开始补录数据...")
    collected = []
    for symbol in tqdm(missing_codes, desc="正在补录财务指标"):
        try:
            result = pro.fina_indicator(
                ts_code=symbol,
                start_date=START_DATE,
                end_date=END_DATE,
                fields=','.join(INDICATOR_FIELDS),
            )
            if not result.empty:
                collected.append(result)
            # Pause between successful API calls.
            time.sleep(0.6)
        except Exception as problem:
            print(f"\n补录 {symbol} 时再次出错: {problem}。本次将跳过。")

    # Save phase
    if len(collected) == 0:
        print("\n没有成功获取到任何可补录的数据。")
        return

    try:
        print(f"\n成功获取了 {len(collected)} 只股票的数据，正在存入数据库...")
        stacked = pd.concat(collected, ignore_index=True)
        stacked.to_sql(INDICATOR_TABLE, engine, if_exists='append', index=False)
        print("数据补录成功！")
    except Exception as problem:
        print(f"\n在最后保存数据时出错: {problem}")

if __name__ == '__main__':
    patch_missing_indicators()

正在初始化连接...
连接成功！

正在比对 'stock_financial_indicators'，查找缺失的股票...
恭喜！没有发现任何缺失的股票数据。


In [9]:
# file: verify_and_add_columns.py
# 功能：自动化检查表的字段是否齐全，并补上缺失的字段。

import pandas as pd
from sqlalchemy import create_engine, text

# --- 配置 ---
try:
    import config
except ImportError:
    print("错误：无法找到配置文件 config.py。")
    exit()

TARGET_TABLE = 'stock_financial_indicators'

# --- 基于您给出的完整列表，我们定义期望的字段及其Tushare类型 ---
# 我已经帮您从那一大段文字中解析好了
EXPECTED_SCHEMA = {
    'dt_eps': 'floatY', 'total_revenue_ps': 'floatY', 'revenue_ps': 'floatY', 'capital_rese_ps': 'floatY', 
    'surplus_rese_ps': 'floatY', 'undist_profit_ps': 'floatY', 'extra_item': 'floatY', 'profit_dedt': 'floatY', 
    'gross_margin': 'floatY', 'current_ratio': 'floatY', 'quick_ratio': 'floatY', 'cash_ratio': 'floatY', 
    'invturn_days': 'floatN', 'arturn_days': 'floatN', 'inv_turn': 'floatN', 'ar_turn': 'floatY', 'ca_turn': 'floatY', 
    'fa_turn': 'floatY', 'assets_turn': 'floatY', 'op_income': 'floatY', 'valuechange_income': 'floatN', 
    'interst_income': 'floatN', 'daa': 'floatN', 'ebit': 'floatY', 'ebitda': 'floatY', 'fcff': 'floatY', 
    'fcfe': 'floatY', 'current_exint': 'floatY', 'noncurrent_exint': 'floatY', 'interestdebt': 'floatY', 
    'netdebt': 'floatY', 'tangible_asset': 'floatY', 'working_capital': 'floatY', 'networking_capital': 'floatY', 
    'invest_capital': 'floatY', 'retained_earnings': 'floatY', 'diluted2_eps': 'floatY', 'bps': 'floatY', 
    'ocfps': 'floatY', 'retainedps': 'floatY', 'cfps': 'floatY', 'ebit_ps': 'floatY', 'fcff_ps': 'floatY', 
    'fcfe_ps': 'floatY', 'netprofit_margin': 'floatY', 'grossprofit_margin': 'floatY', 'cogs_of_sales': 'floatY', 
    'expense_of_sales': 'floatY', 'profit_to_gr': 'floatY', 'saleexp_to_gr': 'floatY', 'adminexp_of_gr': 'floatY', 
    'finaexp_of_gr': 'floatY', 'impai_ttm': 'floatY', 'gc_of_gr': 'floatY', 'op_of_gr': 'floatY', 'ebit_of_gr': 'floatY', 
    'roe': 'floatY', 'roe_waa': 'floatY', 'roe_dt': 'floatY', 'roa': 'floatY', 'nptafloat': 'floatY', 'roic': 'floatY', 
    'roe_yearly': 'floatY', 'roa2_yearly': 'floatY', 'roe_avg': 'floatN', 'opincome_of_ebt': 'floatN', 
    'investincome_of_ebt': 'floatN', 'n_op_profit_of_ebt': 'floatN', 'tax_to_ebt': 'floatN', 
    'dtprofit_to_profit': 'floatN', 'salescash_to_or': 'floatN', 'ocf_to_or': 'floatN', 'ocf_to_opincome': 'floatN', 
    'capitalized_to_da': 'floatN', 'debt_to_assets': 'floatY', 'assets_to_eqt': 'floatY', 'dp_assets_to_eqt': 'floatY', 
    'ca_to_assets': 'floatY', 'nca_to_assets': 'floatY', 'tbassets_to_totalassets': 'floatY', 'int_to_talcap': 'floatY', 
    'eqt_to_talcapital': 'floatY', 'currentdebt_to_debt': 'floatY', 'longdeb_to_debt': 'floatY', 'ocf_to_shortdebt': 'floatY', 
    'debt_to_eqt': 'floatY', 'eqt_to_debt': 'floatY', 'eqt_to_interestdebt': 'floatY', 'tangibleasset_to_debt': 'floatY', 
    'tangasset_to_intdebt': 'floatY', 'tangibleasset_to_netdebt': 'floatY', 'ocf_to_debt': 'floatY', 
    'ocf_to_interestdebt': 'floatN', 'ocf_to_netdebt': 'floatN', 'ebit_to_interest': 'floatN', 
    'longdebt_to_workingcapital': 'floatN', 'ebitda_to_debt': 'floatN', 'turn_days': 'floatY', 'roa_yearly': 'floatY', 
    'roa_dp': 'floatY', 'fixed_assets': 'floatY', 'profit_prefin_exp': 'floatN', 'non_op_profit': 'floatN', 
    'op_to_ebt': 'floatN', 'nop_to_ebt': 'floatN', 'ocf_to_profit': 'floatN', 'cash_to_liqdebt': 'floatN', 
    'cash_to_liqdebt_withinterest': 'floatN', 'op_to_liqdebt': 'floatN', 'op_to_debt': 'floatN', 'roic_yearly': 'floatN', 
    'total_fa_trun': 'floatN', 'profit_to_op': 'floatY', 'q_opincome': 'floatN', 'q_investincome': 'floatN', 
    'q_dtprofit': 'floatN', 'q_eps': 'floatN', 'q_netprofit_margin': 'floatN', 'q_gsprofit_margin': 'floatN', 
    'q_exp_to_sales': 'floatN', 'q_profit_to_gr': 'floatN', 'q_saleexp_to_gr': 'floatY', 'q_adminexp_to_gr': 'floatN', 
    'q_finaexp_to_gr': 'floatN', 'q_impair_to_gr_ttm': 'floatN', 'q_gc_to_gr': 'floatY', 'q_op_to_gr': 'floatN', 
    'q_roe': 'floatY', 'q_dt_roe': 'floatY', 'q_nptafloat': 'floatY', 'q_opincome_to_ebt': 'floatN', 
    'q_investincome_to_ebt': 'floatN', 'q_dtprofit_to_profit': 'floatN', 'q_salescash_to_or': 'floatN', 
    'q_ocf_to_sales': 'floatY', 'q_ocf_to_or': 'floatN', 'basic_eps_yoy': 'floatY', 'dt_eps_yoy': 'floatY', 
    'cfps_yoy': 'floatY', 'op_yoy': 'floatY', 'ebt_yoy': 'floatY', 'netprofit_yoy': 'floatY', 'dt_netprofit_yoy': 'floatY', 
    'ocf_yoy': 'floatY', 'roe_yoy': 'floatY', 'bps_yoy': 'floatY', 'assets_yoy': 'floatY', 'eqt_yoy': 'floatY', 
    'tr_yoy': 'floatY', 'or_yoy': 'floatY', 'q_gr_yoy': 'floatN', 'q_gr_qoq': 'floatN', 'q_sales_yoy': 'floatY', 
    'q_sales_qoq': 'floatN', 'q_op_yoy': 'floatN', 'q_op_qoq': 'floatY', 'q_profit_yoy': 'floatN', 
    'q_profit_qoq': 'floatN', 'q_netprofit_yoy': 'floatN', 'q_netprofit_qoq': 'floatN', 'equity_yoy': 'floatY', 
    'rd_exp': 'floatN', 'update_flag': 'strN'
}
# 加上必须的识别字段
EXPECTED_SCHEMA['ts_code'] = 'strN'
EXPECTED_SCHEMA['ann_date'] = 'strN'
EXPECTED_SCHEMA['end_date'] = 'strN'

def get_sql_type(tushare_type):
    """
    Map a Tushare type tag to the PostgreSQL column type used for it.

    Args:
        tushare_type: Type tag such as ``'floatY'`` or ``'strN'``; may be
            ``None`` or empty.

    Returns:
        ``'DOUBLE PRECISION'`` for tags starting with ``'float'``, ``'BIGINT'``
        for tags starting with ``'int'``, and ``'TEXT'`` for everything else
        (including ``None`` and the empty string).
    """
    tag = tushare_type or ''
    for prefix, sql_type in (('float', 'DOUBLE PRECISION'), ('int', 'BIGINT')):
        if tag.startswith(prefix):
            return sql_type
    # String tags and anything unrecognised are stored as text.
    return 'TEXT'

def verify_and_add_columns():
    """
    Make sure ``TARGET_TABLE`` has every column listed in ``EXPECTED_SCHEMA``.

    The current columns are read from ``information_schema.columns`` (schema
    ``public``). Each expected column that is absent is added with
    ``ALTER TABLE ... ADD COLUMN``, using ``get_sql_type`` to choose the
    PostgreSQL type. Every ``ALTER`` is committed on its own, so one failing
    column does not undo the others.

    Returns:
        None. Progress and failures are printed. The function stops early when
        the table metadata cannot be read, when the table has no columns at
        all (it most likely does not exist), or when nothing is missing.
    """
    # 1. Build the engine.
    print("正在初始化数据库连接...")
    db_uri = (f"postgresql+psycopg2://{config.DB_USER}:{config.DB_PASS}@"
              f"{config.DB_HOST}:{config.DB_PORT}/{config.DB_NAME}")
    engine = create_engine(db_uri)

    # 2. Read the columns that currently exist.
    print(f"正在读取 '{TARGET_TABLE}' 表的现有字段...")
    try:
        with engine.connect() as connection:
            # The table name is a value here, so it is passed as a bound
            # parameter instead of being spliced into the SQL text.
            query = text("""
                SELECT column_name
                FROM information_schema.columns
                WHERE table_schema = 'public' AND table_name = :table_name
            """)
            result = connection.execute(query, {"table_name": TARGET_TABLE})
            existing_columns = {row[0] for row in result}
            print(f"表中当前有 {len(existing_columns)} 个字段。")
    except Exception as e:
        print(f"读取表结构失败: {e}")
        print("请检查表名是否正确，或者表是否确实存在。")
        return

    # A missing table does not raise: the metadata query simply returns no
    # rows. Stop here rather than firing one failing ALTER per expected column.
    if not existing_columns:
        print(f"未在 public 模式下找到表 '{TARGET_TABLE}' 的任何字段。")
        print("请检查表名是否正确，或者表是否确实存在。")
        return

    # 3. Diff expected vs. existing columns.
    expected_columns_set = set(EXPECTED_SCHEMA.keys())
    missing_columns = expected_columns_set - existing_columns

    if not missing_columns:
        print("\n恭喜！表结构完整，所有期望的字段都已存在，无需任何操作。")
        return

    print(f"\n检查发现缺失 {len(missing_columns)} 个字段，列表如下:")
    print(missing_columns)

    # 4. Add each missing column with its own ALTER TABLE statement.
    print("\n正在向表中添加缺失的字段...")
    failed_columns = []
    with engine.connect() as connection:
        for col_name in sorted(missing_columns):  # sorted for a stable order
            try:
                col_type = get_sql_type(EXPECTED_SCHEMA.get(col_name))
                # Identifiers cannot be bound parameters; both names come from
                # module-level constants and are double-quoted.
                add_column_sql = text(f'ALTER TABLE "{TARGET_TABLE}" ADD COLUMN "{col_name}" {col_type};')
                connection.execute(add_column_sql)
                # Commit per column so earlier additions survive a later failure.
                connection.commit()
                print(f"  - 成功添加字段: '{col_name}' (类型: {col_type})")
            except Exception as e:
                print(f"  - !!! 添加字段 '{col_name}' 时失败: {e}")
                failed_columns.append(col_name)
                # Clear the failed transaction so the next column can proceed.
                connection.rollback()

    # Only report full success when every ALTER actually went through.
    if failed_columns:
        print(f"\n字段添加结束，但有 {len(failed_columns)} 个字段添加失败: {failed_columns}")
    else:
        print("\n所有缺失字段添加完毕！现在您的表结构是最新的了。")

if __name__ == '__main__':
    verify_and_add_columns()

正在初始化数据库连接...
正在读取 'stock_financial_indicators' 表的现有字段...
表中当前有 108 个字段。

检查发现缺失 61 个字段，列表如下:
{'q_exp_to_sales', 'q_op_yoy', 'q_profit_yoy', 'q_dtprofit_to_profit', 'q_investincome_to_ebt', 'nptafloat', 'q_finaexp_to_gr', 'arturn_days', 'ocf_to_netdebt', 'nop_to_ebt', 'capitalized_to_da', 'q_op_to_gr', 'profit_prefin_exp', 'q_eps', 'q_profit_to_gr', 'q_investincome', 'q_opincome', 'inv_turn', 'q_nptafloat', 'ebit_to_interest', 'roe_avg', 'salescash_to_or', 'valuechange_income', 'daa', 'q_ocf_to_or', 'q_netprofit_yoy', 'rd_exp', 'tax_to_ebt', 'update_flag', 'ocf_to_or', 'q_gr_qoq', 'q_gsprofit_margin', 'dtprofit_to_profit', 'n_op_profit_of_ebt', 'opincome_of_ebt', 'q_profit_qoq', 'q_netprofit_margin', 'non_op_profit', 'q_impair_to_gr_ttm', 'ocf_to_opincome', 'q_dtprofit', 'ebitda_to_debt', 'q_sales_qoq', 'ocf_to_interestdebt', 'roic_yearly', 'q_adminexp_to_gr', 'q_salescash_to_or', 'q_netprofit_qoq', 'op_to_debt', 'invturn_days', 'q_gr_yoy', 'investincome_of_ebt', 'total_fa_trun', 'ocf_

In [None]:
# file: full_data_backfill.py
# 功能：对所有股票进行财务指标数据的全量回填，并处理API分页限制。

import time
from datetime import datetime, timedelta

import pandas as pd
import tushare as ts
from sqlalchemy import create_engine, text
from tqdm import tqdm

# --- 配置 ---
try:
    import config
except ImportError:
    print("错误：无法找到配置文件 config.py。")
    exit()

STOCK_BASIC_TABLE = 'stock_basic_info'
INDICATOR_TABLE = 'stock_financial_indicators'
START_DATE = '20180101'
END_DATE = '20250531' # 您可以根据需要调整最新的日期

# 从上个脚本复制过来的完整字段列表
INDICATOR_FIELDS = [
    'ts_code', 'ann_date', 'end_date', 'dt_eps', 'total_revenue_ps', 'revenue_ps', 'capital_rese_ps', 'surplus_rese_ps',
    'undist_profit_ps', 'extra_item', 'profit_dedt', 'gross_margin', 'current_ratio', 'quick_ratio', 'cash_ratio', 
    'invturn_days', 'arturn_days', 'inv_turn', 'ar_turn', 'ca_turn', 'fa_turn', 'assets_turn', 'op_income', 
    'valuechange_income', 'interst_income', 'daa', 'ebit', 'ebitda', 'fcff', 'fcfe', 'current_exint', 
    'noncurrent_exint', 'interestdebt', 'netdebt', 'tangible_asset', 'working_capital', 'networking_capital', 
    'invest_capital', 'retained_earnings', 'diluted2_eps', 'bps', 'ocfps', 'retainedps', 'cfps', 'ebit_ps', 
    'fcff_ps', 'fcfe_ps', 'netprofit_margin', 'grossprofit_margin', 'cogs_of_sales', 'expense_of_sales', 
    'profit_to_gr', 'saleexp_to_gr', 'adminexp_of_gr', 'finaexp_of_gr', 'impai_ttm', 'gc_of_gr', 'op_of_gr', 
    'ebit_of_gr', 'roe', 'roe_waa', 'roe_dt', 'roa', 'nptafloat', 'roic', 'roe_yearly', 'roa2_yearly', 'roe_avg', 
    'opincome_of_ebt', 'investincome_of_ebt', 'n_op_profit_of_ebt', 'tax_to_ebt', 'dtprofit_to_profit', 
    'salescash_to_or', 'ocf_to_or', 'ocf_to_opincome', 'capitalized_to_da', 'debt_to_assets', 'assets_to_eqt', 
    'dp_assets_to_eqt', 'ca_to_assets', 'nca_to_assets', 'tbassets_to_totalassets', 'int_to_talcap', 
    'eqt_to_talcapital', 'currentdebt_to_debt', 'longdeb_to_debt', 'ocf_to_shortdebt', 'debt_to_eqt', 'eqt_to_debt',
    'eqt_to_interestdebt', 'tangibleasset_to_debt', 'tangasset_to_intdebt', 'tangibleasset_to_netdebt', 
    'ocf_to_debt', 'ocf_to_interestdebt', 'ocf_to_netdebt', 'ebit_to_interest', 'longdebt_to_workingcapital', 
    'ebitda_to_debt', 'turn_days', 'roa_yearly', 'roa_dp', 'fixed_assets', 'profit_prefin_exp', 'non_op_profit', 
    'op_to_ebt', 'nop_to_ebt', 'ocf_to_profit', 'cash_to_liqdebt', 'cash_to_liqdebt_withinterest', 'op_to_liqdebt', 
    'op_to_debt', 'roic_yearly', 'total_fa_trun', 'profit_to_op', 'q_opincome', 'q_investincome', 'q_dtprofit', 'q_eps',
    'q_netprofit_margin', 'q_gsprofit_margin', 'q_exp_to_sales', 'q_profit_to_gr', 'q_saleexp_to_gr', 
    'q_adminexp_to_gr', 'q_finaexp_to_gr', 'q_impair_to_gr_ttm', 'q_gc_to_gr', 'q_op_to_gr', 'q_roe', 'q_dt_roe', 
    'q_nptafloat', 'q_opincome_to_ebt', 'q_investincome_to_ebt', 'q_dtprofit_to_profit', 'q_salescash_to_or', 
    'q_ocf_to_sales', 'q_ocf_to_or', 'basic_eps_yoy', 'dt_eps_yoy', 'cfps_yoy', 'op_yoy', 'ebt_yoy', 'netprofit_yoy', 
    'dt_netprofit_yoy', 'ocf_yoy', 'roe_yoy', 'bps_yoy', 'assets_yoy', 'eqt_yoy', 'tr_yoy', 'or_yoy', 'q_gr_yoy', 
    'q_gr_qoq', 'q_sales_yoy', 'q_sales_qoq', 'q_op_yoy', 'q_op_qoq', 'q_profit_yoy', 'q_profit_qoq', 
    'q_netprofit_yoy', 'q_netprofit_qoq', 'equity_yoy', 'rd_exp', 'update_flag'
]

def fetch_all_data_with_pagination(pro, ts_code, start_date, end_date, fields):
    """
    Collect all ``fina_indicator`` rows for one stock by walking the date window backwards.

    Each request asks for ``start_date``..``window_end``. A page with fewer
    than 100 rows is treated as the last one. Otherwise ``window_end`` is moved
    to the day before the smallest ``ann_date`` in the page and the request is
    repeated, stopping once ``window_end`` falls before ``start_date``.

    NOTE(review): this assumes the endpoint returns at most 100 rows per call
    and that moving ``end_date`` by ``ann_date`` reaches older rows — confirm
    against the Tushare documentation.

    Args:
        pro: Client object exposing ``fina_indicator(**kwargs)``.
        ts_code: Stock code passed through to the endpoint.
        start_date: Lower bound, ``YYYYMMDD`` string.
        end_date: Upper bound, ``YYYYMMDD`` string.
        fields: Iterable of column names; sent comma-joined.

    Returns:
        A de-duplicated DataFrame of all pages, an empty DataFrame when the
        first response has no rows, or ``None`` when any request or date
        computation raises (the error is printed).
    """
    pages = []
    window_end = end_date
    keep_going = True

    while keep_going:
        try:
            page = pro.fina_indicator(
                ts_code=ts_code,
                start_date=start_date,
                end_date=window_end,
                fields=','.join(fields),
            )

            has_rows = page is not None and not page.empty
            if has_rows:
                pages.append(page)

            if not has_rows or len(page) < 100:
                # Nothing came back, or a short page: no further requests.
                keep_going = False
            else:
                # Full page: shift the window to end one day before the
                # earliest announcement date seen in this page.
                oldest_announcement = datetime.strptime(page['ann_date'].min(), '%Y%m%d')
                window_end = (oldest_announcement - timedelta(days=1)).strftime('%Y%m%d')

                if window_end < start_date:
                    # The window is exhausted (YYYYMMDD strings sort by date).
                    keep_going = False
                else:
                    # Pause before requesting the next page.
                    time.sleep(0.6)

        except Exception as err:
            print(f"\n在为 {ts_code} 分页下载时出错: {err}。终止该股票的下载。")
            return None  # None signals a failed download to the caller

    if len(pages) == 0:
        return pd.DataFrame()

    return pd.concat(pages, ignore_index=True).drop_duplicates()

def backfill_all_data():
    """
    Replace the stored financial-indicator rows of every stock with a fresh download.

    For each ts_code read from STOCK_BASIC_TABLE the complete data set is
    downloaded first. Only when that download succeeded are the old rows deleted
    from INDICATOR_TABLE and the new rows inserted, both inside one transaction.
    A failed download (None) or a database error leaves the old rows untouched.
    A successful but empty download still removes the old rows, because the
    stored data is meant to mirror the download.

    Returns:
        None. Progress, errors and a list of failed codes are printed.
    """
    # 1. Connect to the database and the data API.
    print("正在初始化连接...")
    db_uri = (f"postgresql+psycopg2://{config.DB_USER}:{config.DB_PASS}@"
              f"{config.DB_HOST}:{config.DB_PORT}/{config.DB_NAME}")
    engine = create_engine(db_uri)
    pro = ts.pro_api(config.TUSHARE_TOKEN)
    print("连接成功！")

    # 2. Load the list of stock codes to process.
    all_stocks_df = pd.read_sql(f"SELECT ts_code FROM {STOCK_BASIC_TABLE}", engine)
    all_stock_codes = all_stocks_df['ts_code'].tolist()
    print(f"将为 {len(all_stock_codes)} 只股票进行数据全量回填...")

    # The statement is identical for every stock; only the bound value changes.
    delete_sql = text(f"DELETE FROM {INDICATOR_TABLE} WHERE ts_code = :code")
    failed_codes = []

    # 3. Download, then delete-and-insert, one stock at a time.
    with engine.connect() as connection:
        for ts_code in tqdm(all_stock_codes, desc="全量回填财务指标"):
            try:
                # Download BEFORE touching the table so that no transaction is
                # held open during network calls and a failed download cannot
                # end in a committed DELETE.
                full_data_df = fetch_all_data_with_pagination(pro, ts_code, START_DATE, END_DATE, INDICATOR_FIELDS)

                if full_data_df is None:
                    failed_codes.append(ts_code)
                    print(f"已跳过 {ts_code}：下载失败，其旧数据保持不变。")
                else:
                    # The context manager commits on success and rolls back if
                    # the block raises, so delete and insert succeed or fail together.
                    with connection.begin():
                        connection.execute(delete_sql, {"code": ts_code})
                        if not full_data_df.empty:
                            full_data_df.to_sql(INDICATOR_TABLE, connection, if_exists='append', index=False)

            except Exception as e:
                failed_codes.append(ts_code)
                print(f"\n处理 {ts_code} 时发生严重错误: {e}")
                print(f"已回滚对 {ts_code} 的操作，其旧数据保持不变。")

            time.sleep(0.6)  # pause between stocks to respect the API rate limit

    if failed_codes:
        print(f"\n有 {len(failed_codes)} 只股票未能回填，其旧数据保持不变: {failed_codes}")
    print("\n所有股票数据回填完毕！")


if __name__ == '__main__':
    # Remind the operator once more that this run is destructive.
    separator = "=" * 60
    for banner_line in (separator, "警告：此脚本将删除并重新下载数据，请确保您已备份！", separator):
        print(banner_line)
    # The interactive yes/no confirmation is deliberately disabled for
    # convenience, so the backfill starts right away.
    backfill_all_data()

In [11]:
# file: full_data_backfill.py
# 功能：对所有股票进行财务指标数据的全量回填，并处理API分页限制。

import time
from datetime import datetime, timedelta

import pandas as pd
import tushare as ts
from sqlalchemy import create_engine, text
from tqdm import tqdm

# --- 配置 ---
try:
    import config
except ImportError:
    print("错误：无法找到配置文件 config.py。")
    # exit() is a helper the `site` module adds for interactive sessions and is
    # not meant for programs; raising SystemExit always works, and the non-zero
    # status marks the run as failed.
    raise SystemExit(1)

# Table queried for the ts_code values to process.
STOCK_BASIC_TABLE = 'stock_basic_info'
# Table whose rows are deleted and re-inserted, one stock at a time.
INDICATOR_TABLE = 'stock_financial_indicators'
# Date window handed to the download as start_date / end_date (YYYYMMDD strings).
START_DATE = '20180101'
END_DATE = '20250531' # adjust to the latest date you need

# Full field list, copied over from the previous script. It is joined with
# commas and sent as the `fields` argument of pro.fina_indicator.
# NOTE(review): a few names look misspelled ('interst_income', 'longdeb_to_debt',
# 'total_fa_trun'); presumably they mirror the upstream API's own spelling —
# confirm before "correcting" them.
# NOTE(review): 'eps' is absent while 'dt_eps' is present — confirm this is intentional.
INDICATOR_FIELDS = [
    'ts_code', 'ann_date', 'end_date', 'dt_eps', 'total_revenue_ps', 'revenue_ps', 'capital_rese_ps', 'surplus_rese_ps',
    'undist_profit_ps', 'extra_item', 'profit_dedt', 'gross_margin', 'current_ratio', 'quick_ratio', 'cash_ratio', 
    'invturn_days', 'arturn_days', 'inv_turn', 'ar_turn', 'ca_turn', 'fa_turn', 'assets_turn', 'op_income', 
    'valuechange_income', 'interst_income', 'daa', 'ebit', 'ebitda', 'fcff', 'fcfe', 'current_exint', 
    'noncurrent_exint', 'interestdebt', 'netdebt', 'tangible_asset', 'working_capital', 'networking_capital', 
    'invest_capital', 'retained_earnings', 'diluted2_eps', 'bps', 'ocfps', 'retainedps', 'cfps', 'ebit_ps', 
    'fcff_ps', 'fcfe_ps', 'netprofit_margin', 'grossprofit_margin', 'cogs_of_sales', 'expense_of_sales', 
    'profit_to_gr', 'saleexp_to_gr', 'adminexp_of_gr', 'finaexp_of_gr', 'impai_ttm', 'gc_of_gr', 'op_of_gr', 
    'ebit_of_gr', 'roe', 'roe_waa', 'roe_dt', 'roa', 'nptafloat', 'roic', 'roe_yearly', 'roa2_yearly', 'roe_avg', 
    'opincome_of_ebt', 'investincome_of_ebt', 'n_op_profit_of_ebt', 'tax_to_ebt', 'dtprofit_to_profit', 
    'salescash_to_or', 'ocf_to_or', 'ocf_to_opincome', 'capitalized_to_da', 'debt_to_assets', 'assets_to_eqt', 
    'dp_assets_to_eqt', 'ca_to_assets', 'nca_to_assets', 'tbassets_to_totalassets', 'int_to_talcap', 
    'eqt_to_talcapital', 'currentdebt_to_debt', 'longdeb_to_debt', 'ocf_to_shortdebt', 'debt_to_eqt', 'eqt_to_debt',
    'eqt_to_interestdebt', 'tangibleasset_to_debt', 'tangasset_to_intdebt', 'tangibleasset_to_netdebt', 
    'ocf_to_debt', 'ocf_to_interestdebt', 'ocf_to_netdebt', 'ebit_to_interest', 'longdebt_to_workingcapital', 
    'ebitda_to_debt', 'turn_days', 'roa_yearly', 'roa_dp', 'fixed_assets', 'profit_prefin_exp', 'non_op_profit', 
    'op_to_ebt', 'nop_to_ebt', 'ocf_to_profit', 'cash_to_liqdebt', 'cash_to_liqdebt_withinterest', 'op_to_liqdebt', 
    'op_to_debt', 'roic_yearly', 'total_fa_trun', 'profit_to_op', 'q_opincome', 'q_investincome', 'q_dtprofit', 'q_eps',
    'q_netprofit_margin', 'q_gsprofit_margin', 'q_exp_to_sales', 'q_profit_to_gr', 'q_saleexp_to_gr', 
    'q_adminexp_to_gr', 'q_finaexp_to_gr', 'q_impair_to_gr_ttm', 'q_gc_to_gr', 'q_op_to_gr', 'q_roe', 'q_dt_roe', 
    'q_nptafloat', 'q_opincome_to_ebt', 'q_investincome_to_ebt', 'q_dtprofit_to_profit', 'q_salescash_to_or', 
    'q_ocf_to_sales', 'q_ocf_to_or', 'basic_eps_yoy', 'dt_eps_yoy', 'cfps_yoy', 'op_yoy', 'ebt_yoy', 'netprofit_yoy', 
    'dt_netprofit_yoy', 'ocf_yoy', 'roe_yoy', 'bps_yoy', 'assets_yoy', 'eqt_yoy', 'tr_yoy', 'or_yoy', 'q_gr_yoy', 
    'q_gr_qoq', 'q_sales_yoy', 'q_sales_qoq', 'q_op_yoy', 'q_op_qoq', 'q_profit_yoy', 'q_profit_qoq', 
    'q_netprofit_yoy', 'q_netprofit_qoq', 'equity_yoy', 'rd_exp', 'update_flag'
]

def fetch_all_data_with_pagination(pro, ts_code, start_date, end_date, fields,
                                   page_size=100, pause_seconds=0.6):
    """
    Download every fina_indicator row of one stock, paging backwards in time.

    The API is called repeatedly with a shrinking ``end_date``: after each full
    page the next request ends one day before the earliest ``ann_date`` seen.
    Paging stops on an empty page, on a page shorter than ``page_size``, or when
    the cursor would move before ``start_date``.

    NOTE(review): the cursor is computed from ``ann_date`` but sent as
    ``end_date``; confirm against the Tushare documentation that ``end_date``
    filters on the announcement date rather than on the report period.

    Args:
        pro: Client object exposing ``fina_indicator(ts_code=, start_date=,
            end_date=, fields=)`` that returns a DataFrame (or None).
        ts_code: Stock code to download.
        start_date: Lower bound as a ``YYYYMMDD`` string.
        end_date: Upper bound of the first request as a ``YYYYMMDD`` string.
        fields: Iterable of field names, joined with commas for the request.
            ``ann_date`` must be among them for paging to work.
        page_size: Row count at which a page is treated as full, i.e. more data
            may follow. The default keeps the previously hard-coded 100.
        pause_seconds: Seconds to sleep between two page requests.

    Returns:
        A de-duplicated DataFrame with all pages concatenated, an empty
        DataFrame when the API returned nothing, or None when the download
        failed (the error is printed).
    """
    pages = []
    current_end_date = end_date
    field_spec = ','.join(fields)  # loop-invariant, build it once

    while True:
        try:
            df = pro.fina_indicator(
                ts_code=ts_code,
                start_date=start_date,
                end_date=current_end_date,
                fields=field_spec
            )

            if df is None or df.empty:
                # Nothing (more) in this window.
                break

            pages.append(df)

            # A short page means this was the last one.
            if len(df) < page_size:
                break

            # Ignore missing announcement dates when locating the cursor.
            ann_dates = df['ann_date'].dropna()
            if ann_dates.empty:
                raise ValueError("full page without any ann_date, cannot continue paging")

            earliest_date = datetime.strptime(ann_dates.min(), '%Y%m%d')
            next_end_date = (earliest_date - timedelta(days=1)).strftime('%Y%m%d')

            # YYYYMMDD strings order the same way as the dates they encode.
            if next_end_date < start_date:
                break

            # Without this guard a page whose rows are all dated after the
            # current window end would be requested again and again forever.
            if next_end_date >= current_end_date:
                raise RuntimeError("pagination cursor did not move backwards")

            current_end_date = next_end_date
            time.sleep(pause_seconds)  # stay below the API rate limit

        except Exception as e:
            print(f"\n在为 {ts_code} 分页下载时出错: {e}。终止该股票的下载。")
            return None  # None signals a failed download to the caller

    if not pages:
        return pd.DataFrame()

    # Overlapping pages can repeat rows, so de-duplicate after concatenation.
    return pd.concat(pages, ignore_index=True).drop_duplicates()

def backfill_all_data():
    """
    Replace the stored financial-indicator rows of every stock with a fresh download.

    For each ts_code read from STOCK_BASIC_TABLE the complete data set is
    downloaded first. Only when that download succeeded are the old rows deleted
    from INDICATOR_TABLE and the new rows inserted, both inside one transaction.
    A failed download (None) or a database error leaves the old rows untouched.
    A successful but empty download still removes the old rows, because the
    stored data is meant to mirror the download.

    Returns:
        None. Progress, errors and a list of failed codes are printed.
    """
    # 1. Connect to the database and the data API.
    print("正在初始化连接...")
    db_uri = (f"postgresql+psycopg2://{config.DB_USER}:{config.DB_PASS}@"
              f"{config.DB_HOST}:{config.DB_PORT}/{config.DB_NAME}")
    engine = create_engine(db_uri)
    pro = ts.pro_api(config.TUSHARE_TOKEN)
    print("连接成功！")

    # 2. Load the list of stock codes to process.
    all_stocks_df = pd.read_sql(f"SELECT ts_code FROM {STOCK_BASIC_TABLE}", engine)
    all_stock_codes = all_stocks_df['ts_code'].tolist()
    print(f"将为 {len(all_stock_codes)} 只股票进行数据全量回填...")

    # The statement is identical for every stock; only the bound value changes.
    delete_sql = text(f"DELETE FROM {INDICATOR_TABLE} WHERE ts_code = :code")
    failed_codes = []

    # 3. Download, then delete-and-insert, one stock at a time.
    with engine.connect() as connection:
        for ts_code in tqdm(all_stock_codes, desc="全量回填财务指标"):
            try:
                # Download BEFORE touching the table so that no transaction is
                # held open during network calls and a failed download cannot
                # end in a committed DELETE.
                full_data_df = fetch_all_data_with_pagination(pro, ts_code, START_DATE, END_DATE, INDICATOR_FIELDS)

                if full_data_df is None:
                    failed_codes.append(ts_code)
                    print(f"已跳过 {ts_code}：下载失败，其旧数据保持不变。")
                else:
                    # The context manager commits on success and rolls back if
                    # the block raises, so delete and insert succeed or fail together.
                    with connection.begin():
                        connection.execute(delete_sql, {"code": ts_code})
                        if not full_data_df.empty:
                            full_data_df.to_sql(INDICATOR_TABLE, connection, if_exists='append', index=False)

            except Exception as e:
                failed_codes.append(ts_code)
                print(f"\n处理 {ts_code} 时发生严重错误: {e}")
                print(f"已回滚对 {ts_code} 的操作，其旧数据保持不变。")

            time.sleep(0.6)  # pause between stocks to respect the API rate limit

    if failed_codes:
        print(f"\n有 {len(failed_codes)} 只股票未能回填，其旧数据保持不变: {failed_codes}")
    print("\n所有股票数据回填完毕！")


if __name__ == '__main__':
    # Remind the operator once more that this run is destructive.
    separator = "=" * 60
    for banner_line in (separator, "警告：此脚本将删除并重新下载数据，请确保您已备份！", separator):
        print(banner_line)
    # The interactive yes/no confirmation is deliberately disabled for
    # convenience, so the backfill starts right away.
    backfill_all_data()

警告：此脚本将删除并重新下载数据，请确保您已备份！
正在初始化连接...
连接成功！
将为 5413 只股票进行数据全量回填...


全量回填财务指标: 100%|███████████████████| 5413/5413 [1:29:56<00:00,  1.00it/s]


所有股票数据回填完毕！



