## 通过akshare获取当日股票数据 并 存入pickle文件

In [1]:
import akshare as ak
from datetime import datetime, timedelta
from pathlib import Path
import pandas as pd
from pandas import DataFrame, Series
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import random
import pickle
import os
import gc
import psutil

import common

In [None]:
# def get_today_stock_data():
#     """
#     获取每日股票date open high low close 数据
#     可在 限制股票数量 进行测试
#     """
#     file_name = "./txt_lib/stock_code.txt"
#     with open(file_name, "r") as file:
#         stock_list = [line.strip() for line in file if line.strip()]

#     # # 限制股票数量
#     # stock_list = stock_list[:150]

#     def get_stock_data(stock_code: str):
#         """Fetch daily stock data for a stock code"""

#         current_date = datetime.now()

#         df = ak.stock_zh_a_cdr_daily(
#             symbol=stock_code,
#             start_date="2018-01-01",
#             end_date=current_date.strftime("%Y-%m-%d"),
#         )

#         if df is not None:
#             # Convert date columns to strings
#             for col in df.columns:
#                 if pd.api.types.is_datetime64_any_dtype(df[col]):
#                     df[col] = df[col].astype(str)
#             return stock_code, df.to_dict()
#         else:
#             print(f"{stock_code} unavailable.")
#             return stock_code, None

#     with ThreadPoolExecutor(max_workers=5) as executor:
#         results = list(executor.map(get_stock_data, stock_list))

#     # 过滤掉下载失败的结果，只保留成功的股票数据
#     data = {stock_code: df for stock_code, df in results if df is not None}

#     return data

def get_today_stock_data(
    stock_file: str = "./txt_lib/stock_code.txt",
    batch_size: int = 100,
    max_workers: int = 5,
) -> dict:
    """Fetch daily date/open/high/low/close data for every listed stock code.

    Stock codes are read one per line from ``stock_file`` (blank lines are
    skipped) and downloaded in batches through a thread pool. After each
    batch, the batch number and the process's resident memory are printed.

    Args:
        stock_file: Path of the text file holding one stock code per line.
        batch_size: Number of stock codes processed per batch.
        max_workers: Thread count handed to ``ThreadPoolExecutor``.

    Returns:
        A dict mapping each stock code to the ``DataFrame.to_dict()`` form of
        its data. Codes whose download failed or came back empty are left out.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        OSError: If ``stock_file`` cannot be opened.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # "utf-8-sig" reads plain UTF-8/ASCII and also drops a leading BOM, which
    # would otherwise end up glued to the first stock code.
    with open(stock_file, "r", encoding="utf-8-sig") as file:
        stock_list = [line.strip() for line in file if line.strip()]

    # Resolve the end date once so every request of this run uses the same range.
    end_date = datetime.now().strftime("%Y-%m-%d")

    def get_stock_data(stock_code: str):
        """Fetch daily stock data for one code; return (code, dict | None)."""
        # The whole body is guarded on purpose: an exception escaping a worker
        # would propagate out of executor.map and abort the entire batch.
        try:
            df = ak.stock_zh_a_cdr_daily(
                symbol=stock_code,
                start_date="2018-01-01",
                end_date=end_date,
            )
            if df is None or df.empty:
                print(f"{stock_code} unavailable.")
                return stock_code, None

            # Convert datetime64 columns to strings before building the dict
            for col in df.columns:
                if pd.api.types.is_datetime64_any_dtype(df[col]):
                    df[col] = df[col].astype(str)
            return stock_code, df.to_dict()
        except Exception as e:
            print(f"Error fetching {stock_code}: {e}")
            return stock_code, None

    data = {}
    # Ceiling division: an exact multiple of batch_size must not report an
    # extra, non-existent batch in the progress line.
    total_batches = (len(stock_list) + batch_size - 1) // batch_size

    # One pool is reused for all batches instead of being rebuilt each time.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        batch_starts = range(0, len(stock_list), batch_size)
        for batch_no, start in enumerate(batch_starts, start=1):
            batch = stock_list[start : start + batch_size]

            # Keep only the codes whose download succeeded
            for stock_code, stock_data in executor.map(get_stock_data, batch):
                if stock_data is not None:
                    data[stock_code] = stock_data

            gc.collect()  # Force garbage collection after each batch
            print(
                f"Batch {batch_no}/{total_batches} completed, "
                f"memory: {psutil.Process().memory_info().rss / 1024**2:.2f} MB"
            )

    return data


def create_pickle(data: dict, pickle_file: str = "./txt_lib/daily_df.pkl") -> None:
    """Replace the previous pickle file with a fresh dump of ``data``.

    The file from the previous run is removed first and then written anew;
    per the original author's note this avoids a permission error.

    Args:
        data: The object to serialize (the stock-code -> data mapping).
        pickle_file: Destination path of the pickle file.

    Raises:
        OSError: If the destination cannot be opened for writing.
    """
    try:
        os.remove(pickle_file)
    except FileNotFoundError:
        # Nothing to clean up (e.g. first run) -- expected, not an error.
        pass
    except OSError as exc:
        # Removal stays best-effort: report the problem instead of hiding it,
        # then still try to overwrite the file below.
        print(f"Could not remove old pickle '{pickle_file}': {exc}")

    with open(pickle_file, "wb") as f:
        pickle.dump(data, f)

    print(f"Stock data has been saved to '{pickle_file}'.")

In [3]:
# Download the data for every listed stock code, then write it to the pickle file.
data = get_today_stock_data()
create_pickle(data)

Batch 1/51 completed, stocks processed: 100, memory: 3251.00 MB
Batch 2/51 completed, stocks processed: 200, memory: 3327.44 MB
Batch 3/51 completed, stocks processed: 300, memory: 3369.45 MB
Batch 4/51 completed, stocks processed: 400, memory: 3447.65 MB
Batch 5/51 completed, stocks processed: 500, memory: 3501.12 MB
Batch 6/51 completed, stocks processed: 600, memory: 3565.02 MB
Error fetching sh600855: HTTPSConnectionPool(host='finance.sina.com.cn', port=443): Max retries exceeded with url: /realstock/company/sh600855/hisdata_klc2/klc_kl.js (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:1007)')))
Batch 7/51 completed, stocks processed: 700, memory: 3617.30 MB
Batch 8/51 completed, stocks processed: 800, memory: 3666.38 MB
Batch 9/51 completed, stocks processed: 900, memory: 3769.15 MB
Batch 10/51 completed, stocks processed: 1000, memory: 3911.75 MB
Batch 11/51 completed, stocks processed: 1100, memory: 4035.57 MB
Batch 12/51 completed, stocks proc

## TEST

In [5]:
import calculate
# from importlib import reload
# reload(calculate)
# reload(common)

In [6]:
# Manual check: fetch one stock directly from akshare, using the same call and
# date range that get_today_stock_data uses.
current_date = datetime.now()
stock_code = "sh600018"


df = ak.stock_zh_a_cdr_daily(
    symbol=stock_code,
    start_date="2018-01-01",
    end_date=current_date.strftime("%Y-%m-%d"),
)

In [7]:
# Rebinds df to whatever common.Read_pickle.read_pickle_data returns for this code
# (presumably the data stored in the pickle file -- confirm in common.py).
df = common.Read_pickle.read_pickle_data(stock_code)

In [8]:
# Show the last 7 rows of df.
df.tail(7)

Unnamed: 0,date,open,high,low,close,volume,amount,prevclose
1817,2025-07-02,5.75,5.82,5.75,5.82,31897079.0,184978788.0,
1818,2025-07-03,5.82,5.83,5.77,5.78,23120200.0,133813017.0,
1819,2025-07-04,5.79,5.9,5.78,5.87,46255318.0,271226412.0,
1820,2025-07-07,5.87,5.89,5.83,5.84,22521312.0,131850654.0,
1821,2025-07-08,5.86,5.89,5.77,5.78,40130900.0,232683769.0,
1822,2025-07-09,5.78,5.84,5.76,5.76,36215794.0,210184312.0,
1823,2025-07-10,5.78,5.83,5.77,5.8,35743900.0,207657289.0,
