In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from fake_useragent import UserAgent
from urllib.parse import urljoin
import time
import math

class StockScraper:
    def __init__(self, page_size=50, sleep_sec=0.5):
        self.ua = UserAgent()
        self.page_size = page_size
        self.sleep_sec = sleep_sec
        self.base_url = "https://push2.eastmoney.com/api/qt/clist/get"
        self.params = {
            "fid": "f184",
            "po": "1",
            "pz": str(page_size),
            "pn": "1",
            "np": "1",
            "fltt": "2",
            "invt": "2",
            "ut": "8dec03ba335b81bf4ebdf7b29ec27d15",
            "fs": "m:0+t:6+f:!2,m:0+t:13+f:!2,m:0+t:80+f:!2,m:1+t:2+f:!2,m:1+t:23+f:!2,m:0+t:7+f:!2,m:1+t:3+f:!2",
            "fields": "f2,f3,f12,f14,f100,f109,f160,f165,f175,f184,f225,f263,f264"
        }
        self.headers = {
            "User-Agent": self.ua.random,
            "Referer": "https://data.eastmoney.com/zjlx/list.html"
        }
        self.field_map = {
            "f2": "最新价",
            "f3": "今日涨跌(%)",
            "f225": "今日排名",
            "f12": "代码",
            "f14": "名称",
            "f100": "所属板块",
            "f109": "5日涨跌(%)",
            "f160": "10日涨跌(%)",
            "f165": "5日排行榜主力净占比(%)",
            "f175": "10日排行榜主力净占比(%)",
            "f184": "今日排行榜主力净占比(%)",
            "f263": "5日排名",
            "f264": "10日排名"
        }

    def _get_total_pages(self):
        res = requests.get(self.base_url, params=self.params, headers=self.headers)
        res_json = res.json()
        total = res_json['data']['total']
        pages = math.ceil(total / self.page_size)
        return total, pages

    def scrape_all(self):
        total, pages = self._get_total_pages()
        print(f"共 {total} 条数据，{pages} 页，每页 {self.page_size} 条")
        all_data = []

        for page in range(1, pages + 1):
            print(f"抓取第 {page} 页...")
            self.params['pn'] = str(page)
            res = requests.get(self.base_url, params=self.params, headers=self.headers)
            res_json = res.json()
            data = res_json['data']['diff']
            all_data.extend(data)
            time.sleep(self.sleep_sec)

        df = pd.DataFrame(all_data)
        df = df.rename(columns=self.field_map)
        return df

    def save_csv(self, df: pd.DataFrame, filename="股票实时基本数据.csv"):
        df.to_csv(filename, index=False, encoding='utf-8-sig')
        print(f"数据已保存到 {filename}")

    
    def get_stock_list(self):
        total, pages = self._get_total_pages()
        print(f"共 {total} 支股票，{pages} 页")
        stock_ids = []

        for page in range(1, pages + 1):
            print(f"抓取第 {page} 页股票代码...")
            self.params['pn'] = str(page)
            res = requests.get(self.base_url, params=self.params, headers=self.headers)
            res_json = res.json()
            data = res_json['data']['diff']
            for item in data:
                market_prefix = "1" if item["f12"].startswith("6") else "0"
                secid = f"{market_prefix}.{item['f12']}"
                stock_ids.append(secid)
            time.sleep(0.5)

        print(f"总共获取了 {len(stock_ids)} 支股票代码")
        return stock_ids

# scraper = StockScraper(page_size=50)
# df = scraper.scrape_all()
# scraper.save_csv(df=df)

In [2]:
import requests
import pandas as pd
import time

# 字段映射（API字段到中文）
mapping_field = {
    'f51': '日期',
    'f52': '开盘价',
    'f53': '收盘价',
    'f54': '最高价',
    'f55': '最低价',
    'f56': '成交量',
    'f57': '成交额',
    'f58': '振幅',
    'f59': '涨跌幅',
    'f60': '涨跌额'
}

class EastmoneyKlineFetcher:
    def __init__(self, stock_list, klt="101", fqt="1", lmt=120):
        self.ua = UserAgent()
        self.stock_list = stock_list  # 格式如：['0.002040', '1.600519']
        self.klt = klt  # K线类型
        self.fqt = fqt  # 复权方式
        self.lmt = lmt  # 数据条数限制

    def fetch_single(self, secid):
        headers = {
            "User-Agent": self.ua.random,
            "Referer": "https://quote.eastmoney.com/"
        }
        url = (
            f"https://push2his.eastmoney.com/api/qt/stock/kline/get?"
            f"secid={secid}&ut=fa5fd1943c7b386f172d6893dbfba10b"
            f"&fields1=f1,f2,f3,f4,f5,f6"
            f"&fields2={','.join(mapping_field.keys())}"
            f"&klt={self.klt}&fqt={self.fqt}&end=20500101&lmt={self.lmt}"
        )
        try:
            response = requests.get(url, headers=headers, timeout=10)
            json_data = response.json()
            if not json_data.get("data"):
                print(f"Warning: No data for {secid}")
                return None
            klines = json_data["data"]["klines"]
            df = pd.DataFrame(
                [line.split(',') for line in klines],
                columns=list(mapping_field.values())
            )
            df["股票代码"] = secid
            return df
        except Exception as e:
            print(f"Error fetching {secid}: {e}")
            return None

    def fetch_all(self):
        all_data = []
        for secid in self.stock_list:
            print(f"Fetching: {secid}")
            df = self.fetch_single(secid)
            if df is not None:
                all_data.append(df)
            time.sleep(0.5)  # 防止触发限速
        return pd.concat(all_data, ignore_index=True) if all_data else None
    

# fetcher = EastmoneyKlineFetcher(['0.002040', '1.600519'], klt='101', fqt='1', lmt=90)
# df = fetcher.fetch_all()
# print(df.head())



In [3]:
stock_scraper = StockScraper()
base_df = stock_scraper.scrape_all()
base_df.to_csv("股票实时基本数据.csv", index=False, encoding='utf-8-sig')

# 构造 secid 列表：根据代码前缀判断市场
def code_to_secid(code):
    return ("1." if code.startswith("6") else "0.") + code

secid_list = [code_to_secid(code) for code in base_df["代码"].astype(str)]

# 第二步：抓取所有股票的90天K线
kline_fetcher = EastmoneyKlineFetcher(secid_list)
kline_df = kline_fetcher.fetch_all()

if kline_df is not None:
    kline_df.to_csv("股票K线数据_90天.csv", index=False, encoding='utf-8-sig')
    print("已保存全部K线数据")
else:
    print("未获取到任何K线数据")

共 5233 条数据，105 页，每页 50 条
抓取第 1 页...
抓取第 2 页...
抓取第 3 页...
抓取第 4 页...
抓取第 5 页...
抓取第 6 页...
抓取第 7 页...
抓取第 8 页...
抓取第 9 页...
抓取第 10 页...
抓取第 11 页...
抓取第 12 页...
抓取第 13 页...
抓取第 14 页...
抓取第 15 页...
抓取第 16 页...
抓取第 17 页...
抓取第 18 页...
抓取第 19 页...
抓取第 20 页...
抓取第 21 页...
抓取第 22 页...
抓取第 23 页...
抓取第 24 页...
抓取第 25 页...
抓取第 26 页...
抓取第 27 页...
抓取第 28 页...
抓取第 29 页...
抓取第 30 页...
抓取第 31 页...
抓取第 32 页...
抓取第 33 页...
抓取第 34 页...
抓取第 35 页...
抓取第 36 页...
抓取第 37 页...
抓取第 38 页...
抓取第 39 页...
抓取第 40 页...
抓取第 41 页...
抓取第 42 页...
抓取第 43 页...
抓取第 44 页...
抓取第 45 页...
抓取第 46 页...
抓取第 47 页...
抓取第 48 页...
抓取第 49 页...
抓取第 50 页...
抓取第 51 页...
抓取第 52 页...
抓取第 53 页...
抓取第 54 页...
抓取第 55 页...
抓取第 56 页...
抓取第 57 页...
抓取第 58 页...
抓取第 59 页...
抓取第 60 页...
抓取第 61 页...
抓取第 62 页...
抓取第 63 页...
抓取第 64 页...
抓取第 65 页...
抓取第 66 页...
抓取第 67 页...
抓取第 68 页...
抓取第 69 页...
抓取第 70 页...
抓取第 71 页...
抓取第 72 页...
抓取第 73 页...
抓取第 74 页...
抓取第 75 页...
抓取第 76 页...
抓取第 77 页...
抓取第 78 页...
抓取第 79 页...
抓取第 80 页...
抓取第 81 页...
抓取第 82 页...


In [4]:
kline_df.describe()

Unnamed: 0,日期,开盘价,收盘价,最高价,最低价,成交量,成交额,振幅,涨跌幅,涨跌额,股票代码
count,618999,618999.0,618999.0,618999.0,618999.0,618999,618999.0,618999.0,618999.0,618999.0,618999.0
unique,170,15321.0,16219.0,15933.0,15321.0,325438,618557.0,2550.0,3736.0,3030.0,5184.0
top,2025-04-17,5.0,5.08,5.2,5.0,14853,333528335.0,0.0,0.0,0.0,0.300552
freq,5168,567.0,453.0,460.0,469.0,15,2.0,1823.0,14514.0,14509.0,240.0


In [5]:

kline_df.groupby("股票代码")["收盘价"].std().reset_index()


Unnamed: 0,股票代码,收盘价
0,0.000001,0.246715
1,0.000002,0.818113
2,0.000004,3.847641
3,0.000006,1.072719
4,0.000007,0.495514
...,...,...
5179,1.688799,1.610483
5180,1.688800,10.491093
5181,1.688819,1.957998
5182,1.688981,5.814669


In [6]:
df = kline_df
df["收盘价"] = pd.to_numeric(df["收盘价"], errors="coerce")
std_df = df.groupby("股票代码")["收盘价"].std().reset_index()
std_df.columns = ["股票代码", "收盘价标准差"]


In [8]:
std_df.sort_values('收盘价标准差')

Unnamed: 0,股票代码,收盘价标准差
3860,1.601916,0.043880
1499,0.200055,0.053156
1509,0.200539,0.062351
1490,0.200019,0.066383
1508,0.200530,0.070324
...,...,...
2881,0.301600,48.336147
3287,1.600519,50.508958
5075,1.688608,54.450578
4819,1.688256,104.379402


In [12]:
# 获取每只股票最早和最新的收盘价
first_close = kline_df.groupby("股票代码")["收盘价"].first()
last_close = kline_df.groupby("股票代码")["收盘价"].last()

# 计算涨幅
price_change = ((last_close - first_close) / first_close).dropna()
price_change.name = "涨幅"

# 计算RPS（基于涨幅的百分排名）
rps_series = price_change.rank(pct=True) * 100
rps_series.name = "RPS"

# 整合结果
rps_df = pd.concat([first_close, last_close, price_change, rps_series], axis=1).reset_index()
rps_df.columns = ["股票代码", "最早收盘价", "最新收盘价", "涨幅", "RPS"]

# 示例输出
rps_df.sort_values("RPS", ascending=False).head(20)

Unnamed: 0,股票代码,最早收盘价,最新收盘价,涨幅,RPS
3122,1.600289,1.31,6.84,4.221374,100.0
2202,0.300718,17.78,74.74,3.2036,99.98071
4656,1.688068,29.22,115.11,2.939425,99.96142
1667,0.300153,7.94,31.26,2.937028,99.94213
5088,1.688622,23.53,82.08,2.488313,99.92284
334,0.00088,10.18,33.41,2.281925,99.903549
1454,0.00301,14.65,47.78,2.261433,99.884259
5107,1.688656,27.56,89.3,2.240203,99.864969
94,0.000506,1.94,6.22,2.206186,99.845679
2663,0.30122,26.54,81.0,2.051997,99.826389


In [15]:
import plotly.graph_objects as go
import pandas as pd

def plot_stock_trend(kline_df: pd.DataFrame, stock_code: str):
    # 确保数值类型
    kline_df["收盘价"] = pd.to_numeric(kline_df["收盘价"], errors="coerce")
    kline_df["涨跌幅"] = pd.to_numeric(kline_df["涨跌幅"], errors="coerce")
    kline_df["成交量"] = pd.to_numeric(kline_df["成交量"], errors="coerce")

    # 筛选数据
    stock_df = kline_df[kline_df["股票代码"] == stock_code].copy()
    if stock_df.empty:
        print(f"股票代码 {stock_code} 没有找到对应数据")
        return

    stock_df["日期"] = pd.to_datetime(stock_df["日期"])
    stock_df = stock_df.sort_values("日期")

    fig = go.Figure()

    # 收盘价线
    fig.add_trace(go.Scatter(x=stock_df["日期"], y=stock_df["收盘价"],
                             mode="lines+markers",
                             name="收盘价", yaxis="y1"))

    # 涨跌幅线
    fig.add_trace(go.Scatter(x=stock_df["日期"], y=stock_df["涨跌幅"],
                             mode="lines+markers",
                             name="涨跌幅 (%)", yaxis="y2"))

    # 成交量柱状图
    fig.add_trace(go.Bar(x=stock_df["日期"], y=stock_df["成交量"],
                         name="成交量", yaxis="y3", opacity=0.4))

    fig.update_layout(
        title=f"{stock_code} - 近90日趋势图",
        xaxis=dict(title="日期"),
        yaxis=dict(title="收盘价", side="left", showgrid=False),
        yaxis2=dict(title="涨跌幅 (%)", overlaying="y", side="right", showgrid=False),
        yaxis3=dict(title="成交量", anchor="free", overlaying="y", side="right", position=0.95),
        legend=dict(x=0.01, y=0.99),
        height=600,
        width=1000
    )

    fig.show()


In [16]:
plot_stock_trend(kline_df, "0.002040")
plot_stock_trend(kline_df, "1.600519")



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result

