In [2]:
import numpy as np
import pandas as pd
from datetime import datetime
import akshare as ak
import os, sys
import baostock as bs
import concurrent.futures as cf

In [30]:
#删除某个列
def delete_columns(symbol, date):
    df = pd.read_csv(f'~/stock/a_daily/{symbol[2:]}.csv', header=0)
    df = df.drop(columns=['涨停'])
    if '连续涨停' in df.columns:
        df = df.drop(columns=['连续涨停'])
    df.to_csv(f'~/stock/a_daily/{symbol[2:]}.csv', index=False, float_format='%.15f')

#设置连续涨停天数列，显示当天连续涨停的天数
def add_limit_day_count_column(symbol):
    df = pd.read_csv(f'~/stock/a_daily_2/{symbol[2:]}.csv', header=0)
    df['pre_close'] = df.shift().apply(lambda row: row['close'], axis=1)
    if symbol[2:-3] == '300' or symbol[2:-3] == '301' or symbol[2:-3] == '688':
        df['limits'] = 0
        df['limit'] = 0
    else:
        df.loc[round(df['pre_close'] * 1.1, 2) <= df['close'], 'limit'] = 1
        df['limits'] = (df['limit'] != df['limit'].shift(1)).cumsum()
        # 计算连续涨停天数
        df['limits'] = df[df['limit'] == 1].groupby('limits').cumcount() + 1
    df['pre_close'] = df['pre_close'].fillna(0)
    df['limit'] = df['limit'].fillna(0)
    df['limits'] = df['limits'].fillna(0)
    df.to_csv(f'~/stock/a_daily_2/{symbol[2:]}.csv', index=False, float_format='%.15f')

#设置涨停列，标识为当天是否涨停
def add_limit_day_column(symbol, date):
    df = get_stock_day_base(f'{symbol[:2]}.{symbol[2:]}', date)
    df.rename(columns={"preclose": "pre_close", "adjustflag": "adjust_flag", "tradestatus": "trade_status", 
                       "pctChg": "pct_chg", "peTTM": "pe_ttm", "pbMRQ": "pb_mrq", "psTTM": "ps_ttm", "pcfNcfTTM": "pcf_ncf_ttm",
                       "isST": "is_st"}, inplace=True)
    df['code'] = df['code'][2:]
    df['circulation_market_value'] = df['amount'] / (df['turn'] * 100)
    if symbol[2:-3] == '300' or symbol[2:-3] == '301' or symbol[2:-3] == '688':
        df['up_limit'] = 0
        df['continuous_up_limit'] = 0
    else:
        #df.loc[round(df.shift()['up_limit'] * 1.1, 2) <= df['收盘'], 'up_limit'] = 1
        df.loc[round(df['pre_close'] * 1.1, 2) <= df['close'], 'up_limit'] = 1
        df['continuous_up_limit'] = (df['up_limit'] != df['up_limit'].shift(1)).cumsum()
        # 计算连续涨停天数
        df['continuous_up_limit'] = df[df['up_limit'] == 1].groupby('continuous_up_limit').cumcount() + 1
    
    df['up_limit'] = df['up_limit'].fillna(0)
    df['continuous_up_limit'] = df['continuous_up_limit'].fillna(0)
    df.to_csv(f'~/stock/a_daily_2/{symbol[2:]}.csv', index=False, float_format='%.15f')

#合并bs和ak结果
def merge_bs_ak_day(symbol):
    ak_df = pd.read_csv(f'~/stock/a_daily/{symbol[2:]}.csv', header=0)
    bs_df = pd.read_csv(f'~/stock/a_daily_detail/{symbol[2:]}.csv', header=0)
    ak_df['日期'] = pd.to_datetime(ak_df['日期'], format='%Y/%m/%d')
    ak_df.rename(columns={'日期':'date','开盘':'open','收盘':'close','最高':'high','最低':'low','成交量':'volume','成交额':'amount','振幅':'amplitude','涨跌幅':'fluctuations','涨跌额':'fluctuations_amount','换手率':'turn'}, inplace=True)
    bs_df['date'] = pd.to_datetime(bs_df['date'], format='%Y%m%d')
    merge_df = pd.merge(ak_df, bs_df, on='date', how='left')
    merge_df.to_csv(f'~/stock/a_daily_2/{symbol[2:]}.csv', index=False, float_format='%.15f')
#设置涨停列，标识为当天是否涨停
def yestoday_close(symbol):
    df = pd.read_csv(f'~/stock/a_daily/{symbol[2:]}.csv', header=0)
    if symbol[2:-3] == '300' or symbol[2:-3] == '301' or symbol[2:-3] == '688':
        df['涨停'] = 0
    else:
        df.loc[round(df.shift()['收盘'] * 1.1, 2) <= df['收盘'], '涨停'] = 1
        df['涨停'] = df['涨停'].fillna(0)
    df.to_csv(f'~/stock/a_daily_my/{symbol[2:]}.csv', index=False, float_format='%.15f')
#获取基本信息
def get_stock_day_base(symbol,start_date):
    rs = bs.query_history_k_data_plus(symbol,
        "date,tradestatus,pctChg,peTTM,pbMRQ,psTTM,pcfNcfTTM,isST",
        start_date=start_date,frequency="d", adjustflag="2")
    data_list = []
    while (rs.error_code == '0') & rs.next():
        # 获取一条记录，将记录合并在一起
        data_list.append(rs.get_row_data())
    df = pd.DataFrame(data_list, columns=rs.fields)
    df.to_csv(f'~/stock/a_daily_detail/{symbol[3:]}.csv', index=False, float_format='%.15f')
#转换日期
def transform_date(symbol):
    df = pd.read_csv(f'~/stock/a_daily_detail/{symbol[2:]}.csv', header=0)
    cols = ['tradestatus','pctChg','peTTM','pbMRQ','psTTM','pcfNcfTTM','isST']
    # 将处理空字符串的函数定义为 pd.to_numeric() 函数的 lambda 版本
    convert_func = lambda x: pd.to_numeric(x, errors='coerce') if x != '' else None
    # 使用 applymap() 方法将需要转换的列使用上述函数进行转换
    df[cols] = df[cols].applymap(convert_func)
    df.to_csv(f'~/stock/a_daily_detail/{symbol[2:]}.csv', index=False, float_format='%.15f')
#lg = bs.login()
    
start_time = '1997-01-01'
stock_codes = pd.read_csv('~/stock/stocks.csv')['代码']
arr = []
for stock in stock_codes:
    if stock[:2] == 'bj':
        continue
    add_limit_day_count_column(stock)
#bs.logout()

In [75]:
def limit_rate(symbol, date):
    df = pd.read_csv(f'~/stock/a_daily_2/{symbol[2:]}.csv', header=0)
    df = df.set_index('date')
    df = df[150:]
    df = df[df.index > date]
    df = df[df['isST'] == 0]
    df = df[df['amount'] / df['turn'] <= 10000000000]
    df = df[df.shift()['limits'] == 1]
    limit_day = df[df['limits'] == 2]
    no_limit_day = df[df['limits'] == 0]
    return [symbol, df.shape[0], limit_day.shape[0], no_limit_day.shape[0]]

def limit_rate2(symbol, date):
    df = pd.read_csv(f'~/stock/a_daily_2/{symbol[2:]}.csv', header=0)
    df = df.set_index('date')
    df = df[150:]
    df = df[df.index > date]
    df = df[df['isST'] == 0]
    df = df[df['amount'] / df['turn'] <= 10000000000]
    df = df[(df.shift()['limits'] == 1) & (df.shift()['turn'] < 6)]
    limit_day = df[df['limits'] == 2]
    no_limit_day = df[df['limits'] == 0]
    return [symbol, df.shape[0], limit_day.shape[0], no_limit_day.shape[0]]

def limit_rate3(symbol, date):
    df = pd.read_csv(f'~/stock/a_daily_2/{symbol[2:]}.csv', header=0)
    df = df.set_index('date')
    df = df[150:]
    df = df[df.index > date]
    df = df[df['isST'] == 0]
    arr = []
    flag = False
    code = symbol
    date = None
    win_days = 0
    for i, row in df.iterrows():
        if row['amount'] / (row['turn'] * 0.01) <= 10000000000 and row['limits'] > 0:
            date = i
            flag = True
            win_days = row['limits']
        elif row['limits'] > 0:
            win_days = row['limits']
        elif flag:
            flag = False
            arr.append([code, date, win_days])
    return arr

start_time = '2023-01-01'
stock_codes = pd.read_csv('~/stock/stocks.csv')['代码']
arr = []
for stock in stock_codes:
    if stock[:2] == 'bj':
        continue
    sub_arr = limit_rate3(stock, start_time)
    for row in sub_arr:
        arr.append(row)
        
df = pd.DataFrame(arr)
df.to_csv(f'~/stock/limits.csv', index=False, float_format='%.15f')

  if row['amount'] / (row['turn'] * 0.01) <= 10000000000 and row['limits'] > 0:


In [None]:
import pandas as pd

# 定义示例数据
data = {
    '日期': ['1997/1/2', '1997/1/3', '1997/1/6', '1997/1/7', '1997/1/8', '1997/1/9', '1997/1/10', '1997/1/13', '1997/1/14', '1997/1/15'],
    '涨停': [1, 1, 0, 0, 1, 1, 1, 1, 0, 0]
}
df = pd.DataFrame(data)

# 计算涨停值的分组编号
df['分组编号'] = (df['涨停'] != df['涨停'].shift(1)).cumsum()

# 计算连续涨停天数
df['第几天连续涨停'] = df[df['涨停'] == 1].groupby('分组编号').cumcount() + 1
df['第几天连续涨停'] = df['第几天连续涨停'].fillna(0)
print(df)
# 输出结果


In [3]:
datas = pd.read_csv(f'~/stock/stocks.csv')
print(type(datas))

<class 'pandas.core.frame.DataFrame'>
