In [2]:
import baostock as bs
import pandas as pd
from pandas import Series,DataFrame
from sqlalchemy import create_engine

# 获取因子数据

In [3]:
def get_previous_trade_dates(date):
    '''
    Return the last trading day strictly before `date`.

    The baostock trade calendar is queried from the 20th of the month
    preceding `date` up to `date` itself, and the latest calendar entry that
    is flagged as a trading day and is earlier than `date` is returned.

    Parameters
    ----------
    date : str
        Reference date in 'YYYY-MM-DD' form.

    Returns
    -------
    str
        The `calendar_date` value of the previous trading day.

    Raises
    ------
    ValueError
        If `date` is not a valid 'YYYY-MM-DD' date, if the calendar query
        returns no rows, or if the window contains no earlier trading day.

    NOTE(review): presumably requires an active baostock session
    (bs.login()) established by the caller - confirm.
    '''
    from datetime import datetime, timedelta

    target = datetime.strptime(date, '%Y-%m-%d')
    # Stepping back one day from the 1st lands in the previous month and
    # rolls the year over correctly for January dates.
    last_of_prev_month = target.replace(day=1) - timedelta(days=1)
    start_date = last_of_prev_month.replace(day=20).strftime('%Y-%m-%d')
    end_date = target.strftime('%Y-%m-%d')

    calendar = bs.query_trade_dates(start_date=start_date, end_date=end_date).get_data()
    if calendar.empty:
        raise ValueError('no trade calendar returned for %s..%s' % (start_date, end_date))

    trading_days = calendar.loc[calendar['is_trading_day'] == '1', 'calendar_date']
    # ISO-formatted date strings sort chronologically, so plain string
    # comparison is enough to keep only days before `date`.
    earlier = trading_days[trading_days < end_date]
    if earlier.empty:
        raise ValueError('no trading day found between %s and %s' % (start_date, end_date))
    return earlier.max()

In [4]:
def get_all_stocks_code(date):
    '''
    List the securities present on both `date` and the preceding trading day.

    `date` must be a trading day. Securities that only appear on `date`
    (listed that very day) are dropped by the inner join, and any code
    matching the exclusion pattern (codes starting with "bj", "sh.000" or
    "sz.399") is filtered out.

    Returns a DataFrame with a fresh 0..n-1 index; columns other than `code`
    carry the merge suffixes `_x` (for `date`) and `_y` (for the previous day).
    '''

    # Security listings for the requested day and the trading day before it
    today_listing = bs.query_all_stock(day=date)
    previous_day = get_previous_trade_dates(date)
    previous_listing = bs.query_all_stock(day=previous_day)

    # Inner join on code: anything missing from the previous day is discarded
    listed_both_days = pd.merge(
        today_listing.get_data(),
        previous_listing.get_data(),
        how='inner',
        on='code',
    )

    # Drop index codes and the other excluded prefixes
    is_excluded = listed_both_days['code'].str.contains('^bj|^sh.000|^sz.399')
    return listed_both_days.loc[~is_excluded].reset_index(drop=True)

In [4]:
def get_stock_k_data(code,start_date,end_date):
    '''
    Fetch monthly forward-adjusted bars for one stock and attach peTTM.

    Two baostock queries are issued over the same date window: a monthly one
    for date/code/close/volume/turn and a daily one for date/code/peTTM. The
    monthly rows are kept as-is and the daily peTTM is left-joined onto them
    by (date, code). Prints "<code> OK" once both queries have returned.
    '''

    # Arguments shared by both queries
    window = dict(start_date=start_date, end_date=end_date, adjustflag="2")

    monthly_bars = bs.query_history_k_data_plus(
        code, "date,code,close,volume,turn", frequency="m", **window
    ).get_data()

    daily_pe = bs.query_history_k_data_plus(
        code, "date,code,peTTM", frequency="d", **window
    ).get_data()

    print(code,'OK')
    return monthly_bars.merge(daily_pe, how='left', on=['date','code'])

In [17]:
def save_stocks_data(start_date,end_date):
    '''
    Download monthly forward-adjusted data for every A-share stock and store it.

    Logs in to baostock, lists the stock codes for `end_date` via
    get_all_stocks_code (so `end_date` must be a trading day), fetches each
    stock with get_stock_k_data over [start_date, end_date], logs out, and
    appends the concatenated result to the `stocks_data` table.

    The database URL is read from the STOCKS_DB_URL environment variable when
    it is set. Rows are appended, so running this twice for the same window
    stores the rows twice.

    Parameters
    ----------
    start_date, end_date : str
        Window bounds in 'YYYY-MM-DD' form.

    Returns
    -------
    pandas.DataFrame
        The concatenated per-stock data that was written.

    Raises
    ------
    RuntimeError
        If the baostock login does not report error code '0'.
    ValueError
        If no stock codes were returned, so there is nothing to store.
    '''
    import os

    #### Log in ####
    lg = bs.login()
    # Show the login response
    print('login respond error_code:'+lg.error_code)
    print('login respond  error_msg:'+lg.error_msg)
    if lg.error_code != '0':
        raise RuntimeError('baostock login failed: ' + lg.error_msg)

    try:
        # Fetch the stock data
        codes = get_all_stocks_code(end_date)
        print('股票数据获取成功')
        frames = [
            get_stock_k_data(code, start_date=start_date, end_date=end_date)
            for code in codes['code']
        ]
    finally:
        #### Log out, even when one of the queries above failed ####
        bs.logout()

    if not frames:
        raise ValueError('no stock codes returned for ' + end_date)
    data = pd.concat(frames)

    # Store the stock data.
    # NOTE(review): the hard-coded URL (including its password) is kept only as
    # a fallback so existing runs keep working; set STOCKS_DB_URL and remove it.
    db_url = os.environ.get(
        'STOCKS_DB_URL',
        "mysql+pymysql://root:981106@localhost:3306/stocks?charset=utf8",
    )
    connect = create_engine(db_url)
    data.to_sql('stocks_data',connect,index=False,if_exists='append')
    print('保存成功')
    return data

In [18]:
# Run save_stocks_data for the window 2020-12-01 .. 2022-01-04; the returned
# DataFrame is displayed as the cell output.
save_stocks_data('2020-12-01','2022-01-04')

login success!
login respond error_code:0
login respond  error_msg:success
logout success!


Unnamed: 0,date,code,close,volume,turn,peTTM
0,2020-12-31,sh.600000,8.7298983200,1080236641,3.680200,5.137672
1,2021-01-29,sh.600000,8.9824160400,1355501014,4.618100,5.012384
2,2021-02-26,sh.600000,9.5054884600,1834071870,6.248600,5.304270
3,2021-03-31,sh.600000,9.9113205100,1726451117,5.882000,5.530733
4,2021-04-30,sh.600000,9.0635824500,1033073493,3.519600,4.944420
...,...,...,...,...,...,...
8,2021-08-31,sh.600008,3.5960012400,2711529973,36.938900,11.042061
9,2021-09-30,sh.600008,3.5863345700,3790973492,51.644000,11.012378
10,2021-10-29,sh.600008,3.2480011200,1914784827,26.085000,9.973474
11,2021-11-30,sh.600008,3.1320010800,2307607361,31.436300,9.683536


# 获取股票池数据

In [46]:
def get_stocks_status(code,start_date,end_date):
    '''
    Fetch the daily trading status of one stock.

    Queries daily bars for `code` over [start_date, end_date] with the fields
    date, code, open, close, tradestatus and isST, prints "<code> OK", and
    returns the result as a DataFrame.
    '''

    status_fields = "date,code,open,close,tradestatus,isST"
    result = bs.query_history_k_data_plus(
        code,
        status_fields,
        start_date=start_date,
        end_date=end_date,
        frequency="d",
        adjustflag="2",
    )

    print(code,'OK')
    status = result.get_data()
    return status

In [47]:
def get_pool_stocks(date):
    '''
    Fetch the CSI 300 and CSI 500 constituents as of `date`.

    Both constituent lists are queried from baostock and stacked into a single
    DataFrame (CSI 300 rows first, original row indices kept). Prints
    "<date> 成分股OK" before returning.
    '''

    # Query both indices, CSI 300 first, then CSI 500
    constituents = [
        bs.query_hs300_stocks(date=date).get_data(),
        bs.query_zz500_stocks(date=date).get_data(),
    ]

    combined = pd.concat(constituents)
    print(date,'成分股OK')
    return combined

In [48]:
def get_pool_change_date(start_date,end_date):
    '''
    Return the dates of the monthly bars of sh.600000 in [start_date, end_date].

    The monthly bar dates of this one stock are used as the list of
    month-end trading days. The result is a DataFrame with a single
    `date` column.
    '''
    monthly_bars = bs.query_history_k_data_plus(
        'sh.600000',
        "date",
        start_date=start_date,
        end_date=end_date,
        frequency="m",
        adjustflag="2",
    )
    return monthly_bars.get_data()

In [49]:
def save_pool_data(start_date,end_date):
    '''
    Build and store the CSI 300 / CSI 500 stock-pool data.

    Steps:
      1. Log in to baostock.
      2. Get the month-end dates in [start_date, end_date] from
         get_pool_change_date and fetch the index constituents for each of
         them with get_pool_stocks.
      3. Record, per month-end date, the first `updateDate` of that month's
         constituent list (None when the list is empty).
      4. Fetch the daily status over the whole window, via get_stocks_status,
         for every code that appeared in any monthly list.
      5. Log out, then append three tables:
         `pool_change_stocks_date` (month-end dates + updateDate),
         `pool_all_stocks` (all monthly constituent rows) and
         `pool_data` (month-end dates left-joined with the status rows on date).

    The database URL is read from the STOCKS_DB_URL environment variable when
    it is set. Rows are appended, so re-running duplicates them.

    Parameters
    ----------
    start_date, end_date : str
        Window bounds in 'YYYY-MM-DD' form.

    Returns
    -------
    pandas.DataFrame
        The frame written to `pool_data`.

    Raises
    ------
    RuntimeError
        If the baostock login does not report error code '0'.
    '''
    import os

    # Log in
    lg = bs.login()
    # Show the login response
    print('login respond error_code:'+lg.error_code)
    print('login respond  error_msg:'+lg.error_msg)
    if lg.error_code != '0':
        raise RuntimeError('baostock login failed: ' + lg.error_msg)

    try:
        # Last trading day of each month, and the constituents on each of them
        month_last_day = get_pool_change_date(start_date,end_date)
        monthly_pools = [get_pool_stocks(day) for day in month_last_day['date']]
        month_last_day['updateDate'] = [
            pool['updateDate'].iloc[0] if not pool.empty else None
            for pool in monthly_pools
        ]
        total_stocks = pd.concat(monthly_pools)

        # Daily status for every stock that was ever part of the pool
        stocks = [
            get_stocks_status(code, start_date=start_date, end_date=end_date)
            for code in total_stocks['code'].unique()
        ]
    finally:
        # Log out, even when one of the queries above failed
        bs.logout()

    # One engine is enough for all three writes.
    # NOTE(review): the hard-coded URL (including its password) is kept only as
    # a fallback so existing runs keep working; set STOCKS_DB_URL and remove it.
    db_url = os.environ.get(
        'STOCKS_DB_URL',
        "mysql+pymysql://root:981106@localhost:3306/stocks?charset=utf8",
    )
    connect = create_engine(db_url)

    month_last_day.to_sql('pool_change_stocks_date',connect,index=False,if_exists='append')
    total_stocks.to_sql('pool_all_stocks',connect,index=False,if_exists='append')

    pool = month_last_day.merge(pd.concat(stocks),how='left',on='date')
    # Store the pool data
    pool.to_sql('pool_data',connect,index=False,if_exists='append')
    print('保存成功')
    return pool

In [50]:
# Run save_pool_data for the window 2009-12-01 .. 2022-08-01; the returned
# DataFrame is displayed as the cell output.
save_pool_data('2009-12-01','2022-08-01')

login success!
login respond error_code:0
login respond  error_msg:success
sh.600000 OK
sh.600001 OK
sh.600004 OK
sh.600005 OK
sh.600006 OK
sh.600008 OK
sh.600009 OK
sh.600010 OK
sh.600011 OK
sh.600015 OK
sh.600016 OK
sh.600017 OK
sh.600018 OK
sh.600019 OK
sh.600022 OK
sh.600026 OK
sh.600027 OK
sh.600028 OK
sh.600029 OK
sh.600030 OK
sh.600031 OK
sh.600033 OK
sh.600036 OK
sh.600037 OK
sh.600048 OK
sh.600050 OK
sh.600058 OK
sh.600066 OK
sh.600068 OK
sh.600085 OK
sh.600087 OK
sh.600089 OK
sh.600096 OK
sh.600100 OK
sh.600102 OK
sh.600104 OK
sh.600108 OK
sh.600109 OK
sh.600110 OK
sh.600111 OK
sh.600117 OK
sh.600118 OK
sh.600123 OK
sh.600125 OK
sh.600132 OK
sh.600143 OK
sh.600150 OK
sh.600151 OK
sh.600153 OK
sh.600158 OK
sh.600169 OK
sh.600170 OK
sh.600176 OK
sh.600177 OK
sh.600183 OK
sh.600188 OK
sh.600196 OK
sh.600208 OK
sh.600210 OK
sh.600216 OK
sh.600219 OK
sh.600220 OK
sh.600221 OK
sh.600236 OK
sh.600251 OK
sh.600256 OK
sh.600266 OK
sh.600269 OK
sh.600270 OK
sh.600271 OK
sh.600276 OK
sh

sz.000426 OK
sz.000428 OK
sz.000429 OK
sz.000501 OK
sz.000503 OK
sz.000504 OK
sz.000507 OK
sz.000510 OK
sz.000511 OK
sz.000513 OK
sz.000514 OK
sz.000520 OK
sz.000522 OK
sz.000525 OK
sz.000532 OK
sz.000533 OK
sz.000537 OK
sz.000540 OK
sz.000541 OK
sz.000548 OK
sz.000550 OK
sz.000554 OK
sz.000563 OK
sz.000565 OK
sz.000571 OK
sz.000572 OK
sz.000573 OK
sz.000581 OK
sz.000584 OK
sz.000589 OK
sz.000594 OK
sz.000597 OK
sz.000598 OK
sz.000599 OK
sz.000600 OK
sz.000601 OK
sz.000602 OK
sz.000607 OK
sz.000608 OK
sz.000609 OK
sz.000616 OK
sz.000617 OK
sz.000619 OK
sz.000627 OK
sz.000655 OK
sz.000659 OK
sz.000666 OK
sz.000677 OK
sz.000679 OK
sz.000682 OK
sz.000683 OK
sz.000687 OK
sz.000695 OK
sz.000698 OK
sz.000707 OK
sz.000708 OK
sz.000712 OK
sz.000713 OK
sz.000726 OK
sz.000727 OK
sz.000731 OK
sz.000733 OK
sz.000735 OK
sz.000737 OK
sz.000748 OK
sz.000755 OK
sz.000759 OK
sz.000762 OK
sz.000777 OK
sz.000786 OK
sz.000799 OK
sz.000815 OK
sz.000816 OK
sz.000818 OK
sz.000819 OK
sz.000823 OK
sz.000826 OK

sh.600483 OK
sh.600751 OK
sh.603025 OK
sh.603355 OK
sh.603567 OK
sh.603568 OK
sh.603589 OK
sh.603806 OK
sh.603883 OK
sz.002085 OK
sz.002180 OK
sz.002217 OK
sz.002354 OK
sz.002439 OK
sz.002568 OK
sz.002657 OK
sz.002745 OK
sz.300026 OK
sz.300055 OK
sz.300085 OK
sz.300122 OK
sz.300182 OK
sz.300257 OK
sz.001979 OK
sh.600666 OK
sh.600871 OK
sz.002027 OK
sh.600053 OK
sh.600466 OK
sh.600687 OK
sh.601155 OK
sh.603198 OK
sz.000008 OK
sz.000810 OK
sz.002002 OK
sz.002074 OK
sz.002131 OK
sz.002261 OK
sz.002268 OK
sz.002366 OK
sz.002426 OK
sz.002512 OK
sz.002589 OK
sz.002602 OK
sz.002624 OK
sz.002640 OK
sz.002699 OK
sz.300001 OK
sz.300010 OK
sz.300032 OK
sz.300113 OK
sz.300166 OK
sz.300287 OK
sh.600297 OK
sh.601127 OK
sh.601611 OK
sz.002797 OK
sh.600338 OK
sh.600862 OK
sh.601689 OK
sh.603377 OK
sh.603528 OK
sh.603866 OK
sh.603868 OK
sz.000025 OK
sz.000723 OK
sz.000761 OK
sz.000806 OK
sz.002176 OK
sz.002407 OK
sz.002460 OK
sz.002491 OK
sz.002544 OK
sz.002581 OK
sz.002707 OK
sz.300136 OK
sz.300156 OK

Unnamed: 0,date,change_date,code,open,close,tradestatus,isST
0,2009-12-31,2009-12-28,sh.600000,5.6436721400,5.6332834200,1,0
1,2009-12-31,2009-12-28,sh.600004,5.0838914000,5.0838914000,1,0
2,2009-12-31,2009-12-28,sh.600005,7.1150140800,7.2196466400,1,0
3,2009-12-31,2009-12-28,sh.600006,6.0577308000,6.0226135200,1,0
4,2009-12-31,2009-12-28,sh.600008,2.5766613900,2.6018170800,1,0
...,...,...,...,...,...,...,...
219459,2022-07-29,2022-06-13,sz.002756,146.4200000000,142.7500000000,1,0
219460,2022-07-29,2022-06-13,sz.300073,97.0000000000,95.7100000000,1,0
219461,2022-07-29,2022-06-13,sz.300390,79.3300000000,77.9000000000,1,0
219462,2022-07-29,2022-06-13,sz.300769,356.4900000000,358.3800000000,1,0


In [1]:
# Fetch CSI 300 and CSI 500 index information
def get_base_info(start_date='2009-12-01', end_date='2022-08-01'):
    '''
    Download monthly closes of the two benchmark indices and store their returns.

    Queries monthly date/code/close bars for sh.000300 and sh.000905 over
    [start_date, end_date], adds a `profit` column holding the month-over-month
    percentage change of `close` (computed separately per index), drops rows
    containing missing values (which removes the first month of each index,
    whose change is undefined), and appends the result to the `base_data` table.

    The database URL is read from the STOCKS_DB_URL environment variable when
    it is set. Rows are appended, so re-running duplicates them.

    Parameters
    ----------
    start_date, end_date : str, optional
        Window bounds in 'YYYY-MM-DD' form; the defaults are the values that
        were previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        The stacked index data (sh.000300 rows first) that was written.

    Raises
    ------
    RuntimeError
        If the baostock login does not report error code '0'.
    '''
    import os

    # Log in
    lg = bs.login()
    # Show the login response
    print('login respond error_code:'+lg.error_code)
    print('login respond  error_msg:'+lg.error_msg)
    if lg.error_code != '0':
        raise RuntimeError('baostock login failed: ' + lg.error_msg)

    try:
        index_frames = []
        for index_code in ("sh.000300", "sh.000905"):
            rs = bs.query_history_k_data_plus(index_code,
                "date,code,close",start_date=start_date, end_date=end_date, frequency="m")
            index_frames.append(rs.get_data())
    finally:
        # Log out, even when one of the queries above failed
        bs.logout()

    for info in index_frames:
        # The dtype is given by name so the conversion does not depend on
        # numpy being imported in the notebook.
        info['profit'] = info['close'].astype('float64').pct_change()

    base = pd.concat(index_frames).dropna()

    # NOTE(review): the hard-coded URL (including its password) is kept only as
    # a fallback so existing runs keep working; set STOCKS_DB_URL and remove it.
    db_url = os.environ.get(
        'STOCKS_DB_URL',
        "mysql+pymysql://root:981106@localhost:3306/stocks?charset=utf8",
    )
    con = create_engine(db_url)
    base.to_sql('base_data',con,index=False,if_exists='append')
    return base

In [None]:
# Fetch CSI 300 and CSI 500 index information
get_base_info()