In [2]:
import os
import sqlite3
import sys
from datetime import datetime
from pathlib import Path

import akshare as ak
import numpy as np
import pandas as pd
import tushare as ts

# Take the timestamp once so every value derived from "today" agrees within a run.
tdy = datetime.today()
# Compact YYYYMMDD rendering of today's date.
tdy_date = format(tdy, '%Y%m%d')

# Context manager that silences print() output produced inside the with-block.
class HiddenPrints:
    """Temporarily redirect ``sys.stdout`` to the null device.

    On entry the current ``sys.stdout`` is remembered and replaced by a
    handle opened on ``os.devnull``. On exit the remembered stream is put
    back and the null-device handle is closed, whether or not the body
    raised. Exceptions from the body are not suppressed.
    """

    def __enter__(self):
        self._original_stdout = sys.stdout
        # Keep our own reference so __exit__ closes exactly this handle,
        # even if the body rebinds sys.stdout in the meantime.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore first so a failing close() cannot leave stdout redirected.
        sys.stdout = self._original_stdout
        self._devnull.close()

# Context manager for a short-lived SQLite connection.
class fast_db_conn:
    """Open a SQLite connection for the duration of a ``with`` block.

    ``__enter__`` connects to ``db_path`` and returns the raw
    ``sqlite3.Connection``. When the block finishes normally the pending
    transaction is committed; when it raises, the pending transaction is
    rolled back so half-finished work is not persisted. The connection is
    closed in both cases and the exception (if any) propagates.

    Args:
        db_path: Database location handed unchanged to ``sqlite3.connect``.
    """

    def __init__(self, db_path):
        self._db = db_path
        self._conn = None

    def __enter__(self):
        self._conn = sqlite3.connect(self._db)
        print("打开连接 {}".format(self._db))
        return self._conn

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            if exc_type is None:
                self._conn.commit()
            else:
                # The body failed: discard whatever has not been committed yet.
                self._conn.rollback()
        finally:
            # Always release the handle, even if commit/rollback itself fails.
            self._conn.close()
            print("关闭连接 {}".format(self._db))

# Path configuration; every location is derived from the working directory.
cur_path = Path(r'.')
print("当前路径为 {}".format(cur_path.resolve()))

# Two levels above the working directory, e.g. D:/gitee
gitee_path = cur_path.joinpath(r'./../..')
# The db folder underneath it, e.g. D:/gitee/db
db_file_path = gitee_path.joinpath(r'./db')
db_name_stock = Path(r'./stock.db')
datapath = Path(r'./../../data/tushare_old')

# Resolved locations of the stock database file and of the DDL script.
dbpath = db_file_path.joinpath(db_name_stock).resolve()
ddl_path = Path(r'./sql/em_stock.sql').resolve()
print("数据库路径为 {}".format(dbpath.resolve()))


当前路径为 E:\gitee\finance\jupyterFile
数据库路径为 E:\gitee\db\stock.db


In [3]:
# Preview the first rows of the spot-quote table returned by akshare
# (the recorded output shows code, name, latest price and related columns).
ak.stock_zh_a_spot_em().head()

Unnamed: 0,序号,代码,名称,最新价,涨跌幅,涨跌额,成交量,成交额,振幅,最高,...,量比,换手率,市盈率-动态,市净率,总市值,流通市值,涨速,5分钟涨跌,60日涨跌幅,年初至今涨跌幅
0,1,603130,N云中马,28.4,44.02,8.68,89109.0,252107900.0,24.04,28.4,...,,25.46,35.47,3.24,3976000000.0,994000000.0,0.0,0.0,44.02,44.02
1,2,688376,N美埃,37.7,29.15,8.51,198738.0,735526300.0,31.69,43.85,...,,64.4,44.18,3.52,5066880000.0,1163371000.0,0.19,-0.66,29.15,29.15
2,3,300515,三德科技,15.76,20.03,2.63,192893.0,286585900.0,20.64,15.76,...,4.32,10.65,39.24,4.85,3243747000.0,2853132000.0,0.0,0.0,33.56,20.21
3,4,301389,隆扬电子,24.38,19.98,4.06,346505.0,793475600.0,20.32,24.38,...,2.91,51.55,38.79,3.01,6911730000.0,1638909000.0,0.0,0.0,8.36,8.36
4,5,300847,中船汉光,19.0,15.92,2.61,316681.0,586698500.0,18.55,19.44,...,2.04,20.97,50.59,4.57,5624190000.0,2869501000.0,0.05,-0.05,40.53,10.79


In [2]:
def stock_data_collect(db, tb_name, code_list, start, end, adjust, commit_every=200):
    """Download daily history for each code and append it to a SQLite table.

    For every code in ``code_list`` the function calls
    ``ak.stock_zh_a_hist`` with ``period="daily"``, adds a ``code`` column
    and a ``pk`` column (``"<code>_<日期>"``), and appends the frame to
    ``tb_name``. Every ``commit_every`` codes the connection is committed and
    the average duration of the two phases (download, insert) for that batch
    is printed.

    A code whose download raises ``KeyError`` or returns an empty frame is
    skipped. It no longer reuses the previous code's frame, and it no longer
    causes the commit/progress checkpoint to be missed.

    Args:
        db: Database location passed to ``fast_db_conn``.
        tb_name: Target table; rows are appended (``if_exists='append'``).
        code_list: Iterable of stock codes passed as ``symbol``.
        start: Passed as ``start_date`` (callers in this notebook use YYYYMMDD strings).
        end: Passed as ``end_date`` (same format as ``start``).
        adjust: Passed as ``adjust`` (callers in this notebook use "" and "hfq").
        commit_every: Number of codes between commits/progress lines.

    Raises:
        ValueError: If ``commit_every`` is smaller than 1.
    """
    if commit_every < 1:
        raise ValueError("commit_every must be a positive integer")

    # Per-batch timings in seconds for the two phases: download and insert.
    fetch_secs, insert_secs = [], []
    with fast_db_conn(db) as c:
        for i, code in enumerate(code_list):
            start_tm = datetime.now()
            try:
                df = ak.stock_zh_a_hist(symbol=code, period="daily", start_date=start, end_date=end, adjust=adjust)
            except KeyError as e:
                print(e)
                # Mark the download as failed so a stale frame from the
                # previous iteration is never written under this code.
                df = None

            if df is not None and not df.empty:
                after_ak = datetime.now()
                df["code"] = code
                # astype(str) is a no-op for string dates.
                # NOTE(review): presumably some akshare versions return date objects here - confirm.
                df["pk"] = df["code"] + '_' + df["日期"].astype(str)
                df.to_sql(name=tb_name, con=c, if_exists='append', index=False)
                after_insert = datetime.now()
                fetch_secs.append((after_ak - start_tm).total_seconds())
                insert_secs.append((after_insert - after_ak).total_seconds())

            # Checkpoint by position in code_list, whether or not this code was stored.
            if not ((i + 1) % commit_every):
                c.commit()
                avg_fetch = np.array(fetch_secs).mean().round(2) if fetch_secs else float("nan")
                avg_insert = np.array(insert_secs).mean().round(2) if insert_secs else float("nan")
                print("完成 {} commit, 两阶段平均耗时为 {} {}".format(i + 1, avg_fetch, avg_insert))
                fetch_secs, insert_secs = [], []


In [3]:
# Fetch today's spot table and take the distinct stock codes from it.
# (Author's note carried over: a price/name mapping is still needed here.)
em_stock_spot = ak.stock_zh_a_spot_em()
em_code = [*set(em_stock_spot['代码'])]
print("今天 {} em有 {} 支股票".format(format(tdy, '%Y-%m-%d'), len(em_code)))

# Collection window, also rendered as YYYYMMDD strings.
start, end = datetime(2022, 1, 1), datetime(2022, 11, 19)
start_date, end_date = (moment.strftime('%Y%m%d') for moment in (start, end))

# Table names encode the window and the run date; the second one carries the hfq marker.
tb_name_em, tb_name_em_hfq = (
    template.format(start_date, end_date, tdy_date)
    for template in ('em_stock_{}_{}_{}', 'em_stock_hfq_{}_{}_{}')
)

今天 2022-11-19 em有 5200 支股票


In [4]:
# Create both tables from the same DDL script.
for _tb in (tb_name_em, tb_name_em_hfq):
    # The script's placeholder is filled with the target table name.
    ddl_sql = ddl_path.read_text(encoding='utf-8').format(_tb)
    with fast_db_conn(dbpath) as ddl_c:
        ddl_c.executescript(ddl_sql)
    print('{} 已创建'.format(_tb))

In [6]:
# Unadjusted prices (adjust=""): collect daily history for every code into tb_name_em.
stock_data_collect(dbpath, tb_name_em, em_code, start_date, end_date, adjust="")


打开连接 E:\gitee\db\stock.db
完成 200 commit, 两阶段平均耗时为 0.31 0.15
完成 400 commit, 两阶段平均耗时为 0.31 0.17
完成 600 commit, 两阶段平均耗时为 0.29 0.19
完成 800 commit, 两阶段平均耗时为 0.29 0.2
完成 1000 commit, 两阶段平均耗时为 0.31 0.2
完成 1200 commit, 两阶段平均耗时为 0.3 0.19
完成 1400 commit, 两阶段平均耗时为 0.28 0.2
完成 1600 commit, 两阶段平均耗时为 0.28 0.21
完成 1800 commit, 两阶段平均耗时为 0.28 0.2
完成 2000 commit, 两阶段平均耗时为 0.28 0.21
完成 2200 commit, 两阶段平均耗时为 0.28 0.2
完成 2400 commit, 两阶段平均耗时为 0.28 0.19
完成 2600 commit, 两阶段平均耗时为 0.3 0.2
完成 2800 commit, 两阶段平均耗时为 0.3 0.2
完成 3000 commit, 两阶段平均耗时为 0.3 0.2
完成 3200 commit, 两阶段平均耗时为 0.3 0.2
完成 3400 commit, 两阶段平均耗时为 0.3 0.2
完成 3600 commit, 两阶段平均耗时为 0.28 0.19
完成 3800 commit, 两阶段平均耗时为 0.28 0.2
完成 4000 commit, 两阶段平均耗时为 0.28 0.2
完成 4200 commit, 两阶段平均耗时为 0.27 0.2
完成 4400 commit, 两阶段平均耗时为 0.28 0.21
完成 4600 commit, 两阶段平均耗时为 0.28 0.2
完成 4800 commit, 两阶段平均耗时为 0.29 0.2
完成 5000 commit, 两阶段平均耗时为 0.31 0.19
完成 5200 commit, 两阶段平均耗时为 0.3 0.21
关闭连接 E:\gitee\db\stock.db


In [7]:
# hfq variant (adjust="hfq"): same codes and date window, stored in the separate tb_name_em_hfq table.
stock_data_collect(dbpath, tb_name_em_hfq, em_code, start_date, end_date, adjust="hfq")

打开连接 E:\gitee\db\stock.db
完成 200 commit, 两阶段平均耗时为 0.3 0.21
完成 400 commit, 两阶段平均耗时为 0.31 0.21
完成 600 commit, 两阶段平均耗时为 0.3 0.24
完成 800 commit, 两阶段平均耗时为 0.3 0.24
完成 1000 commit, 两阶段平均耗时为 0.3 0.22
完成 1200 commit, 两阶段平均耗时为 0.3 0.21
完成 1400 commit, 两阶段平均耗时为 0.31 0.21
完成 1600 commit, 两阶段平均耗时为 0.3 0.22
完成 1800 commit, 两阶段平均耗时为 0.31 0.21
完成 2000 commit, 两阶段平均耗时为 0.33 0.21
完成 2200 commit, 两阶段平均耗时为 0.34 0.21
完成 2400 commit, 两阶段平均耗时为 0.34 0.21
完成 2600 commit, 两阶段平均耗时为 0.67 0.2
完成 2800 commit, 两阶段平均耗时为 1.28 0.18
完成 3000 commit, 两阶段平均耗时为 1.21 0.19
完成 3200 commit, 两阶段平均耗时为 1.04 0.19
完成 3400 commit, 两阶段平均耗时为 1.25 0.18
完成 3600 commit, 两阶段平均耗时为 0.99 0.18
完成 3800 commit, 两阶段平均耗时为 0.3 0.2
完成 4000 commit, 两阶段平均耗时为 0.3 0.2
完成 4200 commit, 两阶段平均耗时为 0.3 0.21
完成 4400 commit, 两阶段平均耗时为 0.3 0.21
完成 4600 commit, 两阶段平均耗时为 0.3 0.22
完成 4800 commit, 两阶段平均耗时为 0.31 0.22
完成 5000 commit, 两阶段平均耗时为 0.29 0.21
完成 5200 commit, 两阶段平均耗时为 0.28 0.22
关闭连接 E:\gitee\db\stock.db


In [16]:
# Show the constituent-weight table akshare returns with default arguments
# (the recorded output is for index 000300 and includes a weight column).
ak.index_stock_cons_weight_csindex()

Unnamed: 0,日期,指数代码,指数名称,指数英文名称,成分券代码,成分券名称,成分券英文名称,交易所,交易所英文名称,权重
0,2022-10-31,000300,沪深300,CSI 300,300014,亿纬锂能,Eve Energy Co Ltd,深圳证券交易所,Shenzhen Exchange,0.573
1,2022-10-31,000300,沪深300,CSI 300,300015,爱尔眼科,Aier Eye Hospital Group Co Ltd,深圳证券交易所,Shenzhen Exchange,0.529
2,2022-10-31,000300,沪深300,CSI 300,600150,中国船舶,China CSSC Holdings Limited,上海证券交易所,Shanghai Exchange,0.289
3,2022-10-31,000300,沪深300,CSI 300,600161,天坛生物,Beijing Tiantan Biological Products Co Ltd,上海证券交易所,Shanghai Exchange,0.104
4,2022-10-31,000300,沪深300,CSI 300,600188,兖矿能源,Yankuang Energy Group Company Limited,上海证券交易所,Shanghai Exchange,0.226
...,...,...,...,...,...,...,...,...,...,...
295,2022-10-31,000300,沪深300,CSI 300,601825,沪农商行,"Shanghai Rural Commercial Bank Co.,Ltd.",上海证券交易所,Shanghai Exchange,0.031
296,2022-10-31,000300,沪深300,CSI 300,601728,中国电信,China Telecom Corporation Limited,上海证券交易所,Shanghai Exchange,0.130
297,2022-10-31,000300,沪深300,CSI 300,601868,中国能建,China Energy Engineering Corporation Limited,上海证券交易所,Shanghai Exchange,0.181
298,2022-10-31,000300,沪深300,CSI 300,600941,中国移动,China Mobile Limited,上海证券交易所,Shanghai Exchange,0.185


### 下面获取股票指数及其成分股

In [17]:
# Show the constituent list akshare returns with default arguments
# (the recorded output is for index 000300).
ak.index_stock_cons_csindex()

Unnamed: 0,日期,指数代码,指数名称,指数英文名称,成分券代码,成分券名称,成分券英文名称,交易所,交易所英文名称
0,2022-11-18,000300,沪深300,CSI 300,002236,大华股份,Zhejiang Dahua Technology Co Ltd,深圳证券交易所,Shenzhen Exchange
1,2022-11-18,000300,沪深300,CSI 300,601111,中国国航,Air China Ltd,上海证券交易所,Shanghai Exchange
2,2022-11-18,000300,沪深300,CSI 300,002064,华峰化学,"Huafon Chemical Co., Ltd.",深圳证券交易所,Shenzhen Exchange
3,2022-11-18,000300,沪深300,CSI 300,000876,新 希 望,"NEW HOPE LIUHE CO., LTD",深圳证券交易所,Shenzhen Exchange
4,2022-11-18,000300,沪深300,CSI 300,601088,中国神华,China Shenhua Energy Co Ltd,上海证券交易所,Shanghai Exchange
...,...,...,...,...,...,...,...,...,...
295,2022-11-18,000300,沪深300,CSI 300,601865,福莱特,"Flat Glass Group CO.,LTD.",上海证券交易所,Shanghai Exchange
296,2022-11-18,000300,沪深300,CSI 300,688008,澜起科技,"Montage Technology Co., Ltd.",上海证券交易所,Shanghai Exchange
297,2022-11-18,000300,沪深300,CSI 300,688012,中微公司,Advanced Micro-Fabrication Equipment Inc. China,上海证券交易所,Shanghai Exchange
298,2022-11-18,000300,沪深300,CSI 300,603290,斯达半导,StarPower Semiconductor Ltd.,上海证券交易所,Shanghai Exchange


In [7]:
# Same call as an earlier cell, with the same recorded output.
# NOTE(review): looks like a leftover duplicate cell - confirm and remove.
ak.index_stock_cons_csindex()

Unnamed: 0,日期,指数代码,指数名称,指数英文名称,成分券代码,成分券名称,成分券英文名称,交易所,交易所英文名称
0,2022-11-18,000300,沪深300,CSI 300,002236,大华股份,Zhejiang Dahua Technology Co Ltd,深圳证券交易所,Shenzhen Exchange
1,2022-11-18,000300,沪深300,CSI 300,601111,中国国航,Air China Ltd,上海证券交易所,Shanghai Exchange
2,2022-11-18,000300,沪深300,CSI 300,002064,华峰化学,"Huafon Chemical Co., Ltd.",深圳证券交易所,Shenzhen Exchange
3,2022-11-18,000300,沪深300,CSI 300,000876,新 希 望,"NEW HOPE LIUHE CO., LTD",深圳证券交易所,Shenzhen Exchange
4,2022-11-18,000300,沪深300,CSI 300,601088,中国神华,China Shenhua Energy Co Ltd,上海证券交易所,Shanghai Exchange
...,...,...,...,...,...,...,...,...,...
295,2022-11-18,000300,沪深300,CSI 300,601865,福莱特,"Flat Glass Group CO.,LTD.",上海证券交易所,Shanghai Exchange
296,2022-11-18,000300,沪深300,CSI 300,688008,澜起科技,"Montage Technology Co., Ltd.",上海证券交易所,Shanghai Exchange
297,2022-11-18,000300,沪深300,CSI 300,688012,中微公司,Advanced Micro-Fabrication Equipment Inc. China,上海证券交易所,Shanghai Exchange
298,2022-11-18,000300,沪深300,CSI 300,603290,斯达半导,StarPower Semiconductor Ltd.,上海证券交易所,Shanghai Exchange


In [13]:
# Keep akshare's index listing (index_code, display_name, publish_date in the
# recorded output) in a variable and display it.
aaa = ak.index_stock_info()
aaa

Unnamed: 0,index_code,display_name,publish_date
0,000001,上证指数,1991-07-15
1,000002,A股指数,1992-02-21
2,000003,B股指数,1992-02-21
3,000004,工业指数,1993-05-03
4,000005,商业指数,1993-05-03
...,...,...,...
700,399994,中证信息安全主题指数,2015-03-12
701,399995,中证基建工程指数,2015-03-12
702,399996,中证智能家居指数,2014-09-17
703,399997,中证白酒指数,2015-01-21


In [14]:
# Look up index code 931463 in the listing; the recorded output has no rows,
# i.e. that code was not present.
aaa.query("index_code == '931463'") 

Unnamed: 0,index_code,display_name,publish_date


In [15]:
# Constituents of index 000300; the recorded output lists code, name and inclusion date.
ak.index_stock_cons(symbol="000300")

Unnamed: 0,品种代码,品种名称,纳入日期
0,300661,圣邦股份,2022-06-13
1,600460,士兰微,2022-06-13
2,600941,中国移动,2022-06-13
3,000792,盐湖股份,2022-06-13
4,601825,沪农商行,2022-06-13
...,...,...,...
295,000157,中联重科,2005-04-08
296,000069,华侨城A,2005-04-08
297,000063,中兴通讯,2005-04-08
298,000001,深发展A,2005-04-08


In [20]:
# Load the first HTML table found on the JoinQuant index dictionary page.
index_df = pd.read_html("https://www.joinquant.com/data/dict/indexData")[0]
# Keep only the part of each index code that precedes the first ".".
index_df["指数代码"] = index_df["指数代码"].str.split(".").str.get(0)
# Replace the page's headers with ASCII column names (five columns expected).
index_df.columns = ["index_code", "display_name", "publish_date", "-", "short"]

In [25]:
# Export the selected index columns to index_list.xlsx (sheet "index").
# index must be the boolean False: the string 'False' is truthy, so pandas
# would still write the row-index column.
index_df[["index_code", "display_name", "publish_date", "short"]].to_excel("index_list.xlsx", sheet_name='index', index=False)