In [1]:
import numpy as np
import pandas as pd
import tushare as ts
from utils import *
print(ts.__version__)

1.2.84


In [2]:
ts.set_token('1f2f092156dfe775a88e478f3a54565e0adab29246c83814e95fa0dd')
pro = ts.pro_api()

## 过滤概念板块

In [3]:
# 过滤概念板块
concept_stocks = pd.read_csv("./data/concept_stocks.csv")  # (34444, 6)
print(concept_stocks.shape, concept_stocks.columns, concept_stocks.tail())

concept_groups = concept_stocks.groupby("concept_name", as_index=False)  # 873 groups, indexed from TS0 to TS879
print(len(concept_groups))
#for name, group in concept_groups:
#    if len(group) > 223 or len(group)<=2:
#        print(name, len(group))
concept_groups = concept_groups.filter(lambda x: len(x)<=223 and len(x)>=3)
print(len(concept_groups), len(concept_groups.groupby("id")))  # .get_group("TS0")
concept_groups.to_csv("./data/concept_stocks_filtered_223_3.csv", index=False)  # 23928, 824
# {name:gdf for name,gdf in df.groupby('Declarer') if len(gdf) >= 5}

(34444, 6) Index(['id', 'concept_name', 'ts_code', 'name', 'in_date', 'out_date'], dtype='object')           id concept_name    ts_code  name   in_date  out_date
34439  TS879         3D打印  603112.SH  华翔股份  20210205       NaN
34440  TS879         3D打印  603928.SH  兴业股份  20210205       NaN
34441  TS879         3D打印  603977.SH  国泰集团  20210205       NaN
34442  TS879         3D打印  605376.SH  博迁新材  20210205       NaN
34443  TS879         3D打印  688333.SH   铂力特  20210205       NaN
873
23928 824


## 建图代码

In [4]:
# 建图
#  edge_index: [2, num_edges] 
def get_rind_map(indsty):
	groups = pd.read_csv(indsty).groupby('id', as_index=False)
	rind_map = {id: group["ts_code"].values.tolist() for id, group in groups}
	return rind_map


def set_edge(mat, stocks, stock_id_map):
	# stock-stock edges
	for i in range(len(stocks)):
		for j in range(i+1, len(stocks)):
			s = stock_id_map.get(stocks[i], -1)
			e = stock_id_map.get(stocks[j], -1)
			if s==-1 or e==-1: continue
			mat[s,e] = mat[e,s] = 1


def set_heteredge(idx, mat, stocks, stock_id_map):
	# stock-industry edges
	e = idx + len(stock_id_map)
	for i in range(len(stocks)):
		s = stock_id_map.get(stocks[i], -1)
		if s==-1 or e==-1: continue
		mat[s,e] = mat[e,s] = 1


def build_hetergraph(indsty, return_adj, stock_id_map):
	snode_nums = len(stock_id_map)
	rind_map = get_rind_map(indsty)

	mat = torch.eye(snode_nums+len(rind_map.keys()))  # add self loop

	for i, (ind, stocks) in enumerate(rind_map.items()):
		set_heteredge(i, mat, stocks, stock_id_map)
		
	edge = mat.nonzero()
	return snode_nums, snode_nums+len(rind_map.keys()), mat if return_adj == True else edge.transpose(0, 1)


def build_graph(indsty, return_adj, stock_id_map):
	node_nums = len(stock_id_map)
	rind_map = get_rind_map(indsty)

	mat = torch.eye(node_nums)  # add self loop

	for ind, stocks in rind_map.items():
		set_edge(mat, stocks, stock_id_map)
		
	edge = mat.nonzero()
	return mat if return_adj == True else edge.transpose(0, 1)

## 概念板块建图

In [None]:

stock_list_path = "./data/stock_codes_1931.txt"
stock_id_map = get_stock_id_mapping(stock_list_path)
adj = build_graph("./data/concept_stocks_filtered_223_3.csv", True, stock_id_map)

print(adj.shape)

In [None]:
np.save("./data/concepts_graph_1931_233_3.npy", adj.data)

In [None]:
# 给定0-1邻接矩阵，画图，x是node degree, y是节点数目/比例
from matplotlib import pyplot as plt

adj = adj - adj.diag()  # remove self-loop
degree_cnt = [0 for i in range(len(adj))]
edges = 0
for i in range(len(adj)):
    d = int(sum(adj[i]))
    edges += d
    degree_cnt[d] += 1

print(edges / (len(adj)**2))
plt.plot([i for i in range(1, 600)], degree_cnt[1:600])  # x, y
plt.xlabel("node degree")
plt.ylabel("node cnt")
plt.show()

## 算股票间ic值
- n*n matrix, then filter it

In [3]:
prices_ffill = np.load("./data/close_price_ffil_3404_1931.npy")  # labels not shifted
mask = np.load("./data/mask_3404_1931.npy")

In [10]:
import os
import numpy as np
from scipy.stats import pearsonr

# 20100104在第974行
stock_num = 1931
# 一年大概250个交易日， 一个月大概20个交易日
min_ic = 0.8
window_size = 750  # 5(week), 20(month), 60(quater), 250(1year), 750(3years), 1250(5years)
for i in range(973, 3404):
    st, ed = i-window_size, i  # [i-window, i-1]
    cur_corr = np.corrcoef(prices_ffill[st:ed, :], rowvar=False)  # each column is a variable
    new_corr = np.nan_to_num(cur_corr) # fillna with zero
    new_corr[new_corr < min_ic] = 0
    # edge_index = np.array(new_corr.nonzero())  # 1.2s, 1.9s
    edge_index = np.stack(new_corr.nonzero(), axis=0)  # 1.6s, 1.3s

    if not os.path.exists(f"./data/icgraph_window_{window_size}_{min_ic}/"):
        os.makedirs(f"./data/icgraph_window_{window_size}_{min_ic}/")
    np.savez_compressed(os.path.join(f"./data/icgraph_window_{window_size}_{min_ic}/", f"date_{i-973}"), adj=edge_index)
    

In [7]:
print(sum(sum(np.isnan(cur_corr))))  # 56.54%
# 当某一列数据全都相等时，ic值是nan，因为方差是0
ic_pos = cur_corr[cur_corr>0]  # 1523299
ic_neg = cur_corr[cur_corr<0]  # 97150
print(ic_pos.shape, ic_neg.shape)
# mean 0.6699, median 0.74, >0.6 1061219 28.46%, >=0.8 581145 15.59%
# mean -0.1686, median -0.1206, <-0.2 33156 0.889% <-0.5 5650 <-0.6 2784,  <-0.8 380 
# 负相关的边太稀疏了，忽略掉
print(max(ic_pos), min(ic_pos), np.mean(ic_pos), np.median(ic_pos), sum(ic_pos>0.8)/(1931*1931))

69192
(2352653,) (1306896,)
1.0 9.822263852377673e-18 0.5901733714544581 0.6526462231400282 0.19138770224211205


In [29]:
new_corr = np.nan_to_num(cur_corr)
new_corr[new_corr<0.6] = 0

In [40]:
edge_index = new_corr.nonzero()
print(edge_index.shape)
# np.save('./data/icgraph_window_600/date_0.npy', edge_index)  # [2, 1931]
# np.savez_compressed('./data/icgraph_window_600/date_0.npz', adj=edge_index)

AttributeError: 'tuple' object has no attribute 'shape'

In [36]:
train_price = np.load("./data/train_2305_1931_12.npy")
np.savez_compressed("./data/train_2305_1931_12.npz", data=train_price)

In [39]:
edge_index = np.load("./data/icgraph_window_600/date_0.npy")
print(edge_index.shape)

(1061219, 2)


In [30]:
import torch
t_corr = torch.from_numpy(new_corr).to_sparse()
print(t_corr.size())
torch.save(t_corr, "./data/icgraph_window_600/date_0.pt")

torch.Size([1931, 1931])


## 基金持仓建图
- pass，不是很能理解模型应该学到什么……

In [18]:
fund_list = pro.fund_basic(market='E', status='L') # E 场内，O场外, D摘牌 I发行 L上市中
print(fund_list.columns, fund_list.shape)  # E:[1613,25], 501摘牌，5发行, 1107上市中 O:[15000, 25]
fund_list.tail()
fund_list.to_csv("./data/fund_list_e_1107.csv")

Index(['ts_code', 'name', 'management', 'custodian', 'fund_type', 'found_date',
       'due_date', 'list_date', 'issue_date', 'delist_date', 'issue_amount',
       'm_fee', 'c_fee', 'duration_year', 'p_value', 'min_amount',
       'exp_return', 'benchmark', 'status', 'invest_type', 'type', 'trustee',
       'purc_startdate', 'redm_startdate', 'market'],
      dtype='object') (1107, 25)


In [20]:
# 沪深300LOF， 160706.SZ
# 广发小盘LOF， 162703.SZ
fund = pro.fund_portfolio(ts_code='160706.SZ')
print(fund.columns, fund.shape) 
fund.head()

Index(['ts_code', 'ann_date', 'end_date', 'symbol', 'mkv', 'amount',
       'stk_mkv_ratio', 'stk_float_ratio'],
      dtype='object') (4000, 8)


Unnamed: 0,ts_code,ann_date,end_date,symbol,mkv,amount,stk_mkv_ratio,stk_float_ratio
0,160706.SZ,20220330,20211231,601658.SH,9180.0,1800.0,0.46,0.0
1,160706.SZ,20220330,20211231,000100.SZ,9255.0,1500.0,0.46,0.0
2,160706.SZ,20220330,20211231,600837.SH,9808.0,800.0,0.49,0.0
3,160706.SZ,20220330,20211231,600309.SH,10100.0,100.0,0.5,0.0
4,160706.SZ,20220330,20211231,000625.SZ,10633.0,700.0,0.53,0.0


In [21]:
fund['ann_date'].unique()

array(['20220330', '20220121', '20211026', '20210827', '20210720',
       '20210421', '20210330', '20210121', '20201027', '20200718',
       '20200829', '20200422', '20200121', '20200408', '20191022',
       '20190717', '20190829', '20190420', '20190326', '20190121',
       '20181026', '20180825', '20180718', '20180421', '20180328',
       '20180122', '20171025', '20170826', '20170720', '20170424',
       '20170119', '20170329', '20161025', '20160827', '20160721',
       '20160420', '20160329', '20160121', '20151027', '20150829',
       '20150718'], dtype=object)

In [22]:
len(fund['symbol'].unique())   # 722

722

In [27]:
fund = fund.sort_values(by=['ann_date'])

In [30]:
for name, group in fund.groupby(['ann_date']):
    if name != '20150718':
        print(name, len(group))
        print(group)
        break

20150829 110
        ts_code  ann_date  end_date     symbol        mkv     amount  \
3999  160706.SZ  20150829  20150630  000729.SZ   848640.0    81600.0   
3917  160706.SZ  20150829  20150630  601216.SH    43326.0     1800.0   
3918  160706.SZ  20150829  20150630  600516.SH   724128.0    60800.0   
3919  160706.SZ  20150829  20150630  600578.SH   718865.0    80500.0   
3920  160706.SZ  20150829  20150630  300486.SZ     6435.0      500.0   
...         ...       ...       ...        ...        ...        ...   
3962  160706.SZ  20150829  20150630  002236.SZ  1062936.0    33300.0   
3961  160706.SZ  20150829  20150630  600153.SH    36771.0     2100.0   
3960  160706.SZ  20150829  20150630  601818.SH  6206880.0  1158000.0   
3959  160706.SZ  20150829  20150630  300059.SZ  5343723.0    84700.0   
3963  160706.SZ  20150829  20150630  000598.SZ   994323.0   103900.0   

      stk_mkv_ratio  stk_float_ratio  
3999           0.12             0.00  
3917           0.01             0.00  
3918 

## 公告数据

In [7]:
from utils import get_days_id_mapping, get_stock_id_mapping
stock_list_path = "./data/stock_codes_1931.txt"
days_list_path = "./data/trading_opendays_2431.txt"
stock_id_map = get_stock_id_mapping(stock_list_path)
days_id_map = get_days_id_mapping(days_list_path)

In [41]:
# 这一种方法不够好，看下一个cell的
ann_save_path = "./data/announcement_all.csv"

import os
print(os.getcwd())
for i, date in enumerate(days_id_map.keys()):
    ann = pd.read_csv(f"./crawler/announcement/{date[:4]}-{date[4:6]}-{date[6:]}.csv", quotechar='"', usecols=range(15))
    print(date, ann.shape)
    if i==0:
        ann.to_csv(ann_save_path, sep='|', index=False)
    else:
        ann.to_csv(ann_save_path, sep='|' , header=False, mode="a", index=False)

/home/hy/thesis
20100104 (342, 15)
20100105 (294, 15)
20100106 (222, 15)
20100107 (254, 15)
20100108 (295, 15)
20100111 (75, 15)
20100112 (337, 15)
20100113 (249, 15)
20100114 (183, 15)
20100115 (313, 15)
20100118 (62, 15)
20100119 (528, 15)
20100120 (323, 15)
20100121 (289, 15)
20100122 (280, 15)
20100125 (112, 15)
20100126 (355, 15)
20100127 (341, 15)
20100128 (422, 15)
20100129 (382, 15)
20100201 (107, 15)
20100202 (411, 15)
20100203 (351, 15)
20100204 (305, 15)
20100205 (384, 15)
20100208 (139, 15)
20100209 (622, 15)
20100210 (696, 15)
20100211 (451, 15)
20100212 (474, 15)
20100222 (53, 15)
20100223 (281, 15)
20100224 (238, 15)
20100225 (380, 15)
20100226 (618, 15)
20100301 (106, 15)
20100302 (429, 15)
20100303 (404, 15)
20100304 (380, 15)
20100305 (487, 15)
20100308 (51, 15)
20100309 (645, 15)
20100310 (519, 15)
20100311 (476, 15)
20100312 (449, 15)
20100315 (155, 15)
20100316 (945, 15)
20100317 (345, 15)
20100318 (799, 15)
20100319 (561, 15)
20100322 (132, 15)
20100323 (1108, 15)

In [5]:
ann_save_path = "../data/announcement_all_new.csv"

import os
print(os.getcwd())
with open(ann_save_path, 'w') as fout:
    fout.write("art_code,display_time,eiTime,language,notice_date,title,title_ch,title_en,ann_type,inner_code,market_code,short_name,stock_code,column_code,column_name\n")
    for i, date in enumerate(days_id_map.keys()):
        fname = f"./crawler/announcement/{date[:4]}-{date[4:6]}-{date[6:]}.csv"
        print(fname)
        with open(fname, 'r') as fin:
            for i, line in enumerate(fin):
                if i==0: continue
                newline = ''.join(line.split('"')[0]+line.split('"')[-1])
                newline = newline.strip().split(',')
                if len(newline[4]) < 10:
                    newline.pop(4)  # 有一些行多了一列？？

                date = newline[4] if len(newline[2])<10 or \
                        newline[2].split()[0]=='2013-03-14'or \
                        newline[2].split()[0]>newline[4].split()[0] \
                    else newline[2]  # eitime < notice_date
            
                display_date, display_time = date.split()[0].replace("-", ""), date.split()[1]
                
                if display_time[:2] >= '15':  # 当日15点之后发布，算作下一天的
                    display_date = increment_date(display_date, 1)
                while display_date not in days_id_map and display_date < "20200101":
                    display_date = increment_date(display_date, 1)
                
                # 按股票id和日期过滤
                if len(newline) != 15 or display_date not in days_id_map \
                    or (newline[12]+'.SZ' not in stock_id_map and newline[12]+'.SH' not in stock_id_map):
                    continue

                newline[1] = display_date
                newline[12] = (newline[12]+'.SZ') if newline[12]+'.SZ' in stock_id_map else newline[12]+'.SH'
                fout.write(','.join(newline)+'\n')
                
    '''
    ann = pd.read_csv(f"./crawler/announcement/{date[:4]}-{date[4:6]}-{date[6:]}.csv", quotechar='"', usecols=range(15))
    print(date, ann.shape)
    if i==0:
        ann.to_csv(ann_save_path, sep='|', index=False)
    else:
        ann.to_csv(ann_save_path, sep='|' , header=False, mode="a", index=False)
    '''

/home/hy/thesis
empty line??  ['AN201612270219295582', '', '', '', '2012-03-29 00:00:00', '中国重工关于公开发行可转换公司债券所涉购买资产暨关联交易的实施进展公告', '', '', '', '38754622927419', '1', '中国重工', '601989', '', '']
empty line??  ['AN201705040553092625', '', '', '', '2017-05-04 00:00:00', '新泉股份公开发行A股可转换公司债券预案公告', '', '', '', '39783046135659', '1', '新泉股份', '603179', '', '']
empty line??  ['AN201705040553092624', '', '', '', '2017-05-04 00:00:00', '新泉股份关于公开发行A股可转换公司债券摊薄即期回报对公司主要财务指标的影响及公司采取措施的公告', '', '', '', '39783046135659', '1', '新泉股份', '603179', '', '']
empty line??  ['AN201705100567732541', '', '', '', '2017-05-10 00:00:00', '济川药业公开发行可转换公司债券预案', '', '', '', '39574774942555', '1', '济川药业', '600566', '', '', '']
empty line??  ['AN201705100567732540', '', '', '', '2017-05-10 00:00:00', '济川药业关于公开发行可转换公司债券摊薄即期回报及填补措施的公告', '', '', '', '39574774942555', '1', '济川药业', '600566', '', '', '']
empty line??  ['AN201705100567732539', '', '', '', '2017-05-10 00:00:00', '济川药业关于本次公开发行可转换公司债券募集资金使用的可行性分析报告', '', '', '', '395747

In [3]:
ann_all = pd.read_csv("../data/announcement_all_new.csv")  # 2533797 -> 1139361

In [4]:
# 过滤公告类别，去掉样本太少的
ann_groups = ann_all.groupby(['column_name'], as_index=False)
print(len(ann_groups))  # 324
ann_groups = ann_groups.filter(lambda x: len(x)>=1000)
print(len(ann_groups), len(ann_groups.groupby("column_name")))  # 300:1126968 141; 1000:1097880 88
# {name:gdf for name,gdf in df.groupby('Declarer') if len(gdf) >= 5}
for name, group in ann_groups.groupby(['column_name']):
    print(name, len(group))

324
1097880 88
一季度报告全文 8097
一季度报告正文 3448
三季度报告全文 9373
三季度报告正文 3717
专项说明/独立意见 73872
业绩快报 1996
业绩预告 8783
中介机构报告 2499
保荐/核查意见 51619
保荐代表人（机构）变更 1215
借贷 2782
停牌公告 22237
公司关联方基本资料变更 3320
公司注册资本变更 2043
公司章程 6000
公司章程修订 9904
关注函 1685
关联交易 15536
其他 137166
其他增发事项公告 5627
内部控制报告 8023
分配方案决议公告 6661
分配方案实施 7569
分配预案 8473
募集资金使用情况报告 10672
募集资金使用进展情况 4846
募集资金补充流动资金 2344
半年度报告全文 9380
半年度报告摘要 12122
半年度财务报告 2057
变更募集资金投资项目 1028
召开股东大会提示性公告 9036
召开股东大会通知 27242
回购进展情况 2249
增加股东大会议案 1795
增发方案修订 1657
增发获准公告 1917
增发预案 6240
增资扩股 2067
复牌公告 4185
委托（受托）事项 3176
审计报告 19784
审计机构变更 2523
对外项目投资 4611
年度报告全文 9879
年度报告摘要 11891
年报问询函 2464
归还募集资金 1485
投资理财 10950
投资设立公司 4225
担保事项 15510
收购出售资产/股权 7406
月度经营情况 8930
权益变动报告书 5591
法律意见书 35725
澄清公告 1146
独立董事候选人声明 7004
独立董事提名人声明 5310
独立董事述职报告 9359
监事会决议公告 24512
社会责任报告 2743
签订协议 9468
管理办法/制度 10376
股东/实际控制人股份减持 6302
股东/实际控制人股份增持 5795
股东大会决议公告 19200
股东大会法律意见书 2723
股东大会资料 18629
股份质押、冻结 17587
股权激励对象名单 1201
股权激励计划 1156
股权激励进展公告 2795
股权转让 5205
股票 178361
股票交易异常波动 11341
获得补贴（资助） 3817
获得认证

In [29]:
cnt = []
for name, group in ann_groups.groupby(['display_time', 'stock_code']):
    # print(name, len(group))
    ugroup = group['column_name'].unique()
    if len(ugroup)>50:
        print(name, len(ugroup))
    cnt.append(len(group))

In [30]:
print(sum(cnt)/len(cnt), np.median(cnt) ,max(cnt))
# unique,300: 2.0362756724322844 1.0 27
# unique,1000: 2.0159272477561125 1.0 26
import matplotlib.pyplot as plt

# cnt.sort()


2.0159272477561125 1.0 26


In [31]:
from collections import Counter
cnt.sort()
print(Counter(cnt))
'''
300:
Counter({1: 278151, 2: 69662, 3: 22636, 4: 14291, 5: 10070, 6: 7189, 
7: 4964, 8: 3276, 9: 2322, 10: 1722, 11: 1547, 12: 1462, 13: 1357, 14: 1248, 
15: 1088, 16: 826, 17: 568, 18: 390, 19: 249, 20: 133, 21: 73, 22: 47, 
23: 17, 24: 15, 26: 5, 25: 4, 27: 2})

1000:
Counter({1: 273014, 2: 68060, 3: 22100, 4: 13209, 5: 9778, 6: 6968, 7: 4850, 
8: 3213, 9: 2235, 10: 1681, 11: 1434, 12: 1381, 13: 1320, 14: 1197, 15: 1073, 
16: 753, 17: 526, 18: 338, 19: 223, 20: 96, 21: 65, 22: 32, 23: 14, 24: 6, 25: 1, 26: 1})
'''

Counter({1: 273014, 2: 68060, 3: 22100, 4: 13209, 5: 9778, 6: 6968, 7: 4850, 8: 3213, 9: 2235, 10: 1681, 11: 1434, 12: 1381, 13: 1320, 14: 1197, 15: 1073, 16: 753, 17: 526, 18: 338, 19: 223, 20: 96, 21: 65, 22: 32, 23: 14, 24: 6, 25: 1, 26: 1})


'\nCounter({1: 278151, 2: 69662, 3: 22636, 4: 14291, 5: 10070, 6: 7189, \n7: 4964, 8: 3276, 9: 2322, 10: 1722, 11: 1547, 12: 1462, 13: 1357, 14: 1248, \n15: 1088, 16: 826, 17: 568, 18: 390, 19: 249, 20: 133, 21: 73, 22: 47, \n23: 17, 24: 15, 26: 5, 25: 4, 27: 2})\n'

In [33]:
print(len(cnt) / (1931*2431))  # 0.08%的日期才有事件

0.08810076815072702


In [35]:
def get_text_feature(data_path, stock_id_mapping, days_id_mapping, groups_id_mapping, date_col, stock_col, columns):
	# return List[List[List[str]]]
	# res = np.zeros((2431, 1931, 25))
	res = [[[] for i in range(1931)] for j in range(2431)]

	with open(data_path, "r") as f:
		idx = 0
		for line in f:
			idx += 1
			if idx==1:
				continue
			cols = line.strip().split(",")   # sep is "," by default
			date = cols[date_col]
			date_id = days_id_mapping.get(date, -1)
			while len(date) == 8 and date_id == -1 and date <= '20191231':
				date = increment_date(date, 1)
				date_id = days_id_mapping.get(date, -1)

			stock_id = stock_id_mapping.get(cols[stock_col], -1)

			if stock_id == -1 or date_id == -1:
				continue
			
			for c in columns:
				if cols[c] in groups_id_mapping:  # 如果有重复的，[1,1,1,2,3,3,3]，记为[1,2,3]
					# tmp = tokenizer(cols[c], padding='max_length', max_length=32, return_tensors='np')
					res[date_id][stock_id].append(int(groups_id_mapping[cols[c]]))
	
	print(len(res), len(res[0]), len(res[0][0]))
	for i in range(2431):
		for j in range(1931):
			res[i][j] = list(set(res[i][j]))  # remove duplicates
			while len(res[i][j]) < 25:
				res[i][j].append(0)
			while len(res[i][j]) > 25:
				res[i][j].pop(-1)
				print(i,j,res[i][j])
	return res


In [31]:
ann_groups = []
with open("./data/ann_groups_1000.txt", "r") as f:
    for line in f:
        ann_groups.append(line.strip().split()[0])
groups_id_map = dict(zip(ann_groups, list(range(1,len(ann_groups)+1))))  # pad=0

In [36]:
res = get_text_feature("../data/announcement_all_new.csv", stock_id_map, days_id_map, groups_id_map, 1, 12, [14])
print(len(res), len(res[0]), len(res[0][0]))
# 2431 1931 89

2431 1931 0
2224 1002 [5, 9, 15, 16, 18, 19, 21, 24, 25, 33, 34, 38, 42, 43, 45, 46, 49, 51, 57, 58, 59, 60, 63, 78, 79]
2431 1931 25


In [28]:
np.save("../data/ann_type_2431_1931_89.npy", res)

In [37]:
np.savez_compressed("../data/ann_type_2431_1931_25.npz", data=res)