-
Notifications
You must be signed in to change notification settings - Fork 0
/
choose CSI 300 Index.py
246 lines (202 loc) · 7.47 KB
/
choose CSI 300 Index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date : 2017-09-14 23:40:10
# @Author : Your Name (you@example.org)
# @Link : http://example.org
# @Version : $Id$
import os
import numpy as np
import pandas as pd
import chardet
import requests
from bs4 import BeautifulSoup
import xlrd
#4个函数,一个导入股票名单,一个看财报说里面我们要的Roe,一个看股价(包括求每股净值),一个merge输出到表格
class caipaoPa(object):
"""docstring for caipaoPa"""
def __init__(self):
self.obData = pd.read_excel('./300.xlsx')
self.obData.columns = ['date','index code','index name','index name eng','constituent code',
'constituent name','constituent name eng','exchange']
self.compNums = self.obData.loc[:,['constituent code']].values
self.compPla = self.obData.loc[:,['exchange']].values
self.compName = self.obData.loc[:,['constituent name eng']].values
self.caibaoIndex = pd.DataFrame(columns=['公司名称','公司代码','上市地点','RoE2012','2013','2014','2015','2016',
'RoE连续5年超过20%','现时股价','股价低点(市盈率10)',
'股价正常(市盈率20)','股价高点(市盈率30)','是否可以购买'])
def RoE(self):
#每个公司在财报说的链接
compList = []
i = 0
for number in self.compNums:
compStr = 'http://caibaoshuo.cn/companies/' + str(self.compNums[i][0]).zfill(6)
compList.append(compStr)
i += 1
#计算每个公司RoE
compRoe = pd.DataFrame([])
r = 0
for item in compList:
#判断网页状态
dataSt = requests.get(compList[r])
#网页状态正常时
if dataSt.status_code == requests.codes.ok:
print('true')
compData = pd.read_html(compList[r])
#分解网页上的三个表格
f1 = compData[1]
f2 = compData[2]
f3 = compData[3]
#重命名索引,扔掉不要的列
f1.columns = ['name','2012','2013','2014','2015','2016','0']
f1.index = ['财务结构','负债占资产比率(%)','长期资金占不动产及设备比率(%)',
'偿债能力','流动比率(%)','速动比率(%)',
'营运能力','应收账款周转率(次/年)','应收账款周转天数(天)',
'存货周转率(次/年)','存货周转天数(天)','固定资产周转率(次/年) ',
'总资产周转率(次/年)','盈利能力','ROA=资产收益率(%)',
'ROE=净资产收益率(%)','税前纯益占实收资本比率(%)','毛利率(%)',
'营业利润率(%)','净利率(%)','基本每股收益(每股盈余EPS)',
'现金流量','现金流量比率','现金流量充当比率','现金再投资比率']
f1New = f1.dropna(axis=1, how='all')
#提取RoE
newCompRoe = f1New.loc[['ROE=净资产收益率(%)'],['2012','2013','2014','2015','2016']]
roeAr = newCompRoe.values.reshape((1,5))
#因为有的是string,有的是float,所以判断string格式,先把里面不能用的数据改成0.0
if roeAr.dtype == object:
roeAr[roeAr=='--'] = 0.0
roeAr = roeAr.astype(float)
newCompRoe = pd.DataFrame(roeAr)
# newCompRoe = np.ndarray.tolist(newCompRoe)
#网页状态异常时
else:
print('false')
newCompRoe = pd.DataFrame({'0' : 0.0,
'1' : 0.0,
'2' : 0.0,
'3' : 0.0,
'4' : 0.0}, index=[0])
print newCompRoe
newCompRoe.index = [r]
compRoe = compRoe.append(newCompRoe)
r += 1
#把RoE变成array
# self.compRoeAr = np.array(compRoe)
#储存RoE
print compRoe
roeData = pd.DataFrame(compRoe)
roeData.to_csv('/Users/eveshi/Documents/007project/learnPython/caibaoshuo/roeData.csv')
#判断RoE是否连续5年大于20%(也移动到最后一个整合函数)
def price(self):
#每个公司在雪球的链接
compListXQ = []
strCompPla = self.compPla.astype(str)
plaList = []
a = 0
for item in strCompPla:
if str(strCompPla[a][0]) == np.array('SHH'):
plaList.append('SH')
elif str(strCompPla[a][0]) == np.array('SHZ'):
plaList.append('SZ')
a += 1
b = 0
for number in self.compNums:
compStr = 'https://xueqiu.com/S/' + plaList[b] + str(self.compNums[b][0]).zfill(6)
compListXQ.append(compStr)
b += 1
#抓取页面中的每股净值和股价
nps = []
sp = []
headers = {
'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
}
c = 0
for item in compListXQ:
response = requests.get(compListXQ[c],headers = headers)
# 抓取每股净值
nps1 = BeautifulSoup(response.text,'lxml').find(title='最近报告期每股净资产,单位:人民币')
nps2 = nps1.find('span').get_text()
nps3 = float(nps2)
nps.append(nps3)
#抓取股价
sp1 = BeautifulSoup(response.text,'lxml').find(attrs = {"class":"currentInfo"}).find('strong')
if sp1 == None:
# sp2 = BeautifulSoup(response.text,'lxml').find(attrs = {"class":"stock-closed"}).get_text()
sp3 = 0.0
else:
sp2 = sp1['data-current']
sp3 = float(sp2)
sp.append(sp3)
c += 1
# #储存净值和股价
npsData = pd.DataFrame(nps)
npsData.to_csv('/Users/eveshi/Documents/007project/learnPython/caibaoshuo/npsData.csv')
spData = pd.DataFrame(sp)
spData.to_csv('/Users/eveshi/Documents/007project/learnPython/caibaoshuo/spData.csv')
#计算合理股价范围(最后决定把这一块放在最后一部分,这样前面的数据可以先储存)
def merge(self):
#读取所有已储存数据
readRoe = pd.read_csv('roeData.csv')
readNps = pd.read_csv('npsData.csv')
readSp = pd.read_csv('spData.csv')
compRoe2012 = readRoe.iloc[:,[1]].values
compRoe2013 = readRoe.iloc[:,[2]].values
compRoe2014 = readRoe.iloc[:,[3]].values
compRoe2015 = readRoe.iloc[:,[4]].values
compRoe2016 = readRoe.iloc[:,[5]].values
compRoe = readRoe.iloc[:,[1,2,3,4,5]].values
nps = readNps.iloc[:,[1]].values
sp = readSp.iloc[:,[1]].values
print sp
print nps
#计算股价合理范围
price10 = 10*compRoe2016*nps/100
price20 = 20*compRoe2016*nps/100
price30 = 30*compRoe2016*nps/100
print price10
#判断股价
spBool = []
i = 0
for item in price20:
if price20[i][0] > sp[i][0] and sp[i][0] != 0.0:
spBool.append(['YES'])
else:
spBool.append(['--'])
i += 1
spBoolAr = np.array(spBool)
spBoolAr.transpose
print spBoolAr
#判断RoE是否连续5年>20%
roeBool = []
i = 0
for item in compRoe:
if np.any(compRoe[i]<20) == True:
roeBool.append(['--'])
else:
roeBool.append(['YES'])
i += 1
#储存判断结果
boolData = np.array(roeBool)
boolData.transpose
print boolData
buy = []
i = 0
for item in roeBool:
if roeBool[i][0] == 'YES' and spBoolAr[i][0] == 'YES':
buy.append(['Buy'])
else:
buy.append(['--'])
i += 1
caibaoIn = np.concatenate((self.compName, self.compNums, self.compPla,
compRoe2012, compRoe2013, compRoe2014, compRoe2015, compRoe2016,
roeBool, sp, price10, price20, price30, spBool, buy),
axis=1)
self.caibaoIndex = pd.DataFrame(caibaoIn,
columns=['constituent name','constituent code','exchange','RoE in 2012',
'RoE in 2013','RoE in 2014','RoE in 2015','RoE in 2016',
'RoE>20% in 5 years','price now','low price',
'regular price','high price','good price','buy or not'])
print self.caibaoIndex
self.caibaoIndex.to_csv('/Users/eveshi/Documents/007project/learnPython/caibaoshuo/caibaoIndex.csv', encoding='utf-8')
caipaoPa().RoE()
caipaoPa().price()
caipaoPa().merge()
print("123")