/
us.py
206 lines (191 loc) · 8.3 KB
/
us.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
import ast
import json
import os
import re
from urllib import request,parse
from litellm import completion
import feedparser
import numpy as np
import pandas as pd
from datetime import *
import time as t
import requests
from akshare.utils import demjson
from dotenv import load_dotenv,find_dotenv
from concurrent.futures import ThreadPoolExecutor, as_completed,wait
# Local HTTP proxy endpoint; get_yf_rss routes its HTTPS traffic through it.
PROXY='http://127.0.0.1:7890'
# Load variables from a .env file (located by find_dotenv) into the process
# environment; the __main__ script reads API_KEY and API_BASE_URL from os.environ.
load_dotenv(find_dotenv())
def getActive():
    """Fetch Yahoo Finance's predefined ``most_actives`` screener.

    Requests up to 250 quotes (region ``US``, language ``en-US``) through the
    HTTPS proxy configured in ``PROXY``.

    Returns:
        pandas.DataFrame: built from the ``finance.result[0].quotes`` list of
        the JSON response, one row per quote.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
        KeyError, IndexError: if the JSON payload does not contain
            ``finance.result[0].quotes``.
    """
    params = {
        'formatted': 'false',
        'lang': 'en-US',
        'region': 'US',
        'scrIds': 'most_actives',
        'count': '250',
    }
    response = requests.get(
        'https://query1.finance.yahoo.com/v1/finance/screener/predefined/saved',
        params=params,
        headers={'user-agent': 'Mozilla'},
        # Reuse the module-level proxy instead of repeating the literal.
        proxies={'https': PROXY},
        # Without a timeout a stalled proxy would block the script forever.
        timeout=10,
    )
    # Fail with a clear HTTP error rather than a confusing JSON/KeyError later.
    response.raise_for_status()
    quotes = response.json()['finance']['result'][0]['quotes']
    return pd.DataFrame(quotes)
def get_yf_rss(ticker):
    """Download the Yahoo Finance RSS headline feed for one ticker.

    Args:
        ticker: symbol to query; it is URL-quoted before being placed in the
            ``s=`` query parameter.

    Returns:
        pandas.DataFrame: the feed entries flattened with
        ``pd.json_normalize``; the ``published`` column is parsed to
        datetimes and converted to the ``Asia/Shanghai`` time zone.

    Raises:
        KeyError: if the feed yields no ``published`` column (for example
            when the feed has no entries).

    Side effects:
        Installs a process-wide urllib opener that routes HTTPS through
        ``PROXY``.
    """
    # The query string must read "&region=US"; the previous literal contained
    # the character "®" (an entity-mangled "&reg"), so the region parameter
    # was never sent.
    yf_rss_url = 'https://feeds.finance.yahoo.com/rss/2.0/headline?s=%s&region=US&lang=en-US&count=100'
    proxy_support = request.ProxyHandler({'https': PROXY})
    opener = request.build_opener(proxy_support)
    # NOTE(review): this relies on feedparser using the globally installed
    # urllib opener - confirm for the feedparser version in use.
    request.install_opener(opener)
    feed = feedparser.parse(yf_rss_url % parse.quote(str(ticker), safe=''))
    df = pd.json_normalize(feed.entries)
    df['published'] = pd.to_datetime(df["published"], format='mixed').dt.tz_convert('Asia/Shanghai')
    return df
def getUrl(url, cookie=''):
    """GET ``url`` and return the response body as text, retrying on failure.

    Each attempt uses a 5 second timeout and sends a ``Connection: close``
    header. After a failed attempt the error is printed and the function
    waits 60 seconds before trying again, for at most 99 attempts.

    Args:
        url: address to fetch.
        cookie: value sent in the ``cookie`` request header (empty by default).

    Returns:
        str: ``response.text`` of the first attempt that completes.

    Raises:
        RuntimeError: when every attempt failed; the last underlying error is
            attached as ``__cause__``. (Previously the function fell through
            and implicitly returned ``None``, which callers then tried to
            parse.)
    """
    max_attempts = 99
    headers = {"user-agent": "Mozilla", "cookie": cookie, "Connection": "close"}
    last_error = None
    for attempt in range(1, max_attempts + 1):
        try:
            response = requests.get(url, headers=headers, timeout=5)
            return response.text
        except Exception as e:  # deliberately broad: any failure triggers a retry
            last_error = e
            print(e.args)
            if attempt == max_attempts:
                # No point sleeping when there is no further attempt.
                break
            print('retrying.....')
            t.sleep(60)
    raise RuntimeError(
        'getUrl: giving up on %s after %d attempts' % (url, max_attempts)
    ) from last_error
def renderHtml(df, filename: str, title: str):
    """Write ``df`` to ``filename`` as a sortable HTML table.

    A 1-based ``No.`` column is prepended, the table is rendered without HTML
    escaping (cells may contain markup such as links), and the page references
    ``static/table.css`` plus the Tablesort scripts under ``static/``.

    Args:
        df: frame to render. It is not modified; numbering is applied to a
            copy, so the same frame can be rendered more than once.
        filename: path of the HTML file to (over)write, encoded as UTF-8.
        title: text placed in the page ``<title>``; inserted verbatim, so it
            may safely contain ``{`` / ``}`` characters.

    Raises:
        ValueError: if ``df`` already has a column named ``No.``.
    """
    # Work on a copy: the original code renumbered the caller's frame in
    # place, which broke a second call and leaked a "No." column to callers.
    numbered = df.copy()
    numbered.index = np.arange(1, len(numbered) + 1)
    numbered.index.name = 'No.'
    numbered.reset_index(inplace=True)

    table = numbered.to_html(render_links=True, escape=False, index=False)
    style = '<link rel="stylesheet" type="text/css" href="static/table.css"/>'
    tablesort = '<script src="static/tablesort.min.js"></script><script src="static/tablesort.number.min.js"></script><script>new Tablesort(document.getElementById("container"));</script>'

    # Single-pass interpolation: mixing "%" with str.format made any brace in
    # the title be interpreted as a format field.
    html_string = f'<html><head><title>{title}</title>{style}</head><body>{table}{tablesort}</body></html>'
    # Give the table the id Tablesort looks up and mark header cells sortable.
    html_string = html_string.replace(
        '<table border="1" class="dataframe">', '<table id="container">'
    ).replace('<th>', '<th role="columnheader">')

    # Explicit UTF-8: the content is typically non-ASCII and the platform
    # default encoding is not guaranteed to be able to represent it.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html_string)
def getK(symbol, period='week'):
    """Return recent US K-line bars rebased against a baseline close.

    The last 61 bars from ``tencentK('us', symbol, period)`` are taken; the
    close of the first of them is the baseline. For each of the last 60 bars
    every price is expressed as ``round(price / baseline * 10000 - 10000)``,
    i.e. the change versus the baseline in units of 1/10000.

    Args:
        symbol: ticker passed through to ``tencentK``.
        period: bar period passed through to ``tencentK``.

    Returns:
        list[list]: one ``[yymmdd, open, close, high, low]`` row per bar,
        where ``yymmdd`` is the bar date as an integer.
    """
    scale = 10000
    bars = tencentK('us', symbol, period)[-61:]
    base_close = bars['close'].values[0]

    def rebased(price):
        # Same arithmetic order as the original expression.
        return round(price / base_close * scale - scale)

    rows = []
    for stamp, bar in bars[-60:].iterrows():
        rows.append([
            int(stamp.strftime('%y%m%d')),
            rebased(bar['open']),
            rebased(bar['close']),
            rebased(bar['high']),
            rebased(bar['low']),
        ])
    return rows
def tencentK(mkt: str = 'us', symbol: str = "QQQ", period='week') -> pd.DataFrame:
    """Download K-line history for one symbol from Tencent's quote endpoints.

    Args:
        mkt: market tag. ``'us'`` selects the ``usfqkline`` endpoint and, when
            ``symbol`` contains no ``.``, resolves the full code through the
            smartbox lookup service. ``'hk'`` prefixes the symbol with ``hk``.
            For A-shares pass ``''``; the symbol is then used unchanged.
        symbol: ticker or full code, depending on ``mkt`` (see above).
        period: bar period placed in the request (for example ``'week'``).

    Returns:
        pandas.DataFrame: indexed by ``date`` (datetime) with float columns
        ``open``, ``close``, ``high``, ``low`` and ``volume``. The request
        asks for 320 bars with the ``qfq`` option (presumably forward-adjusted
        prices - confirm against the API).

    Raises:
        IndexError: if the smartbox reply has no ``~``-separated code.
        KeyError: if the payload lacks the symbol or both period keys.
        ValueError: if the payload rows have fewer than six fields.
    """
    if mkt == 'us' and '.' not in symbol:
        # The second "~"-separated field of the smartbox reply is used as the
        # code; it is upper-cased and prefixed with the market tag.
        symbolTxt = requests.get(
            f"http://smartbox.gtimg.cn/s3/?q={symbol}&t=us", timeout=5
        ).text
        symbol = mkt + symbolTxt.split("~")[1].upper()
    elif mkt == 'hk':
        symbol = mkt + symbol

    url = "http://web.ifzq.gtimg.cn/appstock/app/fqkline/get?"
    if mkt == 'us':
        url = "https://web.ifzq.gtimg.cn/appstock/app/usfqkline/get?"
    params = {
        "_var": f"kline_{period}qfq",
        "param": f"{symbol},{period},,,320,qfq",
        # Fixed value sent as "r"; presumably a cache-busting token.
        "r": "0.012820108110342066",
    }
    # A single URL is fetched, so no thread pool is needed.
    text = getUrl(url + parse.urlencode(params))

    # The body is "<var>={...}"; decode the part after "=" once.
    payload = demjson.decode(text[text.find("={") + 1:])["data"][symbol]
    try:
        rows = payload[period]
    except KeyError:
        # Some responses store the bars under "qfq<period>" instead.
        rows = payload["qfq%s" % period]

    # Keep the first six fields of every bar; extra trailing fields are dropped.
    temp_df = pd.DataFrame(rows).iloc[:, :6]
    temp_df.columns = ["date", "open", "close", "high", "low", "amount"]
    temp_df.index = pd.to_datetime(temp_df["date"])
    del temp_df["date"]
    temp_df = temp_df.astype("float")
    # NOTE(review): drop_duplicates compares row values, not dates, so two
    # different bars with identical values collapse into one - confirm intended.
    temp_df.drop_duplicates(inplace=True)
    temp_df.rename(columns={'amount': 'volume'}, inplace=True)
    return temp_df
# Press the green button in the gutter to run the script.
def _numbered_lines(value):
    """Format a list as "1. a\\n2. b" lines; return any other value unchanged."""
    if isinstance(value, list):
        return '\n'.join('%s. %s' % (i, item) for i, item in enumerate(value, start=1))
    return value


if __name__ == '__main__':
    # Pipeline: most-active US tickers -> Yahoo RSS news per ticker -> LLM
    # summary of chances/risks/tags -> CSV, HTML and JSON reports.
    ydf = getActive()
    ydf.to_csv('ydf.csv')
    # Market cap expressed in units of 1e8, rounded to two decimals.
    ydf['marketCap'] = round(pd.to_numeric(ydf['marketCap'], errors='coerce') / 100000000, 2)

    for k, v in ydf.iterrows():
        symbol = v['symbol']
        ydf.at[k, 'stock'] = '<a href="https://xueqiu.com/S/%s">%s<br>%s</a>' % (symbol, symbol, v['displayName'])
        try:
            news = get_yf_rss(symbol)
        except Exception as e:
            # Best effort: a ticker without a usable feed is skipped, but the
            # reason is reported and Ctrl-C is no longer swallowed.
            print('skip %s: %s' % (symbol, e))
            continue
        news['summary'] = news['published'].dt.strftime('%Y-%m-%d ') + news['summary']
        news.to_csv(symbol + '.csv')
        if len(news) < 2:
            continue

        # Cap the news text sent to the model at 2900 characters.
        newsTitles = '\n'.join(news['summary'].values)[:2900] + '...'
        prompt = "{'%s(%s)相关资讯':'''%s''',\n}\n请根据资讯分析总结风险点和机会点,输出中文回答,回答格式为:{'chances':[机会点],'risks':[风险点],'tags':[题材标签]}" % (v['symbol'], v['longName'], newsTitles)
        print('Prompt:\n%s' % prompt)

        # Two attempts; after a failure the prompt is extended with a reminder
        # to answer as a Python dict and the loop waits 20 seconds.
        for _attempt in range(2):
            try:
                replyTxt = completion(
                    model='openai/gpt-3.5-turbo-1106',
                    messages=[{
                        "role": "user",
                        "content": prompt,
                    }],
                    api_key=os.environ['API_KEY'],
                    api_base=os.environ['API_BASE_URL'],
                )["choices"][0]["message"]["content"]
                print('ChatGPT:\n%s' % replyTxt)
                # Take the last brace-delimited group without nested braces
                # and parse it as a Python literal (no code is evaluated).
                content = re.findall(r'{[^{}]*}', replyTxt)[-1]
                parsed = ast.literal_eval(content)

                # Derive every value before touching the frame, so a
                # malformed reply cannot leave a half-filled row behind.
                chances = _numbered_lines(parsed['chances'])
                risks = _numbered_lines(parsed['risks'])
                chances_html = chances.replace('\n', '<br>')
                risks_html = risks.replace('\n', '<br>')
                tags_html = '<br>'.join(parsed['tags'])

                ydf.at[k, 'chances'] = chances_html
                ydf.at[k, 'risks'] = risks_html
                # Only replies whose risks span several lines get a score;
                # unscored rows are dropped below.
                # NOTE(review): the score is a difference of character
                # lengths of the two texts, not of item counts - confirm.
                if '\n' in risks:
                    ydf.at[k, 'score'] = len(chances) - len(risks)
                ydf.at[k, 'tags'] = tags_html
                break
            except Exception as e:
                print(e)
                prompt += ',请务必保持python dict格式'
                t.sleep(20)
        # Pause between tickers.
        t.sleep(30)

    # The report columns are created lazily inside the loop; make sure they
    # exist so an empty or fully failed run yields empty reports instead of
    # a KeyError.
    for col in ('stock', 'chances', 'risks', 'tags', 'score'):
        if col not in ydf.columns:
            ydf[col] = np.nan

    ydf.dropna(subset=['score'], inplace=True)
    ydf.sort_values(by=['score'], ascending=False, inplace=True)
    ydf.to_csv('yahooFinance.csv')

    ydf_w = ydf[['stock', 'chances', 'risks', 'tags', 'score']]
    nowTxt = datetime.now().strftime('%Y-%m-%d')
    renderHtml(ydf_w, nowTxt + '_us.html', nowTxt)

    ydf_json = ydf[['symbol', 'chances', 'risks', 'tags', 'score']]
    with open(nowTxt + '.json', 'w', encoding='utf_8_sig') as f:
        json.dump({'columns': ydf_json.columns.tolist(), 'data': ydf_json.values.tolist()}, f, ensure_ascii=False, indent=4)