In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Page that carries the fund table scraped by the cells below.
url = 'http://haoetf.com/'

# A timeout keeps an unresponsive server from hanging the kernel forever, and
# raise_for_status() stops here on an HTTP error instead of letting an error
# page be parsed and fail later with a confusing AttributeError.
response = requests.get(url, timeout=10)
response.raise_for_status()

# The page declares <meta charset="utf-8">, so decode the raw bytes as UTF-8.
html = response.content.decode('utf-8')

soup = BeautifulSoup(html, features='lxml')

# Display the parsed document for a quick visual check of the structure.
soup.prettify()

'<!DOCTYPE html>\n<html lang="en">\n <head>\n  <meta charset="utf-8"/>\n  <meta content="width=device-width, initial-scale=1, shrink-to-fit=no" name="viewport"/>\n  <meta content="HaoETF,理财,量化投资,可转债,分级基金,基金,ETF,LOF,QDII" name="keywords"/>\n  <meta content="HaoETF，理财源自数据" name="description"/>\n  <title>\n   HaoETF 理财源自数据\n  </title>\n  <link crossorigin="anonymous" href="https://cdn.jsdelivr.net/npm/bootstrap@4.4.1/dist/css/bootstrap.min.css" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" rel="stylesheet"/>\n  <!-- Favicons -->\n  <link href="/static/img/favicons/favicon-180x180.png" rel="apple-touch-icon" sizes="180x180"/>\n  <link href="/static/img/favicons/favicon-32x32.png" rel="icon" sizes="32x32" type="image/png"/>\n  <link href="/static/img/favicons/favicon-16x16.png" rel="icon" sizes="16x16" type="image/png"/>\n  <link href="/static/img/favicons/favicon.ico" rel="icon"/>\n  <style>\n   .bd-placeholder-img {\nfont-size: 1.125rem;\ntext-anchor:

In [2]:
# Collect the table header (column names).
thead = soup.body.table.thead.tr

# Walk only the direct child *tags* of the header row. Whitespace text nodes
# between the cells are skipped whatever their exact form (the previous check
# only dropped nodes equal to a single '\n'), and get_text(strip=True) always
# yields a string, even when a header cell wraps its label in nested markup.
thead_list = [
    cell.get_text(strip=True)
    for cell in thead.find_all(True, recursive=False)
]

thead_list

['代码',
 '名称',
 '溢价率',
 '现价',
 '涨跌',
 '成交量(万手)',
 '成交额(万元)',
 '净值',
 '净值日期',
 'T-1估值',
 '估值日期',
 '估值涨跌',
 '限购(元)']

In [3]:
# Body section of the first <table> found under <body>.
tbody = soup.body.table.tbody

# One inner list per <tr>, holding the .string of each <td> in document order.
tr_list = [
    [cell.string for cell in row.find_all('td')]
    for row in tbody.find_all('tr')
]

tr_list


[['160216',
  '国泰商品',
  '5.78%',
  '0.302',
  '-8.76%',
  '149.55',
  '4543.07',
  '0.294',
  '2020-03-10',
  '0.285',
  '2020-03-11',
  '-3.21%',
  '10000'],
 ['160416',
  '石油基金',
  '12.54%',
  '0.711',
  '-4.05%',
  '52.80',
  '3707.73',
  '0.666',
  '2020-03-10',
  '0.632',
  '2020-03-11',
  '-5.44%',
  '30000000'],
 ['160723',
  '嘉实原油',
  '15.27%',
  '0.847',
  '-4.94%',
  '28.42',
  '2404.48',
  '0.762',
  '2020-03-10',
  '0.735',
  '2020-03-11',
  '-3.88%',
  '30000'],
 ['161129',
  '原油基金',
  '13.69%',
  '0.794',
  '-5.14%',
  '72.53',
  '5639.96',
  '0.7219',
  '2020-03-10',
  '0.698',
  '2020-03-11',
  '-3.57%',
  '暂停申购'],
 ['162411',
  '华宝油气',
  '28.26%',
  '0.236',
  '-9.92%',
  '1229.46',
  '29296.07',
  '0.2008',
  '2020-03-10',
  '0.184',
  '2020-03-11',
  '-8.66%',
  '暂停申购'],
 ['162719',
  '广发石油',
  '12.99%',
  '0.565',
  '-5.83%',
  '23.30',
  '1303.14',
  '0.541',
  '2020-03-10',
  '0.5',
  '2020-03-11',
  '-7.87%',
  '2000'],
 ['163208',
  '诺安油气',
  '8.66%',
  '0.495',

In [4]:
# Build the table from the scraped rows and header names.
table = pd.DataFrame(tr_list, columns=thead_list)


# Drop the literal percent sign so the premium can be parsed as a number.
table['溢价率'] = table['溢价率'].str.replace('%', '', regex=False)
# Convert the text columns to numbers. errors='coerce' turns any unparsable
# cell into NaN; the deprecated errors='ignore' would silently leave the whole
# column as strings and make the numeric comparisons below raise TypeError.
table['溢价率'] = pd.to_numeric(table['溢价率'], errors='coerce')
table['成交额(万元)'] = pd.to_numeric(table['成交额(万元)'], errors='coerce')

# Keep funds whose premium is above 2%, whose turnover is above 500 (in units
# of 10,000 yuan), and whose subscription is NOT suspended, sorted by premium
# in descending order.
# na=True treats a missing limit cell as suspended, so such rows are excluded,
# exactly as the former `== False` comparison did.
is_suspended = table['限购(元)'].str.contains('暂停', regex=False, na=True)
table = table[(table['溢价率'] > 2) &
              ~is_suspended &
              (table['成交额(万元)'] > 500)].sort_values('溢价率', ascending=False)

table

Unnamed: 0,代码,名称,溢价率,现价,涨跌,成交量(万手),成交额(万元),净值,净值日期,T-1估值,估值日期,估值涨跌,限购(元)
2,160723,嘉实原油,15.27,0.847,-4.94%,28.42,2404.48,0.762,2020-03-10,0.735,2020-03-11,-3.88%,30000
5,162719,广发石油,12.99,0.565,-5.83%,23.3,1303.14,0.541,2020-03-10,0.5,2020-03-11,-7.87%,2000
1,160416,石油基金,12.54,0.711,-4.05%,52.8,3707.73,0.666,2020-03-10,0.632,2020-03-11,-5.44%,30000000
0,160216,国泰商品,5.78,0.302,-8.76%,149.55,4543.07,0.294,2020-03-10,0.285,2020-03-11,-3.21%,10000


In [8]:
# Keep only the columns that appear in the text summary, in output order.
table = table.loc[:, ['代码', '名称', '溢价率', '现价', 'T-1估值']]

# Build the HaoETF oil-fund valuation summary: a header line followed by one
# ' | '-separated line per fund, each terminated by a blank line.
oil_header = '基金代码 | 基金名称 | 溢价率 | 场内现价 | 场外估值 ' + '\n\n'

# Plain tuples (name=None) are used because 'T-1估值' is not a valid attribute
# name. f-strings format every cell via str(), so a non-string cell (e.g. None
# from an unparsed <td>) no longer raises TypeError as bare `+` would, and
# joining once avoids rebuilding the whole string on every row.
oil_lines = [
    f'{code} | {name} | {premium}% | {price} | {estimate}\n\n'
    for code, name, premium, price, estimate
    in table.itertuples(index=False, name=None)
]
oil_desp = oil_header + ''.join(oil_lines)

oil_desp

'基金代码 | 基金名称 | 溢价率 | 场内现价 | 场外估值 \n\n160723 | 嘉实原油 | 15.27% | 0.847 | 0.735\n\n162719 | 广发石油 | 12.99% | 0.565 | 0.5\n\n160416 | 石油基金 | 12.54% | 0.711 | 0.632\n\n160216 | 国泰商品 | 5.78% | 0.302 | 0.285\n\n'