In [30]:
import datetime
import pandas as pd
from pandas.compat import StringIO
try:
    from urllib.request import urlopen, Request
except ImportError:
    from urllib2 import urlopen, Request

P_TYPE = {'http': 'http://', 'ftp': 'ftp://'}
DOMAINS = {'sina': 'sina.com.cn', 'sinahq': 'sinajs.cn',
           'ifeng': 'ifeng.com', 'sf': 'finance.sina.com.cn',
           'vsf': 'vip.stock.finance.sina.com.cn', 
           'idx': 'www.csindex.com.cn', '163': 'money.163.com',
           'em': 'eastmoney.com', 'sseq': 'query.sse.com.cn',
           'sse': 'www.sse.com.cn', 'szse': 'www.szse.cn',
           'oss': 'file.tushare.org', 'idxip':'115.29.204.48',
           'shibor': 'www.shibor.org', 'mbox':'www.cbooo.cn',
           'tt': 'gtimg.cn', 'gw': 'gw.com.cn',
           'v500': 'value500.com', 'sstar': 'stock.stockstar.com',
           'dfcf': 'nufm.dfcfw.com'}


ALL_STOCK_BASICS_FILE = P_TYPE['http'] + DOMAINS['oss'] + '/tsdata/%sall%s.csv'

def day_last_week(days=-7):
    lasty = datetime.datetime.today().date() + datetime.timedelta(days)
    return str(lasty)

def last_tddate():
    today = datetime.datetime.today().date()
    today=int(today.strftime("%w"))
    if today == 0:
        return day_last_week(-2)
    else:
        return day_last_week(-1)
    
def get_stock_basics(date=None):
    """
        获取沪深上市公司基本情况
    Parameters
    date:日期YYYY-MM-DD，默认为上一个交易日，目前只能提供2016-08-09之后的历史数据

    Return
    --------
    DataFrame
               code,代码
               name,名称
               industry,细分行业
               area,地区
               pe,市盈率
               outstanding,流通股本
               totals,总股本(万)
               totalAssets,总资产(万)
               liquidAssets,流动资产
               fixedAssets,固定资产
               reserved,公积金
               reservedPerShare,每股公积金
               eps,每股收益
               bvps,每股净资
               pb,市净率
               timeToMarket,上市日期
    """
    wdate = last_tddate() if date is None else date
    wdate = wdate.replace('-', '')
    if wdate < '20160809':
        return None
    datepre = '' if date is None else wdate[0:4] + wdate[4:6] + '/'
    request = Request(ALL_STOCK_BASICS_FILE%(datepre, '' if date is None else wdate))
    text = urlopen(request, timeout=10).read()
    text = text.decode('GBK')
    text = text.replace('--', '')
    df = pd.read_csv(StringIO(text), dtype={'code':'object'})
    df = df.set_index('code')
    return df

#get_stock_basics()

#get_stock_basics('2018-01-05')

#################################################
import lxml.html
from lxml import etree
import sys
import time

DATA_GETTING_FLAG = '#'
DATE_CHK_MSG = '年度输入错误：请输入1989年以后的年份数字，格式：YYYY'
DATE_CHK_Q_MSG = '季度输入错误：请输入1、2、3或4数字'
DATA_GETTING_TIPS = '[Getting data:]'
REPORT_URL = '%s%s/q/go.php/vFinanceAnalyze/kind/mainindex/%s?s_i=&s_a=&s_c=&reportdate=%s&quarter=%s&p=%s&num=%s'
PAGES = {'fd': 'index.phtml', 'dl': 'downxls.php', 'jv': 'json_v2.php',
         'cpt': 'newFLJK.php', 'ids': 'newSinaHy.php', 'lnews':'rollnews_ch_out_interface.php',
         'ntinfo':'vCB_BulletinGather.php', 'hs300b':'000300cons.xls',
         'hs300w':'000300closeweight.xls','sz50b':'000016cons.xls',
         'dp':'all_fpya.php', '163dp':'fpyg.html',
         'emxsg':'JS.aspx', '163fh':'jjcgph.php',
         'newstock':'vRPD_NewStockIssue.php', 'zz500b':'000905cons.xls',
         'zz500wt':'000905closeweight.xls',
         't_ticks':'vMS_tradedetail.php', 'dw': 'downLoad.html',
         'qmd':'queryMargin.do', 'szsefc':'ShowReport.szse',
         'ssecq':'commonQuery.do', 'sinadd':'cn_bill_download.php', 'ids_sw':'SwHy.php',
         'idx': 'index.php', 'index': 'index.html'}
PAGE_NUM = [40, 60, 80, 100]
PY3 = (sys.version_info[0] >= 3)
REPORT_COLS = ['code', 'name', 'eps', 'eps_yoy', 'bvps', 'roe',
               'epcf', 'net_profits', 'profits_yoy', 'distrib', 'report_date']
NETWORK_URL_ERROR_MSG = '获取失败，请检查网络.'


def _check_input(year, quarter):
    if isinstance(year, str) or year < 1989 :
        raise TypeError(DATE_CHK_MSG)
    elif quarter is None or isinstance(quarter, str) or quarter not in [1, 2, 3, 4]:
        raise TypeError(DATE_CHK_Q_MSG)
    else:
        return True
    
def _write_head():
    sys.stdout.write(DATA_GETTING_TIPS)
    sys.stdout.flush()
    
def _write_console():
    sys.stdout.write(DATA_GETTING_FLAG)
    sys.stdout.flush()
    
def _get_report_data(year, quarter, pageNo, dataArr,
                     retry_count=3, pause=0.001):
    _write_console()
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(REPORT_URL%(P_TYPE['http'], DOMAINS['vsf'], PAGES['fd'],
                             year, quarter, pageNo, PAGE_NUM[1]))
            text = urlopen(request, timeout=10).read()
            text = text.decode('GBK')
            text = text.replace('--', '')
            html = lxml.html.parse(StringIO(text))
            res = html.xpath("//table[@class=\"list_table\"]/tr")
            if PY3:
                sarr = [etree.tostring(node).decode('utf-8') for node in res]
            else:
                sarr = [etree.tostring(node) for node in res]
            sarr = ''.join(sarr)
            sarr = '<table>%s</table>'%sarr
            df = pd.read_html(sarr)[0]
            df = df.drop(11, axis=1)
            df.columns = ct.REPORT_COLS
            dataArr = dataArr.append(df, ignore_index=True)
            nextPage = html.xpath('//div[@class=\"pages\"]/a[last()]/@onclick')
            if len(nextPage)>0:
                pageNo = re.findall(r'\d+', nextPage[0])[0]
                return _get_report_data(year, quarter, pageNo, dataArr)
            else:
                return dataArr
        except Exception as e:
            pass
    raise IOError(NETWORK_URL_ERROR_MSG)

    
def get_report_data(year, quarter):
    """
        获取业绩报表数据
    Parameters
    --------
    year:int 年度 e.g:2014
    quarter:int 季度 :1、2、3、4，只能输入这4个季度
       说明：由于是从网站获取的数据，需要一页页抓取，速度取决于您当前网络速度
       
    Return
    --------
    DataFrame
        code,代码
        name,名称
        eps,每股收益
        eps_yoy,每股收益同比(%)
        bvps,每股净资产
        roe,净资产收益率(%)
        epcf,每股现金流量(元)
        net_profits,净利润(万元)
        profits_yoy,净利润同比(%)
        distrib,分配方案
        report_date,发布日期
    """
    if _check_input(year,quarter) is True:
        _write_head()
        df =  _get_report_data(year, quarter, 1, pd.DataFrame())
        if df is not None:
            # df = df.drop_duplicates('code')
            df['code'] = df['code'].map(lambda x:str(x).zfill(6))
        return df


get_report_data(2019, 1)

[Getting data:]#

OSError: 获取失败，请检查网络.

In [38]:
import pandas as pd
import datetime

ALL_DAY_FILE = P_TYPE['http'] + DOMAINS['oss'] + '/tsdata/h/%s%s.csv'
P_TYPE = {'http': 'http://', 'ftp': 'ftp://'}
PAGE_NUM = [40, 60, 80, 100]

def day_last_week(days=-7):
    lasty = datetime.datetime.today().date() + datetime.timedelta(days)
    return str(lasty)

def last_tddate():
    today = datetime.datetime.today().date()
    today=int(today.strftime("%w"))
    if today == 0:
        return day_last_week(-2)
    else:
        return day_last_week(-1)
    
def get_day_all(date=None):
    """
    获取每日收盘行情
    Parameters:
    -------------
    date:交易日期，格式:YYYY-MM-DD
    
    Return:
    -------------
    DataFrame
    code 代码, name 名称, p_change 涨幅%,
    price 现价, change 涨跌, open 今开, high 最高,
    low 最低, preprice 昨收, pe 市盈(动),
    volratio 量比, turnover 换手%, range 振幅%%,
    volume 总量, selling 内盘, buying 外盘,
    amount 总金额, totals 总股本(万), industry 细分行业,
    area 地区, floats 流通股本(万), fvalues 流通市值,
    abvalues AB股总市值, avgprice 均价, strength 强弱度%,
    activity 活跃度, avgturnover 笔换手, attack 攻击波%,
    interval3 近3月涨幅 ，interval 近6月涨幅
    """
    wdate = last_tddate() if date is None else date
    wdate = wdate.replace('-', '')

    if wdate < '20170614':
        return None
    datepre = '' if date is None else wdate[0:4] + wdate[4:6] + '/'
    url = ALL_DAY_FILE%(datepre,'hq' if date is None else wdate)
    print(url)
    df = pd.read_csv(url, dtype={'code':'object'})
    
    return df

get_day_all()

http://file.tushare.org/tsdata/h/hq.csv


Unnamed: 0,code,name,p_change,price,change,open,high,low,preprice,pe,...,floats,fvalues,abvalues,avgprice,strength,activity,avgturnover,attack,interval3,interval6
0,688288,鸿泉物联,43.35,44.81,13.55,30.30,44.96,30.16,31.26,68.55,...,0.20,9.12,44.81,36.65,43.34,3338.0,0.0229,48.57,,
1,688023,安恒信息,29.30,114.44,25.93,94.70,118.80,94.70,88.51,0.00,...,0.15,17.72,84.77,105.25,29.29,2522.0,0.0167,20.84,,
2,688021,奥福环保,18.94,39.50,6.29,32.35,40.76,31.90,33.21,58.62,...,0.18,7.19,30.53,35.55,18.94,3051.0,0.0210,23.82,,
3,688299,长阳科技,18.14,21.10,3.24,17.39,23.31,17.33,17.86,45.80,...,0.65,13.64,59.62,19.52,18.14,3933.0,0.0173,21.75,,
4,688389,普门科技,10.94,20.08,1.98,18.90,21.20,18.68,18.10,96.19,...,0.35,6.98,84.78,19.78,10.94,2935.0,0.0222,7.49,,
5,300315,掌趣科技,10.09,4.91,0.45,4.45,4.91,4.41,4.46,29.44,...,26.00,127.68,135.39,4.80,9.52,3788.0,0.0013,11.34,,
6,000687,华讯方舟,10.06,7.11,0.65,6.74,7.11,6.70,6.46,0.00,...,7.53,53.52,54.48,7.05,9.49,995.0,0.0011,6.12,,
7,002291,星期六,10.05,8.65,0.79,8.00,8.65,7.73,7.86,40.67,...,3.99,34.51,56.99,8.33,9.48,3206.0,0.0030,11.90,,
8,000662,天夏智慧,10.04,4.93,0.45,4.46,4.93,4.46,4.48,57.61,...,10.93,53.88,53.89,4.77,9.47,1363.0,0.0020,10.54,,
9,002169,智光电气,10.03,7.24,0.66,6.61,7.24,6.51,6.58,97.93,...,7.63,55.28,57.04,6.92,9.46,3607.0,0.0021,11.21,,


In [46]:
import json

P_TYPE = {'http': 'http://', 'ftp': 'ftp://'}
SINA_FUND_INFO_URL = '%s%s/fundInfo/api/openapi.php/FundPageInfoService.tabjjgk?symbol=%s&format=json'
DOMAINS = {'sina': 'sina.com.cn', 'sinahq': 'sinajs.cn',
           'ifeng': 'ifeng.com', 'sf': 'finance.sina.com.cn',
           'ssf': 'stock.finance.sina.com.cn',
           'vsf': 'vip.stock.finance.sina.com.cn',
           'idx': 'www.csindex.com.cn', '163': 'money.163.com',
           'em': 'eastmoney.com', 'sseq': 'query.sse.com.cn',
           'sse': 'www.sse.com.cn', 'szse': 'www.szse.cn',
           'oss': '218.244.146.57',
           'shibor': 'www.shibor.org'}
K_LABELS = ['D', 'W', 'M']
K_MIN_LABELS = ['5', '15', '30', '60']
K_TYPE = {'D': 'akdaily', 'W': 'akweekly', 'M': 'akmonthly'}
INDEX_LABELS = ['sh', 'sz', 'hs300', 'sz50', 'cyb', 'zxb', 'zx300', 'zh500']
DAY_PRICE_COLUMNS = ['date', 'open', 'high', 'close', 'low', 'volume', 'price_change', 'p_change',
                     'ma5', 'ma10', 'ma20', 'v_ma5', 'v_ma10', 'v_ma20', 'turnover']
INX_DAY_PRICE_COLUMNS = ['date', 'open', 'high', 'close', 'low', 'volume', 'price_change', 'p_change',
                         'ma5', 'ma10', 'ma20', 'v_ma5', 'v_ma10', 'v_ma20']

DAY_PRICE_URL = '%sapi.finance.%s/%s/?code=%s&type=last'



def today_last_year():
    lasty = datetime.datetime.today().date() + datetime.timedelta(-365)
    return str(lasty)

def today():
    day = datetime.datetime.today().date()
    return str(day) 
def _code_to_symbol(code):
    '''
        生成symbol代码标志
    '''
    if code in INDEX_LABELS:
        return INDEX_LIST[code]
    else:
        if len(code) != 6 :
            return code
        else:
            return 'sh%s'%code if code[:1] in ['5', '6', '9'] or code[:2] in ['11', '13'] else 'sz%s'%code

        
        
def get_hist_data(code=None, start=None, end=None,
                  ktype='D', retry_count=3,
                  pause=0.001):
    """
        获取个股历史交易记录
    Parameters
    ------
      code:string
                  股票代码 e.g. 600848
      start:string
                  开始日期 format：YYYY-MM-DD 为空时取到API所提供的最早日期数据
      end:string
                  结束日期 format：YYYY-MM-DD 为空时取到最近一个交易日数据
      ktype：string
                  数据类型，D=日k线 W=周 M=月 5=5分钟 15=15分钟 30=30分钟 60=60分钟，默认为D
      retry_count : int, 默认 3
                 如遇网络等问题重复执行的次数 
      pause : int, 默认 0
                重复请求数据过程中暂停的秒数，防止请求间隔时间太短出现的问题
    return
    -------
      DataFrame
          属性:日期 ，开盘价， 最高价， 收盘价， 最低价， 成交量， 价格变动 ，涨跌幅，5日均价，10日均价，20日均价，5日均量，10日均量，20日均量，换手率
    """
    symbol = _code_to_symbol(code)
    url = ''
    if ktype.upper() in K_LABELS:
        url = DAY_PRICE_URL%(P_TYPE['http'], DOMAINS['ifeng'],
                                K_TYPE[ktype.upper()], symbol)
        print(url)
    elif ktype in K_MIN_LABELS:
        url = DAY_PRICE_MIN_URL%(P_TYPE['http'], DOMAINS['ifeng'],
                                    symbol, ktype)
    else:
        raise TypeError('ktype input error.')
    
    for _ in range(retry_count):
        time.sleep(pause)
        try:
            request = Request(url)
            lines = urlopen(request, timeout = 10).read()
            if len(lines) < 15: #no data
                return None
        except Exception as e:
            print(e)
        else:
            js = json.loads(lines.decode('utf-8') if PY3 else lines)
            cols = []
            if (code in INDEX_LABELS) & (ktype.upper() in K_LABELS):
                cols = INX_DAY_PRICE_COLUMNS
            else:
                cols = DAY_PRICE_COLUMNS
            if len(js['record'][0]) == 14:
                cols = INX_DAY_PRICE_COLUMNS
            df = pd.DataFrame(js['record'], columns=cols)
            if ktype.upper() in ['D', 'W', 'M']:
                df = df.applymap(lambda x: x.replace(u',', u''))
                df[df==''] = 0
            for col in cols[1:]:
                df[col] = df[col].astype(float)
            if start is not None:
                df = df[df.date >= start]
            if end is not None:
                df = df[df.date <= end]
            if (code in INDEX_LABELS) & (ktype in K_MIN_LABELS):
                df = df.drop('turnover', axis=1)
            df = df.set_index('date')
            df = df.sort_index(ascending = False)
            return df
    raise IOError(NETWORK_URL_ERROR_MSG)


get_hist_data('600123',start='20150101',end='20191107')

http://api.finance.ifeng.com/akdaily/?code=sh600123&type=last


Unnamed: 0_level_0,open,high,close,low,volume,price_change,p_change,ma5,ma10,ma20,v_ma5,v_ma10,v_ma20
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2019-11-07,6.24,6.26,6.22,6.21,30536.07,-0.01,-0.16,6.216,6.282,6.383,62507.54,59121.28,56563.04
2019-11-06,6.23,6.28,6.23,6.20,47970.00,0.01,0.16,6.214,6.301,6.401,66321.89,59417.40,56945.91
2019-11-05,6.16,6.26,6.22,6.13,61220.45,0.06,0.97,6.222,6.319,6.416,74246.00,57416.60,56576.29
2019-11-04,6.25,6.26,6.16,6.10,118289.05,-0.09,-1.44,6.250,6.341,6.431,73747.86,54404.95,55243.32
2019-11-01,6.21,6.26,6.25,6.18,54522.11,0.04,0.64,6.312,6.365,6.448,58862.79,46334.84,51995.68
2019-10-31,6.27,6.30,6.21,6.21,49607.82,-0.06,-0.96,6.348,6.381,6.463,55735.02,47856.00,52091.91
2019-10-30,6.36,6.36,6.27,6.26,87590.56,-0.09,-1.42,6.388,6.410,6.474,52512.92,47364.15,52545.81
2019-10-29,6.46,6.47,6.36,6.36,58729.74,-0.11,-1.70,6.416,6.435,6.484,40587.21,43953.21,51450.83
2019-10-28,6.42,6.47,6.47,6.40,43863.70,0.04,0.62,6.432,6.455,6.495,35062.04,44701.21,51630.06
2019-10-25,6.41,6.44,6.43,6.37,38883.29,0.02,0.31,6.418,6.471,6.501,33806.90,50238.54,52416.09
