In [2]:
# Constructs Time Series Data for All Stocks
import pandas as pd
import numpy as np
from datetime import datetime
import tushare as ts

from scipy.stats import rankdata

import seaborn as sns

# Pull every trading day between 2001 and 2018 as a Series of Timestamps.
trade_dates = pd.Series(
    data=list(map(pd.Timestamp, get_trading_dates('2001-01-01', '2018-12-31'))),
    name='trade_date'
)

# Study window, expressed as calendar years (both ends inclusive).
year_start = 2001
year_end = 2012

# First trading day of the first year and last trading day of the last year.
trade_years = trade_dates.dt.year
date_start_dt = min(trade_dates[trade_years == year_start])
date_end_dt = max(trade_dates[trade_years == year_end])

# String forms of the window bounds, used by the data-pull calls below.
date_fmt = '%Y-%m-%d'
date_start = date_start_dt.strftime(date_fmt)
date_end = date_end_dt.strftime(date_fmt)

# Construct Stock Population: instruments of type "CS" as of the window start.
stock_all = all_instruments(type="CS", country='cn', date=date_start_dt)
stock_list = stock_all['order_book_id'].tolist()
print("Population Check - Initial #: {}".format(len(stock_all)))

Population Check - Initial #: 1059


In [3]:
# Daily price/volume fields for the whole population, requested with
# adjust_type='pre' and with suspended days kept (skip_suspended=False).
price_fields = ['close', 'total_turnover', 'volume']
trade_data = get_price(
    stock_list,
    start_date=date_start,
    end_date=date_end,
    frequency='1d',
    fields=price_fields,
    adjust_type='pre',
    skip_suspended=False,
    country='cn'
)

# Price change rate over the same window.
return_data = get_price_change_rate(
    stock_list, start_date=date_start, end_date=date_end
)

# Turnover rate over the same window, 'week' and 'month' fields.
turnover_fields = ['week', 'month']
turnover_data = get_turnover_rate(stock_list, date_start, date_end, fields=turnover_fields)

# Query fundamentals once per trading day inside the study window and keep one
# frame per entry date.
in_window = (trade_dates.dt.year >= year_start) & (trade_dates.dt.year <= year_end)
fundamental_frames = {}
for entry_dt in trade_dates[in_window]:
    snapshot = get_fundamentals(
        query(
            fundamentals.eod_derivative_indicator.market_cap,  # total market cap
            fundamentals.eod_derivative_indicator.a_share_market_val_2,  # free-float market cap
            fundamentals.cash_flow_statement.cash_received_from_sales_of_goods,  # sales - single quarter / year-on-year
            fundamentals.eod_derivative_indicator.pb_ratio,  # price-to-book ratio (market cap vs. net assets)
            fundamentals.income_statement.net_profit,  # net profit
            fundamentals.eod_derivative_indicator.ps_ratio  # price-to-sales ratio
        ).filter(fundamentals.income_statement.stockcode.in_(stock_list)),
        entry_date=entry_dt, interval='1q', report_quarter=True
    ).to_frame()
    snapshot.index.names = ['date', 'order_book_id']
    fundamental_frames[entry_dt] = snapshot

# Stack the per-day frames; concat on a dict prepends the dict key as an outer
# index level, which is dropped so the index is (date, order_book_id) again.
fundamental_data = pd.concat(fundamental_frames).reset_index(level=0, drop=True)

# Aggregate Data
# Bring every source onto a shared (date, order_book_id) MultiIndex, then
# left-join everything onto the return series.
panel_index_names = ['date', 'order_book_id']

trade_ts = trade_data.to_frame()
trade_ts.index.names = panel_index_names

stacked_returns = return_data.stack()
return_ts = pd.DataFrame(stacked_returns, columns=['return'])
return_ts.index.names = panel_index_names

turnover_ts = turnover_data.to_frame()
turnover_ts.index.names = panel_index_names

# Merge order: trades, then turnover, then fundamentals.
data = return_ts
for right_frame in (trade_ts, turnover_ts, fundamental_data):
    data = data.merge(right_frame, how='left', left_index=True, right_index=True)

# Save Data
# Name the export after the configured study window (year_start..year_end)
# instead of a hard-coded "2016_2018" label that does not match the data pulled
# above and could overwrite an unrelated export.
# NOTE(review): the platform warned that start_date was adjusted to 2005-01-04,
# so the earliest rows may be later than year_start -- confirm the label you want.
data.to_csv("stock_data_all_{}_{}.csv".format(year_start, year_end))


WARN: start_date is earlier than 2005-01-04, adjusted


In [4]:
# Rebuild the merged panel from the raw pulls (same steps as the aggregation at
# the end of the previous cell), so this cell can be re-run without re-querying.
panel_index_names = ['date', 'order_book_id']

trade_ts = trade_data.to_frame()
trade_ts.index.names = panel_index_names

stacked_returns = return_data.stack()
return_ts = pd.DataFrame(stacked_returns, columns=['return'])
return_ts.index.names = panel_index_names

turnover_ts = turnover_data.to_frame()
turnover_ts.index.names = panel_index_names

# Left-join trades, turnover, and fundamentals onto the return series, in that order.
data = return_ts
for right_frame in (trade_ts, turnover_ts, fundamental_data):
    data = data.merge(right_frame, how='left', left_index=True, right_index=True)


In [5]:
# Break data into monthly chunks
# `data` is indexed by ('date', 'order_book_id'), so `data['date']` would raise
# KeyError: the dates live in the index, not in a column. Read them from the
# index level; a plain 'date' column is still honoured if the frame was reset.
year = 2011
if 'date' in data.columns:
    chunk_dates = pd.DatetimeIndex(data['date'])
else:
    chunk_dates = pd.DatetimeIndex(data.index.get_level_values('date'))

# The year mask is the same for every month, so compute it once.
in_year = chunk_dates.year == year
for month in range(1, 13):
    month_chunk = data.loc[in_year & (chunk_dates.month == month), :]
    # One file per month, named stock_data_all_YYYYMM.csv
    month_chunk.to_csv("stock_data_all_{}{:02d}.csv".format(year, month))