In [1]:

import collections as col

from emulator.env_data import high2low, fix_data
from emulator.env_factor import get_factors

from params import *


import re
import os
import numpy as np
import pandas as pd
import calendar
import matplotlib.pyplot as plt

from functools import reduce
from collections import defaultdict
from seaborn import heatmap
from copy import deepcopy
from scipy.optimize import linprog as lp
from datetime import datetime, timedelta, date
from collections import OrderedDict


# Working directory that holds the code.  '__file__' is deliberately a quoted
# string (a notebook kernel defines no __file__ variable), so abspath() resolves
# it against the current working directory and dirname() yields that directory.
work_dir = os.path.dirname(os.path.abspath('__file__'))

# Folders directly under work_dir:
#   tickdata    - raw quote files read by getting_factors()
#   PQFactors   - one factor CSV per input file, written by getting_factors()
#   PCT_CHG_NM  - one pct-change CSV per input file, written by getting_factors()
tickdata_path, factors_path, PCT_CHG_path = (
    os.path.join(work_dir, folder)
    for folder in ('tickdata', 'PQFactors', 'PCT_CHG_NM')
)
# Destination of the per-month CSVs written by concat_date_factors().
concat_path = os.path.join(work_dir, 'factor_preprocess', 'factors_rq')

def getting_factors(file_path):
    """Compute and save the factor table and close-price changes for one tick file.

    The quotes are loaded with ``fix_data``, passed through
    ``high2low(quotes, H2L_Freq)`` (presumably a frequency down-sampling; see
    ``emulator.env_data``) and handed to ``get_factors``.  Two GBK-encoded
    CSVs are written, both named after the input file:

    * ``factors_path/<file_path>``: the factor table plus the ``code``,
      ``industry_zx``, ``name`` and ``PCT_CHG_NM`` columns;
    * ``PCT_CHG_path/<file_path>``: ``quotes.close.pct_change()`` on its own.

    Parameters
    ----------
    file_path : str
        Name of an entry inside ``tickdata_path`` (not a full path).  The
        first two dot-separated parts of the name become the ``code`` column.
        Directory entries are skipped.

    Returns
    -------
    None
        Results are only written to disk.
    """
    source_path = os.path.join(tickdata_path, file_path)
    # Test the entry where it actually lives; the bare name would be resolved
    # against the current directory and sub-folders of tickdata/ would slip
    # through to fix_data().
    if os.path.isdir(source_path):
        return

    quotes = fix_data(source_path)
    quotes = high2low(quotes, H2L_Freq)

    factors = get_factors(
        quotes.index,
        quotes.open.values,
        quotes.close.values,
        quotes.high.values,
        quotes.low.values,
        quotes.volume.values,
        rolling=20,
        drop=False,
    )

    # Period-over-period change of the close; stored both as a factor column
    # and as its own file.
    # NOTE(review): the value is not shifted - confirm whether the "NM" suffix
    # means downstream code expects a forward-looking return.
    pct_chg = quotes.close.pct_change()

    # "000001.SZ.csv" -> "000001.SZ"; a name without a dot is used as-is.
    factors['code'] = '.'.join(file_path.split('.')[:2])
    factors['industry_zx'] = quotes['industry_zx']
    factors['name'] = quotes['name']
    factors['PCT_CHG_NM'] = pct_chg

    # to_csv() does not create missing folders.
    os.makedirs(factors_path, exist_ok=True)
    os.makedirs(PCT_CHG_path, exist_ok=True)
    factors.to_csv(os.path.join(factors_path, file_path), encoding='gbk')
    pct_chg.to_csv(os.path.join(PCT_CHG_path, file_path), encoding='gbk')
        
    
def concat_date_factors(file_path, start_date="2013-2-1", end_date=None):
    """Write one cross-sectional factor CSV per calendar month.

    For every month touched by the days in ``[start_date, end_date)`` and for
    every CSV in ``factors_path``, the last row whose ``tradeTime`` lies
    between the first and the last day of that month is selected.  The rows
    selected for a month are stacked (one row per file, in ``os.listdir``
    order), indexed by their ``code`` column and written to
    ``concat_path/<YYYY-MM-DD>.csv`` (GBK-encoded), where the date is the last
    calendar day of the month.

    Parameters
    ----------
    file_path : str
        Not used; kept so existing calls keep working.  Input is always read
        from ``factors_path``.
    start_date : str, optional
        First day of the range, ``"%Y-%m-%d"``.
    end_date : str or None, optional
        Exclusive last day of the range, ``"%Y-%m-%d"``.  ``None`` means
        today.

    Returns
    -------
    None
    """
    range_start = datetime.strptime(start_date, "%Y-%m-%d")
    if end_date is None:
        end_date = date.today().strftime('%Y-%m-%d')
    range_end = datetime.strptime(end_date, "%Y-%m-%d")

    # Distinct "YYYY-MM" labels, in chronological order, of every day in the
    # half-open range [range_start, range_end).
    months = list(OrderedDict.fromkeys(
        (range_start + timedelta(days=offset)).strftime(r"%Y-%m")
        for offset in range((range_end - range_start).days)
    ))

    # One (output label, first day, last day) triple per month.
    windows = []
    for month in months:
        year, month_number = (int(part) for part in month.split('-'))
        month_end = month + '-' + str(calendar.monthrange(year, month_number)[1])
        windows.append((
            month_end,
            pd.to_datetime(month + '-01', format='%Y-%m-%d'),
            pd.to_datetime(month_end, format='%Y-%m-%d'),
        ))

    # Read each factor file once and collect its month-end row for every
    # window.  This keeps one Series per (file, month) in memory in exchange
    # for not re-reading every file for every month.
    last_rows = OrderedDict((month_end, []) for month_end, _, _ in windows)
    for fname in os.listdir(factors_path):
        csv_path = os.path.join(factors_path, fname)
        if not os.path.isfile(csv_path):
            continue
        frame = pd.read_csv(csv_path, encoding="gbk", engine='c')
        frame = frame.set_index(frame.tradeTime)
        trade_dates = pd.to_datetime(frame.tradeTime, format='%Y-%m-%d')
        for month_end, first_day, last_day in windows:
            in_month = ((trade_dates >= first_day) & (trade_dates <= last_day)).values
            if in_month.any():
                last_rows[month_end].append(frame[in_month].iloc[-1, :])

    # to_csv() does not create missing folders.
    os.makedirs(concat_path, exist_ok=True)
    for month_end, rows in last_rows.items():
        if not rows:
            # No factor file has a row inside this month; there is nothing to
            # index by `code`, so no file is written for it.
            continue
        snapshot = pd.concat(rows, axis=1).T
        snapshot = snapshot.set_index(snapshot['code'])
        snapshot.to_csv(os.path.join(concat_path, month_end) + '.csv', encoding='gbk')

def concat_chg_nm(file_path):
    """Merge all per-file pct-change CSVs into ``work_dir/PCT_CHG_NM.csv``.

    Every CSV in ``PCT_CHG_path`` is read, its ``close`` column is renamed to
    the code taken from the file name (first two dot-separated parts) and the
    frame is indexed by ``trading_point``.  The frames are joined side by
    side, transposed so that each row is one code, and written GBK-encoded
    with ``code`` as the index label.  An empty folder yields an empty CSV.

    Parameters
    ----------
    file_path : str
        Not used; kept so existing calls keep working.  Input is always read
        from ``PCT_CHG_path``.

    Returns
    -------
    None
    """
    per_code = []
    for fname in os.listdir(PCT_CHG_path):
        csv_path = os.path.join(PCT_CHG_path, fname)
        if not os.path.isfile(csv_path):
            continue
        # "000001.SZ.csv" -> "000001.SZ"; a name without a dot is used as-is.
        code = '.'.join(fname.split('.')[:2])
        frame = pd.read_csv(csv_path, encoding="gbk", engine='c')
        per_code.append(
            frame.rename(columns={'close': code}).set_index('trading_point')
        )

    # A single concat aligns all frames on trading_point at once instead of
    # re-copying the growing result for every file.
    pct_chg = pd.concat(per_code, axis=1) if per_code else pd.DataFrame()
    pct_chg = pct_chg.T
    pct_chg.index.names = ['code']
    pct_chg.to_csv(os.path.join(work_dir, 'PCT_CHG_NM.csv'), encoding='gbk')
        
    

In [2]:
# Build the per-file factor and pct-change CSVs for every entry in tickdata/.
# getting_factors() only writes to disk and returns None, so its result is
# not bound to a name.
for fpath in os.listdir(tickdata_path):
    getting_factors(fpath)

In [3]:
# Write one month-end cross-section CSV per calendar month into concat_path.
# The argument is never read inside concat_date_factors(); the function always
# reads from the module-level factors_path.
concat_date_factors(factors_path)

In [4]:
# Merge the per-file pct-change CSVs into work_dir/PCT_CHG_NM.csv.
# The argument is never read inside concat_chg_nm(); the function always reads
# from the module-level PCT_CHG_path.
concat_chg_nm(PCT_CHG_path)