In [None]:
import talib
import numpy as np
import pandas as pd
import math
import scipy.stats as stats
# import numba as nb

In [None]:
def strided_app(origin_array: np.ndarray,
                window: int = 252, step=1) -> np.ndarray:
    """Return a 2-D strided view of overlapping windows over ``origin_array``.

    Row ``k`` of the result starts at element ``k * step`` and holds
    ``window`` consecutive elements. The rows are built with
    ``as_strided``, so they alias the input buffer rather than copying it.

    Args:
        origin_array: Source array. Only ``strides[0]`` is used, so this
            presumably expects a 1-D array -- TODO confirm with callers.
        window: Number of elements per row.
        step: Offset, in elements, between the starts of consecutive rows.

    Returns:
        An array of shape ``(nrows, window)`` when the input has more than
        ``window`` elements; otherwise the input array itself, unchanged.
    """
    total = origin_array.size
    if total <= window:
        # Not enough data for more than one window: hand the input back.
        return origin_array

    row_count = (total - window) // step + 1
    item_stride = origin_array.strides[0]
    return np.lib.stride_tricks.as_strided(
        origin_array,
        shape=(row_count, window),
        strides=(item_stride * step, item_stride),
    )

def rolling_poly9(origin_array: np.ndarray, window: int = 252) -> np.ndarray:
    """Rolling degree-9 Chebyshev fit of a series.

    Args:
        origin_array: Series to smooth.
        window: Number of samples in each rolling fit.

    Returns:
        When the input has more than ``window`` samples: an array of the
        same length whose first ``window - 1`` entries are NaN and whose
        remaining entries are, for each window, the fitted value at that
        window's last sample.
        Otherwise: the fitted curve of one degree-9 fit over the whole
        input (same length as the input, no NaN padding).
    """
    sample_count = len(origin_array)

    # Short input: a single fit over everything, returned in full.
    if not sample_count > window:
        positions = range(sample_count)
        whole_fit = np.polynomial.Chebyshev.fit(positions, origin_array, 9)
        return whole_fit(positions)

    positions = range(window)

    def fitted_tail(chunk):
        # Fit one window and keep only the value at its final position.
        series = np.polynomial.Chebyshev.fit(positions, chunk, 9)
        return series(positions)[-1]

    windows = strided_app(origin_array, window, 1)
    tails = np.array([fitted_tail(chunk) for chunk in windows])
    # The first window - 1 samples have no complete window ending at them.
    return np.r_[np.full(window - 1, np.nan), tails]

In [None]:
df = pd.read_csv('data/data.csv', parse_dates=True, index_col=0)


def hma(array2hma: np.ndarray, n: int = 10):
    """Hull moving average built from three TA-Lib weighted moving averages.

    Computes ``WMA(2 * WMA(x, n / 2) - WMA(x, n), sqrt(n))``; the two
    derived periods are truncated to integers with ``int()``.

    Args:
        array2hma: Input values passed straight to ``talib.WMA``.
        n: Base period.

    Returns:
        Whatever ``talib.WMA`` returns for the final smoothing pass.
    """
    half_period = int(n / 2)
    smoothing_period = int(math.sqrt(n))
    raw = 2 * talib.WMA(array2hma, half_period) - talib.WMA(array2hma, n)
    return talib.WMA(raw, smoothing_period)


# Drop the last 10 rows. The explicit copy makes the trimmed frame own its
# data, so adding columns below does not write into a slice of the frame
# that was read from disk (avoids SettingWithCopyWarning).
df = df[0:-10].copy()
df['HMA10'] = hma(df['Close'].values)

# Fit only the non-NaN part of HMA10 and pad the front with as many NaNs as
# HMA10 has. NOTE(review): this assumes every NaN in HMA10 is a leading
# warm-up value; a NaN in the middle would shift the alignment.
hma_missing = df['HMA10'].isna()
df['POLYNOMIAL9'] = np.r_[
    np.full(int(hma_missing.sum()), np.nan),
    rolling_poly9(df.loc[~hma_missing, 'HMA10'].values, 252),
]

# pearsonr returns 1) the correlation coefficient r and 2) the significance
# level p. r should only be interpreted when p < 0.05 (or 0.01); r alone is
# not enough.
df = df.replace([np.inf, -np.inf], np.nan)
df = df.dropna(axis=0, how='any')
r, p = stats.pearsonr(df['Close'], df['POLYNOMIAL9'])  # correlation and p-value
# Positional access must go through .iloc: the index holds dates, and an
# integer key on such a Series relies on a deprecated positional fallback.
print(df['POLYNOMIAL9'].iloc[-50])
print('r=%r, p=%r' %(r, p))
r, p = stats.pearsonr(df['HMA10'], df['POLYNOMIAL9'])  # correlation and p-value
print('r=%r, p=%r' %(r, p))

In [None]:
a = [4,3,2,1]


def inverse_num(series):
    """Count the inversions in ``series``.

    An inversion is a pair of positions ``i < j`` with
    ``series[i] > series[j]``.

    Args:
        series: Indexable sequence supporting ``len()`` and ``>``.

    Returns:
        The number of inversions (0 for empty or single-element input).
    """
    # Plain index loops are kept on purpose: the original note says numba
    # does not support enumerate, so the inversions are counted by hand.
    count = 0
    length = len(series)
    for i in range(length - 1):
        for j in range(i + 1, length):
            if series[i] > series[j]:
                count += 1
    return count


inverse_num(a)

In [None]:
import numpy as np
import numba as nb
import pandas as pd


@nb.jit(nopython=True)
def thresholding_algo(y, lag, threshold, influence):
    """Robust z-score peak detection over a series.

    Port of the algorithm described at
    https://stackoverflow.com/questions/22583391/ . The author of this port
    reports that the Numba JIT makes it roughly 500 times faster than the
    plain-Python original.

    Each sample from index ``lag`` onward is compared with the mean and
    standard deviation of the previous ``lag`` filtered samples. A sample
    further than ``threshold`` standard deviations from that mean is
    flagged, and enters the filtered series damped by ``influence``.

    Args:
        y: Input series. Assumed to be a 1-D float array -- TODO confirm;
            the filtered copy inherits the dtype of ``y``.
        lag: Number of samples in the moving window.
        threshold: Z-score above which a sample is flagged.
        influence: Weight (0..1) of a flagged sample in the filtered series.

    Returns:
        A ``(3, len(y))`` float64 array: row 0 holds the signal (+1, -1 or
        0), row 1 the moving mean and row 2 the moving standard deviation
        of the filtered series. Columns that are never written stay 0.
    """
    signals = np.zeros((3, len(y)), dtype=np.float64)  # 3 rows, len(y) columns, all zero
    idx_signals = 0
    idx_avgFilter = 1
    idx_stdFilter = 2

    filteredY = np.copy(y)
    # Seed the statistics at index lag-1 from the first `lag` raw samples,
    # i.e. from the window that ends at (and includes) index lag-1.
    signals[idx_avgFilter, lag-1] = np.mean(y[0:lag])
    signals[idx_stdFilter, lag-1] = np.std(y[0:lag])
    for i in range(lag, len(y)):
        # Compare the distance from the previous moving mean with
        # threshold times the previous moving standard deviation.
        if abs(y[i] - signals[idx_avgFilter, i-1]) > threshold * signals[idx_stdFilter, i-1]:
            if y[i] > signals[idx_avgFilter, i-1]:
                signals[idx_signals, i] = 1
            else:
                signals[idx_signals, i] = -1
            # A flagged sample enters the filtered series damped: influence
            # times the raw value plus (1 - influence) times the previous
            # filtered value.
            filteredY[i] = influence * y[i] + (1-influence) * filteredY[i-1]
        else:
            signals[idx_signals, i] = 0
            filteredY[i] = y[i]

        # The window ends at index i and includes it (indices i-lag+1 .. i),
        # matching the seed window above. Slicing (i-lag):i instead would
        # leave out the sample that was just filtered and repeat the seed
        # statistics at i == lag.
        window = filteredY[(i-lag+1):(i+1)]
        signals[idx_avgFilter, i] = np.mean(window)
        signals[idx_stdFilter, i] = np.std(window)

    return signals

In [None]:
# Detector settings: window length, z-score threshold, and the weight given
# to flagged samples.
lag, threshold, influence = 5, 3.5, 0.5

df = pd.read_csv('data/data.csv', index_col=0, parse_dates=True)
data = np.array(df.Close)
# Row 0 of the returned matrix is the signal row.
peak = thresholding_algo(data, lag, threshold, influence)[0]
print(peak)

In [None]:
import numpy as np
import numba as nb
import pandas as pd
import talib

# Reload the price CSV, using its first column as the index (parsed as dates),
# and copy the Close column into a NumPy array.
df = pd.read_csv('data/data.csv', parse_dates=True, index_col=0)
data = np.array(df.Close)


def ma_power(data, range_list=range(5, 30)):
    """Share of inverted moving-average pairs for every row of ``data``.

    One ``talib.MA`` series is computed per period in ``range_list`` (5..29
    with the default). For each row from index ``max(range_list)`` onward,
    the moving averages are taken in ``range_list`` order and the pairs
    where an earlier one exceeds a later one are counted. The count is
    divided by ``n * (n - 1)`` and doubled, ``n`` being the number of
    periods.

    Args:
        data: Price array handed to ``talib.MA``.
        range_list: Sized iterable of moving-average periods.

    Returns:
        Array of length ``len(data)`` with the ratio per row; rows before
        ``max(range_list)`` stay 0.
    """
    def count_inversions(row):
        # Pairwise "greater than" table; the strict upper triangle keeps
        # only the pairs whose first position precedes the second.
        greater = row[:, None] > row[None, :]
        return np.triu(greater, k=1).sum()

    # One column per moving-average period.
    period_count = len(range_list)
    ma_table = np.empty((len(data), period_count))
    for column, period in enumerate(range_list):
        ma_table[:, column] = talib.MA(data, period)

    first_row = max(range_list)
    pair_scale = period_count * (period_count - 1)
    num = np.zeros(len(data))
    ratio = np.zeros(len(data))
    with np.errstate(invalid='ignore', divide='ignore'):
        for row in range(first_row, len(data)):
            num[row] = count_inversions(ma_table[row, :])
            ratio[row] = num[row] / pair_scale * 2

    return ratio


In [None]:
# Print the inverted-pair ratio that ma_power computes for the Close series in `data`.
print(ma_power(data))

In [None]:
import psycopg2
from psycopg2 import Error

# Bind both names before the try block so the cleanup in `finally` never
# hits an unbound name (or a stale object from an earlier cell) when
# connect() or cursor() fails.
connection = None
cursor = None
try:
    connection = psycopg2.connect(user="postgres",
                                  #   password="postgres",
                                  host="127.0.0.1",
                                  port="5432",
                                  database="stock")

    # Create a cursor to perform database operations
    cursor = connection.cursor()
    # Print PostgreSQL details
    print("PostgreSQL server information")
    print(connection.get_dsn_parameters(), "\n")
    # Execute a SQL query (the result is not fetched here)
    cursor.execute("SELECT version();")
except (Exception, Error) as error:
    print("Error while connecting to PostgreSQL", error)
finally:
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()
        print("PostgreSQL connection is closed")


In [None]:
import tushare as ts
from config.essential import TOKEN


# Create a Tushare Pro client from the token in the project config, then
# query `stock_basic` with list_status='L', keeping only the listed fields.
pro = ts.pro_api(TOKEN)
data = pro.query('stock_basic', exchange='', list_status='L', fields='ts_code,symbol,name,area,industry,list_date')

In [None]:
from datetime import date
from datetime import timedelta

# The calendar day before 2020-01-11, via date/timedelta arithmetic.
res = date(2020, 1, 11) + timedelta(days=-1)
print(res)

In [None]:
import psycopg2
from psycopg2 import Error
from config.essential import DB


# Bind both names before the try block so the cleanup in `finally` never
# hits an unbound name when connect() or cursor() fails.
connection = None
cursor = None
try:
    # Connect to an existing database
    connection = psycopg2.connect(user=DB['username'],
                                  # password="123456",
                                  host=DB['host'],
                                  port=DB['port'],
                                  database=DB['database'])

    # Create a cursor to perform database operations
    cursor = connection.cursor()
    # NOTE(review): `data` is whatever an earlier cell bound to that name,
    # and nothing visible in this notebook defines a `.SQL` attribute on it.
    # Any failure here is caught and reported by the handler below --
    # TODO confirm what statement was meant to run.
    cursor.execute(data.SQL)

except (Exception, Error) as error:
    print("Error while connecting to PostgreSQL", error)
finally:
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()

In [None]:
from datetime import datetime as dt
import backtrader.feeds as btfeeds
import pandas as pd
# def get_csv_data(pathname, fromdate, todate):
#     if isinstance(fromdate, date):
#         try:
#             fromdate=dt.strptime(fromdate, '%Y-%m-%d')
#         except ValueError as error:
#             template = "An exception of type {0} occurred. Arguments:\n{1!r}"
#             message = template.format(type(error).__name__, error.args)
#             print (message)
#     if isinstance(todate, date):
#         try:
#             todate=dt.strptime(todate, '%Y-%m-%d')
#         except ValueError as error:
#             template = "An exception of type {0} occurred. Arguments:\n{1!r}"
#             message = template.format(type(error).__name__, error.args)
#             print (message)
#     data = btfeeds.YahooFinanceCSVData(
#         dataname=pathname,
#         # Do not pass values before this date
#         fromdate=fromdate,
#         # Do not pass values before this date
#         todate=todate,
#         # Do not pass values after this date
#         reverse=False)
#     return data

# stockdata = get_csv_data('data/data.csv', '2003-01-01', '2003-12-31')
# Load the CSV with integer column labels: skiprows=1 drops the first line of
# the file and header=None stops pandas from using any row as a header.
stockdata = pd.read_csv('data/data.csv', header=None, skiprows=1 ,parse_dates=True)
# Show the first loaded row as a NumPy array.
print(stockdata.values[0,:])

In [None]:
import tushare as ts
import psycopg2
from psycopg2 import sql
from config.essential import DB
from config.essential import TOKEN

# Secrets come from the project config instead of being pasted into the
# notebook: the Tushare token via TOKEN, and the database credentials via
# the same DB keys the other cells of this notebook use.
token = TOKEN
pro = ts.pro_api(token)
data = pro.query('stock_basic', exchange='', list_status='L', fields='ts_code, symbol, name, area, industry, list_date')

# The first ts_code in the result is used as the table name.
table_name = data['ts_code'][0]

connection = psycopg2.connect(user=DB['username'],
                              password=DB['password'],
                              host=DB['host'],
                              port=DB['port'],
                              database=DB['database'])
cursor = None
try:
    # Create a cursor to perform database operations
    cursor = connection.cursor()
    # sql.Identifier quotes the table name; values go through %s placeholders.
    query = sql.SQL('CREATE TABLE IF NOT EXISTS {table} (date DATE PRIMARY KEY, open FLOAT4, high FLOAT4, low FLOAT4, close FLOAT4, adjust FLOAT4, volumn INT)').format(table=sql.Identifier(table_name))
    cursor.execute(query)
    connection.commit()
    query = sql.SQL('INSERT INTO {table} VALUES (%s, %s, %s, %s, %s, %s, %s)').format(table=sql.Identifier(table_name))
    # `stockdata` is the frame loaded from data/data.csv in an earlier cell;
    # each row becomes one parameter tuple.
    cursor.executemany(query, list(stockdata.itertuples(index=False, name=None)))
    connection.commit()
finally:
    # Release the cursor and connection even when a statement above raises.
    if cursor is not None:
        cursor.close()
    connection.close()

In [None]:
import pandas as pd
import psycopg2
from psycopg2 import Error, sql
from config.essential import DB
db = DB
ticker = '000001.SZ'
# Bind both names before the try block so the cleanup in `finally` never
# hits an unbound name when connect() or cursor() fails.
connection = None
cursor = None
try:
    # Connect to an existing database
    connection = psycopg2.connect(user=db['username'],
                                  password=db['password'],
                                  host=db['host'],
                                  port=db['port'],
                                  database=db['database'])

    # Create a cursor to perform database operations
    cursor = connection.cursor()
    # Make sure the per-ticker table exists before selecting from it.
    query = sql.SQL('CREATE TABLE IF NOT EXISTS {table} (date DATE PRIMARY KEY, open FLOAT4, high FLOAT4, low FLOAT4, close FLOAT4, adjust FLOAT4, volumn INT)').format(table=sql.Identifier(ticker))
    cursor.execute(query)
    connection.commit()
    # Rows in the date range, oldest first; the bounds are bound parameters.
    query = sql.SQL('SELECT * FROM {table} WHERE DATE BETWEEN %s AND %s ORDER BY {date} ASC').format(table=sql.Identifier(ticker), date=sql.Identifier('date'))
    cursor.execute(query, ('2000-01-01', '2003-01-20'))
    records = cursor.fetchall()
    print(type(records))
except (Exception, Error) as error:
    print('Error while connecting to PostgreSQL', error)
finally:
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()
    print('Query complete.')

In [None]:
# Scratch check of how an empty result list and None compare.
if records == []:
    print('hello')
# NOTE(review): this rebinding throws away the rows fetched earlier; any
# later cell that reads `records` needs the query cell to be run again.
records = None
print(records)
print(type(records))
# Identity comparison is the correct test for None.
if records is None:
    print('this is none')

In [None]:
from config.essential import PROXY, DB
from datetime import datetime as dt
import yfinance as yf
from tools.data_yahoo import insert_ticker_data
# Download price history for one ticker through the configured proxy, from
# `fromdate` up to today's date, then pass the frame to the project helper.
db=DB
ticker = '000001.SZ'
fromdate='2021-07-01'
todate=dt.now().date()
data = yf.download(tickers=ticker, start=fromdate, end=todate, proxy=PROXY)
print(data)
print(type(data))
insert_ticker_data(ticker=ticker, data=data, db=db)  # insert the downloaded data into the database

In [None]:
# df = pd.DataFrame(data =records[:4]+records[5:])
# print(df)
# print(type(records[1]))
# print(records[for record in records return record[:4]+record[5:]])
# Drop the element at position 4 of every fetched row. Given the table
# layout used elsewhere in this notebook (date, open, high, low, close,
# adjust, volumn), that is presumably the raw close, so the adjusted price
# ends up labelled 'close' below -- TODO confirm this is intended.
records = [row[:4] + row[5:] for row in records]
print(records)
df = pd.DataFrame(data=records)
df.columns = ['datetime', 'open', 'high', 'low', 'close', 'volume']
# Append a constant openinterest column, parse the dates, and index by them.
df = (
    df.assign(
        openinterest=0,
        datetime=lambda frame: pd.to_datetime(frame['datetime'], format='%Y-%m-%d'),
    )
    .set_index('datetime')
)
print(df)

In [None]:
import re
def convert_ticker_type(ticker, style):
    """Rewrite a ticker in the layout a given data source accepts.

    The ticker must contain a two-letter market code and a six-digit stock
    code, in either order (for example ``'000001.SZ'`` or ``'sz.000001'``).

    Args:
        ticker: Ticker string to convert.
        style: ``'yahoo'`` for the format yfinance accepts
            (``'000001.SZ'``) or ``'baostock'`` for the format baostock
            accepts (``'sz.000001'``).

    Returns:
        The converted ticker, or ``None`` when ``style`` is not one of the
        two supported values.

    Raises:
        AttributeError: If ``style`` is supported but ``ticker`` lacks a
            two-letter market code or a six-digit stock code.
    """
    if style not in ('yahoo', 'baostock'):
        return None

    # Raw strings: '\d' in a normal string literal is an invalid escape.
    market = re.search(r'[a-zA-Z]{2}', ticker).group()
    code = re.search(r'\d{6}', ticker).group()

    if style == 'yahoo':
        return '.'.join([code, market.upper()])
    return '.'.join([market.lower(), code])

In [None]:
import tushare as ts
tickers = pro.query('stock_basic', exchange='', list_status='L', fields='ts_code, name, area, industry, list_date')
# Rename the columns to the names baostock uses.
tickers.columns = ['code', 'code_name', 'area', 'industry', 'list_date']
# A comprehension keeps the loop variable local, so the notebook-level
# `ticker` that other cells rely on is not overwritten by this cell.
code_list = [convert_ticker_type(code, 'baostock') for code in tickers['code']]
tickers['code'] = code_list
print(tickers)

In [None]:
import requests
from bs4 import BeautifulSoup
from fake_headers import Headers

# Site root; get_proxy prepends it to the pagination links it follows.
BASEURL = 'https://www.proxyranker.com'
# Listing page passed to get_proxy as the starting point.
URL = 'https://www.proxyranker.com/china/list/'



def get_proxy(URL, proxies):
    """Scrape ``ip:port`` entries from a proxy listing page and the pages it links to.

    Args:
        URL: Address of the listing page to fetch.
        proxies: List that the scraped ``'ip:port'`` strings are appended
            to; it is modified in place.

    Returns:
        The same ``proxies`` list.
    """
    headers = Headers(os="win", headers=True).generate()
    # Send the generated browser-like headers with the request; they were
    # built before but never passed on.
    response = requests.get(URL, headers=headers, timeout=(20, 20))
    soup = BeautifulSoup(response.text)
    table = soup.select('body > div:nth-child(4) > div.bl > div.data > table > tr')
    for row in table:
        # First cell holds the address, the span in the fourth cell the port.
        ip = row.select('tr > td:nth-child(1)')[0]
        port = row.select('tr > td:nth-child(4) > span')[0]
        proxies.append(':'.join([ip.string, port.string]))

    page_ref = soup.select('body > div:nth-child(4) > div.bl > div > table > tfoot > tr > td > div > a')
    # Follow the second pagination link while the footer has more than one;
    # presumably that link is "next page" -- TODO confirm against the site.
    if len(page_ref) > 1:
        URL = BASEURL + page_ref[1]['href']
        get_proxy(URL, proxies)
    return proxies



In [None]:
# Collect the candidates first, then build a separate list of working ones.
# Removing entries from a list while iterating over it skips the element
# after each removal, so some proxies would never be checked.
candidates = get_proxy(URL, proxies=[])
proxies = []
for proxy in candidates:
    try:
        response = requests.get('https://finance.yahoo.com/', timeout=5, proxies=dict(https=proxy))
    except requests.exceptions.RequestException as error:
        # Timeouts, refused connections and proxy errors all mean the proxy
        # is unusable: report it and continue with the next candidate.
        print(error)
        continue
    if response.status_code == 200:
        proxies.append(proxy)
print(proxies)

In [114]:
import baostock as bs
import pandas as pd
from datetime import datetime as dt

def _fetch_rows(result_set):
    """Drain a baostock result set into a list of rows (empty if the query failed)."""
    rows = []
    # `and` short-circuits, so next() is only called on a successful query.
    while result_set.error_code == '0' and result_set.next():
        rows.append(result_set.get_row_data())
    return rows


#### Log in ####
lg = bs.login()
# Show the login response
print('login respond error_code:'+lg.error_code)
print('login respond  error_msg:'+lg.error_msg)
# The strptime/strftime round trip validates the literals as YYYY-MM-DD.
fromdate = dt.strptime('2017-01-01', '%Y-%m-%d').strftime('%Y-%m-%d')
todate = dt.strptime('2017-01-21', '%Y-%m-%d').strftime('%Y-%m-%d')
# baostock expects '<market>.<code>' with a lower-case market, which is the
# format convert_ticker_type(..., 'baostock') produces in this notebook.
ticker = 'sz.000005'
# Fetch unadjusted prices
records = bs.query_history_k_data_plus(ticker,
        'date,open,high,low,close,volume',
        start_date=fromdate, end_date=todate,
        frequency='d', adjustflag='3')
print('query_history_k_data_plus respond error_code: ', records.error_code)
print('query_history_k_data_plus respond  error_msg: ', records.error_msg)
data_list = _fetch_rows(records)
result = pd.DataFrame(data_list, columns=records.fields)
# Fetch adjusted close prices
records = bs.query_history_k_data_plus(ticker,
    'close',
    start_date=fromdate, end_date=todate,
    frequency='d', adjustflag='2')
print('query_history_k_data_plus respond error_code: ', records.error_code)
print('query_history_k_data_plus respond  error_msg: ', records.error_msg)
data_list = _fetch_rows(records)
# Each row holds only the close value; assign it as a Series so it aligns
# with `result` by position-based index, even when no rows came back.
result['adjust'] = pd.Series([row[0] for row in data_list], dtype=object)
result = result[['date','open','high','low','close','adjust','volume']]
result['date'] = pd.to_datetime(result['date']).dt.date
print(result)
#### Log out ####
bs.logout()

login success!
login respond error_code:0
login respond  error_msg:success


AttributeError: 'builtin_function_or_method' object has no attribute 'strftime'

In [None]:
import re
# Split a ticker into its two-letter market code and six-digit stock code,
# then rebuild it as '<code>.<MARKET>'.
ticker = '000001.SZ'
# Raw strings: '\d' in a normal string literal is an invalid escape.
market = re.search(r'[a-zA-Z]{2}', ticker).group()
code = re.search(r'\d{6}', ticker).group()

tickers = '.'.join([code, market.upper()])
print(tickers)