-
Notifications
You must be signed in to change notification settings - Fork 0
/
basic_func.py
156 lines (136 loc) · 3.88 KB
/
basic_func.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#%%
import os
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import multiprocessing
import matplotlib.pyplot as plt
import scipy.io
import stats
#%%
'''
General idea:
1. Load the table data_close as the stock prices.
2. Code some basic alphas using the simple functions below.
3. Loop over the whole table, select one day as the end date, and return the portfolio.
4. Compare its IPO date with the end date; if they conflict, drop it.
'''
#%%
def Rank(x):
    """Map each entry of x to its rank, spread evenly over [0, 1].

    An entry's rank is the number of entries strictly smaller than it, so
    tied entries share the same (lowest) rank. The smallest entry maps to
    0.0 and a unique largest entry maps to 1.0.

    Args:
        x: sequence of mutually comparable values.

    Returns:
        A new list of floats with the same length and order as x. An empty
        input yields an empty list; a single entry yields [0.0].
    """
    count = len(x)
    if count < 2:
        # With fewer than two entries there is no spacing to compute:
        # 1 / (count - 1) would divide by zero for a single entry.
        return [0.0] * count
    step = 1.0 / float(count - 1)
    # Number of strictly smaller entries, scaled onto the [0, 1] grid.
    return [sum(1 for other in x if other < value) * step for value in x]
def Min(x, y):
    """Return the element-wise minimum of two lists.

    The result has len(x) entries; y is indexed at every position of x.
    """
    return [min(x[pos], y[pos]) for pos in range(len(x))]
def Max(x, y):
    """Return the element-wise maximum of two lists.

    The result has len(x) entries; y is indexed at every position of x.
    """
    return [max(x[pos], y[pos]) for pos in range(len(x))]
def StdDev(x, n):
    """Return the standard deviation of the last n entries of x.

    Computed by np.std with its default arguments on the slice x[-n:].
    Original author's note: n must be less than 256 (not enforced here).
    """
    recent = x[-n:]
    return np.std(recent)
def sum_past(x, n):
    """Return the sum of the last n entries of x."""
    window = x[-n:]
    return sum(window)
def product_past(x, n):
    """Return the product of the last n entries of x (via np.prod)."""
    window = x[-n:]
    return np.prod(window)
def Correlation(x, y, n):
    """Correlate the last n entries of x with the last n entries of y.

    Returns whatever np.corrcoef returns for the two slices, i.e. the full
    2x2 correlation matrix rather than a single number; the x-y coefficient
    sits at index [0][1] (and [1][0]).
    Original author's note: n must be less than 256 (not enforced here).
    """
    recent_x = x[-n:]
    recent_y = y[-n:]
    return np.corrcoef(recent_x, recent_y)
def Tail(x, lower, upper, newval):
    """Overwrite entries of x lying strictly between lower and upper.

    x is modified in place and the same object is returned. Entries equal
    to either bound are left untouched.
    """
    for pos in range(len(x)):
        if lower < x[pos] < upper:
            x[pos] = newval
    return x
def Ts_Min(x, n):
    """Return the smallest of the last n entries of x."""
    window = x[-n:]
    return min(window)
def Ts_Max(x, n):
    """Return the largest of the last n entries of x."""
    window = x[-n:]
    return max(window)
def SignedPower(x, e):
    """Raise the magnitude of each entry to the power e, keeping its sign.

    Returns a new list holding sign(v) * (|v| ** e) for every entry v of x,
    in the same order.
    """
    return [np.sign(item) * (abs(item) ** e) for item in x]
def Ts_Rank(x, n):
    """Rank the last n entries of x on an evenly spaced [0, 1] scale.

    Delegates to Rank on the slice x[-n:] and returns its list of ranks,
    one per entry of the window, in window order.
    """
    # Rank accepts only the sequence; the previous call also passed n as a
    # second positional argument, which raised TypeError on every call.
    return Rank(x[-n:])
def Ts_skewness(x, n):
    """Return the sample skewness of the last n entries of x.

    Computed by scipy.stats.skew with its default arguments on x[-n:].
    """
    # The previous body called stats.skewness; scipy.stats (and the legacy
    # statlib `stats` module) expose this statistic under the name `skew`.
    # Imported locally because the file only imports scipy.io at module level.
    # NOTE(review): if the module-level `stats` is a project-local module that
    # really defines skewness(), confirm its definition matches scipy's.
    from scipy.stats import skew

    return skew(x[-n:])
def Ts_Kurtosis(x, n):
    """Return the kurtosis of the last n entries of x.

    Delegates to the kurtosis function of the module-level `stats` import;
    which kurtosis convention applies depends on that module.
    """
    window = x[-n:]
    return stats.kurtosis(window)
def Pasteurize(x):
    """Replace infinite entries of x with NaN.

    Both +inf and -inf are replaced. x is modified in place and the same
    object is returned.
    """
    pos_inf = float("inf")
    neg_inf = float("-inf")
    for pos in range(len(x)):
        if x[pos] == pos_inf or x[pos] == neg_inf:
            x[pos] = float("nan")
    return x
def delay(x, n):
    """Return the n-th entry of x counted from the end.

    n = 1 is the last entry, n = 2 the one before it, and so on. Because
    -0 == 0, n = 0 returns the first entry x[0].
    """
    offset = -n
    return x[offset]
def delta(x, n):
    """Return the last entry of x minus its n-th entry from the end.

    With n = 1 both terms are the last entry, so the difference is 0.
    """
    latest = x[-1]
    earlier = x[-n]
    return latest - earlier
def scale(x, a=1):
    """Rescale x in place so the absolute values of its entries sum to a.

    Args:
        x: mutable sequence of numbers; overwritten entry by entry.
        a: target value for sum(abs(entry)); defaults to 1.

    Returns:
        The same object x. If the absolute sum is zero (x is empty or all
        zeros) no rescaling is possible and x is returned unchanged.
    """
    abs_total = sum(abs(entry) for entry in x)
    if abs_total == 0:
        # Nothing to normalise against; dividing would raise
        # ZeroDivisionError for plain Python numbers.
        return x
    factor = a / float(abs_total)
    for pos in range(len(x)):
        x[pos] = x[pos] * factor
    return x
def decay_linear(x, n):
    """Return the linearly decaying weighted average of the last n entries of x.

    Weights are proportional to n, n-1, ..., 1 and normalised to sum to 1;
    the last entry x[-1] receives the largest weight and x[-n] the smallest.
    """
    # Normalising factor 1 / (n + (n-1) + ... + 1) applied to every raw weight.
    norm = 1 / float(sum(range(n, 0, -1)))
    total = 0
    for age, raw_weight in enumerate(range(n, 0, -1)):
        # age 0 is the latest entry, age n-1 the oldest one in the window.
        total = total + (raw_weight * norm) * x[-(age + 1)]
    return total
def Ts_argmin(x, n):
    """Locate the minimum of the last n entries of x.

    Returns the 1-based position of the first occurrence inside the window
    (1 = oldest entry x[-n], n = latest entry x[-1]), or -1 when no entry
    compares equal to the minimum.
    """
    lowest = min(x[-n:])
    # Walk the window from its oldest entry towards the latest one.
    for back in range(n, 0, -1):
        if x[-back] == lowest:
            return n - back + 1
    return -1
def Ts_argmax(x, n):
    """Locate the maximum of the last n entries of x.

    Returns the 1-based position of the first occurrence inside the window
    (1 = oldest entry x[-n], n = latest entry x[-1]), or -1 when no entry
    compares equal to the maximum.
    """
    highest = max(x[-n:])
    # Walk the window from its oldest entry towards the latest one.
    for back in range(n, 0, -1):
        if x[-back] == highest:
            return n - back + 1
    return -1
#%%