# Factor Expressions

In [1]:
# Large volume implies bullish

# group_neutralize(
#   volume / (ts_sum(volume, 60) / 60),
#   sector
# )

'''
ts_sum: sum over last 60 days, 
group_neutralize: an int operator, neutralize alpha over the group, can use customized group for neutralization
'''

# Improvement

# ts_step(20) * volume / (ts_sum(volume, 60) / 60)
'''
ts_step: n for today, n-1 for yesterday, ... 1 for last day; Smooth over the last 20 days, control the turnover
'''

'\nts_step: n for today, n-1 for yesterday, ... 1 for last day; Smooth over the last 20 days, control the turnover\n'

In [2]:
# Reversion

# rank(
#   -(close - (ts_product(close, 5)) ^ 0.2)
# )

'''
ts_product: product over last n days
rank: weight equally between 0~1
'''

# Improvement

# rank( -signedpower(close - sum(close, 5)/5, 2))

'''
signedpower(x, e) = sign(x) * (abs(x) ^ e)
'''

In [None]:
# Idea: if volume ratio is increasing, then we expect a reversal because the market is absorbing the price info
# if the volume ratio is decreasing, then the price will continue the trend because the market hasn't absorbed the info yet 


# - ts_corr(
#   rank(close), rank(volume/adv20), 5
#)
'''
ts_corr: correlation over last n days
'''

# Possible improvement: compare short corr vs long corr 

In [None]:
# Combine ideas

# A = expr1
# B = expr2
# rank(scale(A, scale=1, longscale=1, shortscale=1) + scale(B, scale=1, longscale=1, shortscale=1))

In [None]:
# GroupMean
# rank(
#   group_mean(ts_delta(close, 5), 1, subindustry) - ts_delta(close, 5)
#)

In [3]:
# Earnings per share

# Looks like the alpha won't work
# (rank(eps/last_diff_value(eps, 5)) > 0.7 || volume > ts_delay(volume, 1)) ?
# rank(-ts_delta(close, 5)) : -1

'''
last_diff_value: diff for non-frequent data change
'''

'\nlast_diff_value: diff for non-frequent data change\n'

In [1]:
# triggerTrade = (ts_arg_max(volume, 5) < 1) && (volume>= ts_sum(volume, 5) / 5)
# triggerExit = -1
# alphaExp = -rank(ts_delta(close, 2))
# trade_when(triggerTrade, alphaExp, triggerExit)

'''
ts_arg_max: if the current day is the largest, return 0; if the previous day is the largest, return 1, etc.
'''


# triggerTrade = (ts_arg_min(volume, 5) > 3) || (volume >= ts_sum(volume, 5) / 5)
# triggerExit = -1
# alphaExp = -rank((high + low) / 2 - close)
# trade_when(triggerTrade, alphaExp, triggerExit)

'\n\n'

In [2]:
# Log(pasteurize(vwap/close))
'''
Pasteurize: set to nan if the signal is inf, or instrument isn't in the universe
'''

# rank(ts_covariance(ts_std_dev(-returns, 22), (vwap-close), 22))

"\nPasteurize: set to nan if the signal is inf, or instrument isn't in the universe\n"

In [3]:
# TS Regression Operator

# A = ts_regression(close, close, 20, LAG=1, RETTYPE=3)
# B = ts_sum(ts_delay(close, 1), 2) / 2
# C = (A - B) / close
# D = 1 - rank(volume / (ts_sum(volume, 30) / 30))
# - ts_rank(C, 60) / D

In [4]:
# Reversion using Min and Max

# -(ts_sum(close-min(low, ts_delay(close, 1)), 5)  /  ts_sum(max(high, ts_delay(close, 1)) - low, 5))

# -rank((close - ts_max(high, 5)) / (ts_max(high, 5) - ts_min(low, 5)))

In [1]:
# Overfitting

'''
1. What is overfitting
OS performance is much worse than ID

2. How to avoid overfitting
- Focus on the idea instead of backtesting
- Avoid changes that result in only a very small improvement
- Logical parameter fitting (shouldn't try random number), use 5 (week), 10(biweek), 20(month), 60(3month), 120(half year), 250(year)

3. Measure the robustness
Test on different subsets of the universe, e.g. created on top2000 and test on top 500, if performance decreases, it means the alpha is trained on the most illiquid equities

Predictability of alpha: remove the magnitude of signal (equalize capital of all stocks), the result is the alpha strength
'''

"\n1. What is overfitting\nOS performance is much worse than ID\n\n2. How to avoid overfitting\n- Focus on the idea instead of backtesting\n- Avoid changes that result in only a very small improvement\n- Logical parameter fitting (shouldn't try random number), use 5 (week), 10(biweek), 20(month), 60(3month), 120(half year), 250(year)\n\n3. Measure the robustness\nTest on different subsets of the universe, e.g. created on top2000 and test on top 500, if performance decreases, it means the alpha is trained on the most illiquid equities\n\nPredictability of alpha: remove the magnitude of signal (equalize capital of all stocks), the result is the alpha strength\n"

In [1]:
'''Bias
1. Look ahead bias

2. Survivorship bias
Coverage of the database

3. Psychological bias
There is 2% drawdown in backtest, when same thing happens in OS, PM may stop the strategy early in the drawdown, which may lose further bounce back.
'''

'Bias\n1. Look ahead bias\n\n2. Survivorship bias\nCoverage of the database\n\n3. Psychological bias\nThere is 2% drawdown in backtest, when same thing happens in OS, PM may stop the strategy early in the drawdown, which may lose further bounce back.\n'

# Fundamental Data

In [None]:
# Debt vs Equity (Creditors vs Investors)

# ts_rank(-debt/equity, 240)

'''
rank -debt/equity for each instrument over last 240 days (normalize), then return the rank of the value
'''

In [None]:
# If debt/asset ratio decrease, then risk decrease, we should long

# group_neutralize(rank(-ts_delta(debt, 60) / assets), sector)

In [None]:
# Inventory turnover: If a company can sell the products fast, it saves storage fee, insurance, deterioration

# rank(ts_zscore(inventory_turnover, 240))

# ts_zscore = (x - ts_mean(x)) / ts_std(x)

In [2]:
# sales / share ---> upward momentum

# sales_ps > last_diff_value(sales_ps, 5) ? 1 : rank(-ts_delta(close, 5))

# last_diff_value: last different value within n days

In [None]:
# How efficient a company use its fixed assets to generate goods 

# rank(ts_rank(cogs / Ppent, 240))

# cogs: cost of goods sold
# ppent: property, plant, equipment


# Combine with inventory turnover
# rank(ts_rank(cogs / Ppent, 240) * (1 + rank(ts_rank(inventory_turnover, 240))))

## Analyst Data (Estimate of EPS)

In [None]:
# Higher estimation, higher potential to increase

# rank(ts_rank(est_eps/close, 40))

In [3]:
# Reversion using P/E

# -rank(ts_delta(close/est_eps, 5))

In [4]:
# est_epsr > last_diff_value(est_epsr, 5) ? 1 : rank(-ts_delta(close, 5))

In [5]:
# Higher inventory / assets value is a problem, which may cause cashflow issue

# (0.5 - rank(ts_rank(inventory/(assets - goodwill), 60))) * rank(ts_rank(inventory_turnover, 120))

# goodwill is the brand value

## Relationship data

In [6]:
# If per customer social sentiment is increasing, go long

# ts_rank(snt_social_volume / rel_num_cust, 60)

In [None]:
# Customer

# grouprank(subindustry, argmin(rel_ret_cust / rel_num_comp, 15))

# Sentiment Data

In [7]:
# snt_value: mood indicator, negative is bearish, positive is bullish
# snt_buzz: intensity of sentiment. >1 is high, <1 is low
# snt_buzz_bfl: if historical data is missing, filled by 1
# snt_buzz_ret: snt_buzz change from previous day
# snt_bearish: scale 0-4
# snt_bullish: scale 0-4
# snt_ratio: bullish / bearish
# snt_ratio_tsrank
# snt_bearish_tsrank
# snt_bullish_tsrank
# snt_social_value: derive from social media
# snt_social_volume: events from social media

In [None]:
'''
rank(ts_rank(snt_social_volume, 60)) > 0.6 ? 
grouprank(subindustry, ((sum(snt_social_value, 10) / 10) / (ts_max(snt_social_value, 60)))) : 0
'''

In [8]:
# sentiment + price

'''
rank(((high + low) / 2 - close) / (high - low)) * 
rank(ts_rank(snt_social_volume, 40))
'''

'\nrank(((high + low) / 2 - close) / (high - low)) * \nrank(ts_rank(snt_social_volume, 40))\n'

In [10]:
# Sentiment strength

# grouprank(market, snt_bullish/snt_bearish) * ts_rank(snt_social_volume, 254)

In [11]:
# If sentiment is high, then the reversion will happen in the future, vice versa

'''
y = snt_bullish - snt_bearish

rank(-(y - ts_min(y, 10)) / (ts_max(y, 10) - ts_min(y, 10))) * 
(1 + rank(snt_social_volume))
'''

'\ny = snt_bullish - snt_bearish\n\nrank(-(y - ts_min(y, 10)) / (ts_max(y, 10) - ts_min(y, 10))) * \n(1 + rank(snt_social_volume))\n'