# Lunar Phases and Cryptocurrency Returns

We obtain preliminary results investigating the relation between lunar phases and cryptocurrency returns. Our findings show that trading guided by moon phases — namely, buying at a full moon phase and selling at the next new moon phase — yields significantly higher returns than other baseline methods. Further, we show an even stronger sinusoidal relationship between the moon phase in which we begin two-week trading (approximately half a moon cycle) and cryptocurrency market returns. We show that the highest returns are achieved when we buy at a full moon phase and sell at a new moon phase, implying these phases map to local bottoms and tops, [something that has long been hypothesized](https://twitter.com/marenaltman/status/1427292474523881483?s=20&t=NhDoyOIzXYU75ZmdoHPo8w).

In [246]:
import bisect
import glob
import numpy as np
import pandas as pd
import pylunar
import re
import seaborn as sns
import statsmodels.formula.api as smf
from tqdm import tqdm
from matplotlib import pyplot as plt

pd.options.mode.chained_assignment = None
pd.options.display.float_format = '{:,.3f}'.format
pd.set_option("display.precision", 4)

In [129]:
SECONDS_IN_DAY = 86400

## Collect data
We consider the top 30 cryptocurrency tokens, by marketcap, listed on CoinGecko on Aug. 14, 2022, not including stablecoins or wrapped tokens. The data was gathered using the [CoinGecko API](https://www.coingecko.com/en/api). The script used to call the API is in this directory `./collect_data.py`.

In [130]:
DF_MAIN = pd.concat([pd.read_csv(fname) for fname in glob.glob("./data/CoinGecko_*.csv")])

Clean up data.

We remove rows where either the market cap or the total volume is zero. This usually indicates very early data, near a token's listing.

In [131]:
# Clean the ticker symbol: keep only the part before the first '|'.
DF_MAIN['ticker'] = [tkr.split('|')[0] for tkr in DF_MAIN['ticker']]

# Report and drop rows whose market cap or total volume is exactly zero.
for col in ['market_caps', 'total_volumes']:
    print('Removing {} rows where `{}` == 0'.format(
        (DF_MAIN[col] == 0).sum(), col
    ))
    DF_MAIN = DF_MAIN[DF_MAIN[col] != 0]

# Rank tickers by the `total_volumes` value of each ticker's last row
# (largest first), then order the frame by that rank while keeping every
# ticker's rows sorted by unixtime.
# NOTE(review): this comment originally said the ranking uses the latest
# market caps, but the code reads `total_volumes` -- confirm which is intended.
# NOTE(review): `.last()` takes the last row in current row order; presumably
# that is the most recent observation -- verify against the CSV ordering.
custom_dict = {
    v: k
    for (k, v) in enumerate(
        DF_MAIN.groupby('ticker').last().total_volumes.sort_values(ascending=False).index
    )
}
# sort=False keeps the groups in the rank order produced by sort_values;
# reset_index(drop=True) then renumbers the rows 0..n-1.
DF_MAIN = DF_MAIN.sort_values(
    by='ticker',
    key=lambda x: x.map(custom_dict)
).groupby('ticker', sort=False).apply(
    lambda x: x.sort_values('unixtime')
).reset_index(drop=True)

Removing 443 rows where `market_caps` == 0
Removing 637 rows where `total_volumes` == 0


## Summary Statistics

In [132]:
def cumulative_return(x):
    """Return the relative change from the first to the last element of *x*.

    *x* is accessed positionally through ``.iloc`` (e.g. a pandas Series of
    prices); the result is ``(last - first) / first``.
    """
    first = x.iloc[0]
    last = x.iloc[-1]
    return (last - first) / first

def mean_monthly_log_return(x):
    """Return the mean 30-step-ahead log return of the series *x*.

    For each position ``i`` the log return is ``log(x[i + 30] / x[i])``; the
    final 30 positions have no value 30 steps ahead and are excluded before
    averaging.
    """
    forward_ratio = x.shift(-30) / x
    log_returns = np.log(forward_ratio).iloc[:-30]
    return np.mean(log_returns)

def std_monthly_log_return(x):
    """Return the standard deviation of the 30-step-ahead log returns of *x*.

    Uses the same log-return series as the mean counterpart: for each
    position ``i`` the value is ``log(x[i + 30] / x[i])``, with the final 30
    positions excluded. The spread is computed with ``np.std``.
    """
    forward_ratio = x.shift(-30) / x
    log_returns = np.log(forward_ratio).iloc[:-30]
    return np.std(log_returns)

# Per-ticker summary table: first date (YYYY-MM-DD prefix of the date string),
# number of observations, and the return statistics defined above.
df = DF_MAIN.groupby("ticker").agg(
    start_date=pd.NamedAgg(column='date', aggfunc=lambda dates: dates.values[0][:10]),
    no_obs=pd.NamedAgg(column='date', aggfunc=lambda dates: len(dates)),
    cumulative_return=pd.NamedAgg(column='prices', aggfunc=cumulative_return),
    mean_monthly_log_return=pd.NamedAgg(column='prices', aggfunc=mean_monthly_log_return),
    std_monthly_log_return=pd.NamedAgg(column='prices', aggfunc=std_monthly_log_return),
)
df = df.sort_values('start_date')

# Display formats: percent for the cumulative return, fixed decimals otherwise.
display_formats = {
    "cumulative_return": "{:,.0%}",
    "mean_monthly_log_return": "{:.3f}",
    "std_monthly_log_return": "{:.2f}",
}
df.style.format(formatter=display_formats)

Unnamed: 0_level_0,start_date,no_obs,cumulative_return,mean_monthly_log_return,std_monthly_log_return
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
XRP,2013-12-27,3139,"1,364%",0.026,0.45
LTC,2013-12-27,3138,169%,0.008,0.33
BTC,2013-12-27,3138,"3,121%",0.031,0.23
DOGE,2013-12-27,3137,"12,707%",0.047,0.47
XMR,2014-05-21,2990,"6,198%",0.042,0.37
XLM,2014-08-06,2911,"4,386%",0.04,0.46
ETH,2015-08-08,2549,"127,406%",0.083,0.39
ETC,2016-07-25,2198,"6,278%",0.033,0.4
BCH,2017-08-02,1825,-81%,-0.022,0.41
BNB,2017-09-16,1778,"267,835%",0.129,0.54


### Add moon phase data

In [133]:
mi = pylunar.MoonInfo((51, 30, 36), (0, 7, 5)) # London, UK

def moon_phase_data(df):
    """Label every row of *df* with its moon cycle and days since new moon.

    The fractional moon phase is evaluated for each ``df['date']`` string
    using the module-level ``mi`` object. A row whose phase is strictly lower
    than both of its neighbours is treated as a new moon. Day counts are
    differences between index labels, so the index is assumed to be integer
    labels that advance by one per daily row -- TODO confirm for gappy data.

    Parameters
    ----------
    df : DataFrame with a ``date`` column of strings such as
        ``"2012-06-19 14:08:00"``.

    Returns
    -------
    DataFrame indexed like *df* with two columns:
    ``moon_cycle`` (number of detected new moons at or before the row) and
    ``days_after_new_moon`` (index distance to the latest new moon; rows
    before the first detected new moon get ``30 - distance_to_first``).

    Raises
    ------
    IndexError
        If no new moon is detected in *df* (e.g. a series shorter than one
        lunar cycle).
    """
    # Raw string: "\s" inside a plain literal is an invalid escape sequence.
    # Compiled once here instead of being re-parsed for every row.
    date_separators = re.compile(r"-|\s+|:")

    def moon_phase(date_string):
        # example data string:
        # "2012-06-19 14:08:00"
        mi.update(tuple(map(int, date_separators.split(date_string))))
        return mi.fractional_phase()

    moon_phases = df['date'].apply(moon_phase)
    is_new_moon = (
        (moon_phases.shift(1) > moon_phases)
        & (moon_phases.shift(-1) > moon_phases)
    )
    # A plain sorted list is all bisect needs.
    new_moon_idxs = list(is_new_moon.index[is_new_moon])

    def moon_cycle(idx):
        return bisect.bisect(new_moon_idxs, idx)

    def day_after_new_moon(idx, cycle):
        # cycle == 0 wraps to the last new moon, giving a negative distance,
        # which routes rows before the first new moon to the fallback below.
        nearest_new_moon = new_moon_idxs[cycle - 1]
        result = idx - nearest_new_moon
        if result >= 0:
            return result
        return 30 - (new_moon_idxs[0] - idx)

    # Compute each row's cycle once and reuse it for the day count.
    cycles = [moon_cycle(idx) for idx in df.index]
    days = [day_after_new_moon(idx, cycle) for idx, cycle in zip(df.index, cycles)]

    return pd.DataFrame(
        {'moon_cycle': cycles, 'days_after_new_moon': days},
        index=df.index,
    )

In [134]:
# moon_phase_data returns a frame indexed like its input group, and the join
# relies on that index matching DF_MAIN's own. group_keys=False keeps pandas
# from prepending the ticker as an extra index level (the default behaviour
# from pandas 2.0 on), which would break the alignment.
DF_MAIN = DF_MAIN.join(
    DF_MAIN.groupby('ticker', group_keys=False).apply(moon_phase_data)
)
DF_MAIN.tail()

Unnamed: 0,unixtime,date,ticker,prices,market_caps,total_volumes,moon_cycle,days_after_new_moon
47775,1658880000,2022-07-27 02:00:00,LEO,5.084,4764746448.096,1970945.047,39,28
47776,1658966400,2022-07-28 02:00:00,LEO,5.178,4843044935.433,1707115.191,39,29
47777,1659052800,2022-07-29 02:00:00,LEO,5.321,4965723084.313,1898325.579,40,0
47778,1659139200,2022-07-30 02:00:00,LEO,5.181,4848771860.852,2088763.614,40,1
47779,1659225600,2022-07-31 02:00:00,LEO,5.056,4716690886.729,1955159.439,40,2


## Test for relationship between moon phase and returns

- We look at n-day trading (where n is 15, 30, 60) and ... we call this a cycle

- We shift the start of the cycle by one day

- We pick 30 & 60 since these correspond to buying and selling on the same moon phase. And if there is a moon phase effect, using these days should counteract any effect for any other n day period (e.g. 7, 14).

In [None]:
def cumulative_return(row, df, days):
    """Return the relative price change from *row* to *days* days later.

    Parameters
    ----------
    row : record exposing ``unixtime`` and ``prices``.
    df : DataFrame indexed by unixtime with a ``prices`` column.
    days : holding period in days.

    Returns
    -------
    ``(exit_price - entry_price) / entry_price``, or NaN when *df* has no row
    at exactly ``row.unixtime + days * SECONDS_IN_DAY``.
    """
    exit_ts = row.unixtime + days * SECONDS_IN_DAY
    if exit_ts in df.index:
        exit_price = df.loc[exit_ts].prices
        return (exit_price - row.prices) / row.prices
    return np.nan

def mean_daily_log_return(row, df, days):
    """Return the mean log return between consecutive available days.

    The window runs from ``row.unixtime`` to ``days`` days later (inclusive)
    in daily steps. Timestamps missing from *df* are skipped, so a return that
    bridges a gap is still counted as one step.

    Parameters
    ----------
    row : record exposing an integer ``unixtime``.
    df : DataFrame indexed by unixtime with a ``prices`` column.
    days : window length in days.

    Returns
    -------
    Mean of ``log(p[t+1] / p[t])`` over the available timestamps; NaN when
    fewer than two timestamps of the window are present.
    """
    ts_start = row.unixtime
    ts_end = ts_start + days * SECONDS_IN_DAY
    # Index membership is a hash lookup; previously a set of the whole index
    # was rebuilt for every candidate timestamp.
    index = df.index
    tss = [
        ts
        for ts in range(ts_start, ts_end + SECONDS_IN_DAY, SECONDS_IN_DAY)
        if ts in index
    ]
    # Every label in tss exists in the index, so .loc cannot raise KeyError.
    s = df.loc[tss].prices
    return np.log(s.shift(-1) / s).iloc[:-1].mean()
    
def compute_returns(df):
    """Add the cosine regressor and n-day return columns to one ticker's rows.

    Parameters
    ----------
    df : DataFrame for a single ticker with ``unixtime``, ``prices`` and
        ``days_after_new_moon`` columns.

    Returns
    -------
    A filtered copy of *df* (rows with ``days_after_new_moon <= 28``) with a
    ``cos_trans`` column and, for each window in (15, 30, 60) days, the
    columns ``{n}_day_cumulative_return`` and ``{n}_day_mean_daily_log_return``.
    """
    # remove rows where `days_after_new_moon` >= 29 since data is much sparser for these values
    # .copy() so the new columns are written to an independent frame, not a slice
    df = df[df.days_after_new_moon <= 28].copy()
    # cosine transform
    df["cos_trans"] = np.cos(2 * np.pi * df["days_after_new_moon"] / 29.53)

    # Build the unixtime-indexed lookup once; it used to be rebuilt for every
    # row inside the lambda. The return functions only read its index and
    # `prices`, so columns added later in the loop are not needed in it.
    by_unixtime = df.set_index("unixtime")

    for days in tqdm([15, 30, 60]):
        for fn in [cumulative_return, mean_daily_log_return]:
            col = '{}_day_{}'.format(days, fn.__name__)
            df[col] = df.apply(
                lambda row: fn(row, by_unixtime, days),
                axis=1
            )

    return df

In [314]:
DF_RETURNS = DF_MAIN.groupby('ticker').apply(compute_returns)

100%|█████████████████████████████████████████████| 3/3 [00:37<00:00, 12.53s/it]
100%|█████████████████████████████████████████████| 3/3 [00:17<00:00,  5.92s/it]
100%|█████████████████████████████████████████████| 3/3 [00:18<00:00,  6.06s/it]
100%|█████████████████████████████████████████████| 3/3 [00:06<00:00,  2.23s/it]
100%|█████████████████████████████████████████████| 3/3 [00:41<00:00, 13.82s/it]
100%|█████████████████████████████████████████████| 3/3 [00:37<00:00, 12.61s/it]
100%|█████████████████████████████████████████████| 3/3 [01:51<00:00, 37.12s/it]
100%|█████████████████████████████████████████████| 3/3 [00:20<00:00,  6.93s/it]
100%|█████████████████████████████████████████████| 3/3 [01:59<00:00, 39.72s/it]
100%|█████████████████████████████████████████████| 3/3 [00:07<00:00,  2.63s/it]
100%|█████████████████████████████████████████████| 3/3 [00:56<00:00, 18.99s/it]
100%|█████████████████████████████████████████████| 3/3 [01:20<00:00, 26.99s/it]
100%|███████████████████████

In [None]:
#DF_RETURNS.head(30)

In [315]:
# Initialize an empty results table: one row per ticker, with columns for
# every (window, return type, regression metric) combination.
# NOTE(review): `tickers` is assigned in a later cell of this notebook, so that
# cell has to run before this one -- confirm the intended execution order.
DF_RESULT = pd.DataFrame([],
    index=pd.Index(tickers, name='Ticker:'),
    columns=pd.MultiIndex.from_product([
      ['15 Day', '30 Day', '60 Day'],
      ['Cumulative', 'Mean Daily Log'],
      #['Cumulative', 'Mean Daily Log'],
      ['coef', 't', 'P>|t|']
    ], names=['Window:', 'Return:', 'Metric:'])
)

In [316]:
DF_RETURNS = DF_RETURNS.reset_index(drop=True)

In [302]:
#DF_RETURNS

Unnamed: 0,unixtime,date,ticker,prices,market_caps,total_volumes,moon_cycle,days_after_new_moon,cos_trans,15_day_cumulative_return,15_day_mean_daily_log_return,30_day_cumulative_return,30_day_mean_daily_log_return,60_day_cumulative_return,60_day_mean_daily_log_return
0,1508284800,2017-10-18 02:00:00,ADA,0.027,696021404.308,2351678.122,0,28,0.947,-0.206,,-0.035,,,
1,1508457600,2017-10-20 02:00:00,ADA,0.030,785580027.642,8883473.082,1,0,1.000,-0.280,-0.022,-0.085,-0.003,17.080,
2,1508544000,2017-10-21 02:00:00,ADA,0.029,741202123.880,5308856.917,1,1,0.977,-0.230,-0.017,0.049,0.002,16.220,
3,1508630400,2017-10-22 02:00:00,ADA,0.028,720669848.472,2901876.122,1,2,0.911,-0.198,-0.015,0.029,0.001,16.914,
4,1508716800,2017-10-23 02:00:00,ADA,0.026,675273816.053,3505126.543,1,3,0.803,-0.144,-0.010,0.103,0.003,14.919,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46882,1658793600,2022-07-26 02:00:00,XRP,0.337,16267799275.739,12266428974.382,106,27,0.859,,,,,,
46883,1658880000,2022-07-27 02:00:00,XRP,0.337,16280339620.383,28749929057.470,106,28,0.947,,,,,,
46884,1659052800,2022-07-29 02:00:00,XRP,0.372,17993565318.244,15143779787.628,107,0,1.000,,,,,,
46885,1659139200,2022-07-30 02:00:00,XRP,0.370,18046047855.846,9366962075.838,107,1,0.977,,,,,,


In [317]:
# Tickers ordered by the market cap of each ticker's last row, largest first.
tickers = DF_RETURNS.groupby('ticker').last().sort_values('market_caps', ascending=False).index

# For every ticker, regress each return column on the cosine-transformed moon
# phase and store coefficient, t-value and p-value in DF_RESULT.
for tkr in tqdm(tickers, total=len(tickers)):
    df = DF_RETURNS[DF_RETURNS.ticker == tkr].reset_index(drop=True)

    # remove rows where `days_after_new_moon` >= 29 since data is much sparser for these values
    df = df[df.days_after_new_moon <= 28]
    # cosine transform, attached to the frame so the formula below resolves
    # `cos_trans` from `data` explicitly rather than from whichever column or
    # variable of that name happens to be in scope
    df = df.assign(
        cos_trans=np.cos(2 * np.pi * df["days_after_new_moon"] / 29.53)
    )

    regs = []
    for days in [15, 30, 60]:
        for r in ['cumulative_return', 'mean_daily_log_return']:
            key = '{}_day_{}'.format(days, r)
            regs.append(
                smf.ols(
                    # Q("...") quotes the column name, which starts with a digit
                    'Q("{}") ~ cos_trans'.format(key),
                    data=df
                ).fit()
            )

    # Flatten to 18 values in the column order of DF_RESULT:
    # window -> return type -> (coef, t, P>|t|).
    DF_RESULT.loc[tkr] = [
        x for reg in regs
        for x in [
            reg.params.cos_trans,
            reg.tvalues.cos_trans,
            reg.pvalues.cos_trans
        ]
    ]

#DF_RESULT

100%|███████████████████████████████████████████| 30/30 [00:00<00:00, 32.16it/s]


In [318]:
DF_RESULT

Window:,15 Day,15 Day,15 Day,15 Day,15 Day,15 Day,30 Day,30 Day,30 Day,30 Day,30 Day,30 Day,60 Day,60 Day,60 Day,60 Day,60 Day,60 Day
Return:,Cumulative,Cumulative,Cumulative,Mean Daily Log,Mean Daily Log,Mean Daily Log,Cumulative,Cumulative,Cumulative,Mean Daily Log,Mean Daily Log,Mean Daily Log,Cumulative,Cumulative,Cumulative,Mean Daily Log,Mean Daily Log,Mean Daily Log
Metric:,coef,t,P>|t|,coef,t,P>|t|,coef,t,P>|t|,coef,t,P>|t|,coef,t,P>|t|,coef,t,P>|t|
Ticker:,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
BTC,-0.001,-0.156,0.876,-0.0,-0.205,0.837,0.001,0.152,0.879,0.0,0.092,0.926,0.001,0.096,0.924,-0.0,-0.074,0.941
ETH,-0.02,-2.261,0.024,-0.001,-2.559,0.011,0.012,0.759,0.448,-0.0,-0.16,0.873,0.034,0.865,0.387,-0.0,-0.421,0.674
BNB,0.225,2.701,0.007,0.002,1.797,0.072,0.123,1.244,0.214,0.0,0.111,0.912,0.772,1.929,0.054,0.0,0.99,0.322
XRP,0.017,1.231,0.218,0.0,0.467,0.64,-0.035,-1.214,0.225,0.0,0.126,0.9,-0.112,-1.231,0.219,0.0,0.054,0.957
ADA,0.024,1.408,0.159,0.001,0.753,0.451,0.036,0.7,0.484,-0.0,-0.043,0.966,-0.08,-0.523,0.601,0.0,0.227,0.82
SOL,-0.103,-5.2,0.0,-0.006,-5.277,0.0,-0.014,-0.347,0.728,0.0,0.352,0.725,-0.064,-0.675,0.5,0.0,0.327,0.744
DOT,-0.063,-4.073,0.0,-0.004,-4.674,0.0,0.01,0.327,0.744,0.001,0.754,0.451,0.024,0.348,0.728,0.0,0.613,0.54
DOGE,-0.028,-1.72,0.086,-0.001,-1.531,0.126,0.019,0.723,0.47,0.0,0.276,0.782,0.061,1.099,0.272,0.0,0.159,0.874
MATIC,0.029,1.042,0.297,-0.0,-0.186,0.852,0.06,1.441,0.15,0.001,1.175,0.24,0.037,0.405,0.686,0.0,0.766,0.444
STETH,-0.127,-9.627,0.0,-0.008,-9.509,0.0,-0.007,-0.356,0.722,-0.001,-0.834,0.405,-0.025,-0.889,0.374,-0.0,-0.982,0.326


In [319]:
# TODO: toggle cell code

# Just styling stuff; code hidden by default

# Column keys of DF_RESULT (window, return type, metric), used to assign a
# number format to every column.
multiindex = pd.MultiIndex.from_product([
      ['15 Day', '30 Day', '60 Day'],
      ['Cumulative', 'Mean Daily Log'],
      ['coef', 't', 'P>|t|']
])

# Number formats: one decimal for t-values, four for p-values and for the
# Mean Daily Log coefficients, two for the Cumulative coefficients.
s = DF_RESULT.style.format(formatter={
    t:(
        "{:.1f}" if t[2] == 't'
        else "{:.4f}" if t[2] == 'P>|t|'
        else "{:.4f}" if t[1] == 'Mean Daily Log'
        else "{:.2f}"
    )
    for t in multiindex
})

# Replace the Styler's column index with an identical one whose third level
# name is blank.
# NOTE(review): presumably this hides the 'Metric:' label in the rendered
# header -- confirm that assigning `s.columns` is supported by the pandas
# version in use.
s.columns = pd.MultiIndex.from_product([
  ['15 Day', '30 Day', '60 Day'],
  ['Cumulative', 'Mean Daily Log'],
  ['coef', 't', 'P>|t|']
], names=['Window:', 'Return:', ''])

# Table-wide CSS rules for headers and cells.
s.set_table_styles([
    {'selector': '.index_name', 'props': 'font-weight:normal; font-weight: normal;'},
    {'selector': 'th.row_heading', 'props': 'font-weight:bold; text-align: center;'},
    {'selector': 'th.col_heading', 'props': 'text-align: center;'},
    {'selector': 'th.col_heading.level0', 'props': 'font-size: 1.5em; border-bottom: 1px solid darkgrey;'},
    {'selector': 'th.col_heading.level1', 'props': 'font-size: 1.2em; border-bottom: 1px solid darkgrey;'},
    {'selector': 'th.col_heading.level2', 'props': 'font-size: 1.2em; border-bottom: 1px solid darkgrey;'},
    {'selector': 'td', 'props': 'text-align: center; font-weight: normal;'},
    {'selector': 'th:not(.index_name)', 'props': 'background-color: black; color: white;'}
])

# Per-column left borders on three of the `coef` columns; overwrite=False
# keeps the table-wide rules set above.
s.set_table_styles({
    ('30 Day', 'Cumulative', 'coef'): [
        {'selector': 'th', 'props': 'border-left: 2px solid white'},
        {'selector': 'td', 'props': 'border-left: 2px solid black'}
    ],
    ('15 Day', 'Mean Daily Log', 'coef'): [
        {'selector': 'td', 'props': 'border-left: 1px solid black'}
    ],
    ('30 Day', 'Mean Daily Log', 'coef'): [
        {'selector': 'td', 'props': 'border-left: 1px solid black'}
    ]
}, overwrite=False, axis=0)

def highlight_pvalues(s):
    """Return per-cell CSS that shades each regression by its p-value.

    Intended for ``Styler.apply(..., axis=1)``: *s* is one table row indexed
    by ``(window, return type, metric)``. For each of the six
    (window, return type) groups, the three cells (coef, t, P>|t|) share one
    background colour chosen from the group's ``P>|t|`` value; smaller
    p-values map to a more saturated yellow.

    Returns
    -------
    list of 18 CSS strings in column order. Groups with p >= 0.1 (or a NaN
    p-value) get an empty string, i.e. no styling, rather than the malformed
    declaration ``background-color:#``.
    """
    def _color(pvalue):
        if pvalue < 0.001:
            return "FCF947"
        if pvalue < 0.01:
            return "FDFA75"
        if pvalue < 0.05:
            return "FEFDBA"
        if pvalue < 0.1:
            return "FFFEE8"
        # Not significant, or NaN (every comparison above is False for NaN).
        return ""

    props = []
    for x in ['15 Day', '30 Day', '60 Day']:
        for y in ['Cumulative', 'Mean Daily Log']:
            color = _color(s[x, y, 'P>|t|'])
            css = 'background-color:#{}'.format(color) if color else ''
            # same style for the coef, t and P>|t| cells of this group
            props.extend([css] * 3)
    return props

s.apply(highlight_pvalues, axis=1)

Window:,15 Day,15 Day,15 Day,15 Day,15 Day,15 Day,30 Day,30 Day,30 Day,30 Day,30 Day,30 Day,60 Day,60 Day,60 Day,60 Day,60 Day,60 Day
Return:,Cumulative,Cumulative,Cumulative,Mean Daily Log,Mean Daily Log,Mean Daily Log,Cumulative,Cumulative,Cumulative,Mean Daily Log,Mean Daily Log,Mean Daily Log,Cumulative,Cumulative,Cumulative,Mean Daily Log,Mean Daily Log,Mean Daily Log
Metric:,coef,t,P>|t|,coef,t,P>|t|,coef,t,P>|t|,coef,t,P>|t|,coef,t,P>|t|,coef,t,P>|t|
Ticker:,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
BTC,-0.0,-0.2,0.876,-0.0001,-0.2,0.8374,0.0,0.2,0.8792,0.0,0.1,0.9264,0.0,0.1,0.9238,-0.0,-0.1,0.9409
ETH,-0.02,-2.3,0.0239,-0.0012,-2.6,0.0106,0.01,0.8,0.4479,-0.0001,-0.2,0.873,0.03,0.9,0.3872,-0.0001,-0.4,0.6738
BNB,0.23,2.7,0.007,0.0017,1.8,0.0725,0.12,1.2,0.2136,0.0001,0.1,0.9117,0.77,1.9,0.0539,0.0004,1.0,0.3224
XRP,0.02,1.2,0.2182,0.0002,0.5,0.6403,-0.03,-1.2,0.2248,0.0,0.1,0.8999,-0.11,-1.2,0.2185,0.0,0.1,0.9572
ADA,0.02,1.4,0.1592,0.0006,0.8,0.4514,0.04,0.7,0.484,-0.0,-0.0,0.9659,-0.08,-0.5,0.6013,0.0001,0.2,0.8205
SOL,-0.1,-5.2,0.0,-0.0056,-5.3,0.0,-0.01,-0.3,0.7283,0.0003,0.4,0.7248,-0.06,-0.7,0.4999,0.0002,0.3,0.744
DOT,-0.06,-4.1,0.0001,-0.0044,-4.7,0.0,0.01,0.3,0.7441,0.0006,0.8,0.4508,0.02,0.3,0.728,0.0004,0.6,0.5404
DOGE,-0.03,-1.7,0.0856,-0.0009,-1.5,0.1259,0.02,0.7,0.4696,0.0001,0.3,0.7824,0.06,1.1,0.2718,0.0,0.2,0.874
MATIC,0.03,1.0,0.2974,-0.0002,-0.2,0.8523,0.06,1.4,0.1498,0.0009,1.2,0.2403,0.04,0.4,0.6856,0.0004,0.8,0.4436
STETH,-0.13,-9.6,0.0,-0.008,-9.5,0.0,-0.01,-0.4,0.7219,-0.0005,-0.8,0.4049,-0.03,-0.9,0.3745,-0.0005,-1.0,0.3263


In [109]:
DF_RESULT

Window:,15 Day,15 Day,15 Day,30 Day,30 Day,30 Day,60 Day,60 Day,60 Day
Return:,Cumulative,Cumulative,Cumulative,Cumulative,Cumulative,Cumulative,Cumulative,Cumulative,Cumulative
Metric:,coef,t,P>|t|,coef,t,P>|t|,coef,t,P>|t|
Ticker:,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3
BTC,-0.001,-0.156,0.876,0.001,0.152,0.879,0.001,0.096,0.924
ETH,-0.02,-2.261,0.024,0.012,0.759,0.448,0.034,0.865,0.387
BNB,0.225,2.701,0.007,0.123,1.244,0.214,0.772,1.929,0.054
XRP,0.017,1.231,0.218,-0.035,-1.214,0.225,-0.112,-1.231,0.219
ADA,0.024,1.408,0.159,0.036,0.7,0.484,-0.08,-0.523,0.601
SOL,-0.103,-5.2,0.0,-0.014,-0.347,0.728,-0.064,-0.675,0.5
DOT,-0.063,-4.073,0.0,0.01,0.327,0.744,0.024,0.348,0.728
DOGE,-0.028,-1.72,0.086,0.019,0.723,0.47,0.061,1.099,0.272
MATIC,0.029,1.042,0.297,0.06,1.441,0.15,0.037,0.405,0.686
STETH,-0.127,-9.627,0.0,-0.007,-0.356,0.722,-0.025,-0.889,0.374


In [None]:
# Two side-by-side bar charts of the 15-day regression coefficients per
# ticker: cumulative return on the left, mean daily log return on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15,5))
ax1.tick_params(labelrotation=90)
ax2.tick_params(labelrotation=90)
fig.suptitle('Coefficients ... TODO')

# Left panel: coefficient on cos_trans for the 15-day cumulative return.
df = DF_RESULT[('15 Day', 'Cumulative', 'coef')]
g1 = sns.barplot(x=df.index, y=df.values, color='black', ax=ax1);
# Tick labels use the ticker text before the first '|'.
g1.set_xticklabels([tkr.split('|')[0] for tkr in DF_RESULT.index]);
g1.set(title='Cumulative');

# Right panel: coefficient on cos_trans for the 15-day mean daily log return.
df = DF_RESULT[('15 Day', 'Mean Daily Log', 'coef')]
g2 = sns.barplot(x=df.index, y=df.values, color='black', ax=ax2);
g2.set_xticklabels([tkr.split('|')[0] for tkr in DF_RESULT.index]);
g2.set(title='Mean Daily Log');

#ax2.plot(x, -y)

In [None]:
# example, KEEP!!!
#sns.lineplot(data=fmri, x="timepoint", y="signal", hue="event")

In [86]:
def cumulative_return(row, df, days):
    """Return the relative price change from *row* to *days* days later.

    The body reads ``row.unixtime`` and ``row.prices``, and the caller in
    ``compute_returns`` invokes ``fn(row, df, days)``, so ``row`` is restored
    as the first parameter (it was missing from the signature).

    Parameters
    ----------
    row : record exposing ``unixtime`` and ``prices``.
    df : DataFrame indexed by unixtime with a ``prices`` column.
    days : holding period in days.

    Returns
    -------
    ``(exit_price - entry_price) / entry_price``, or NaN when *df* has no row
    at exactly ``row.unixtime + days * SECONDS_IN_DAY``.
    """
    ts_start = row.unixtime
    ts_end = ts_start + days * SECONDS_IN_DAY
    if ts_end not in df.index:
        return np.nan
    row2 = df.loc[ts_end]
    return (row2.prices - row.prices) / row.prices

def mean_daily_log_return(row, df, days):
    """Return the mean daily log return over a fully populated window.

    The window covers every daily timestamp from ``row.unixtime`` to ``days``
    days later (inclusive). If the ``.loc`` lookup raises ``KeyError`` for a
    timestamp missing from *df*, the result is NaN.

    NOTE(review): a later definition with the same name in this cell replaces
    this function -- confirm which one is meant to be used.

    Parameters
    ----------
    row : record exposing an integer ``unixtime``.
    df : DataFrame indexed by unixtime with a ``prices`` column.
    days : window length in days.
    """
    ts_start = row.unixtime
    ts_end = ts_start + days * SECONDS_IN_DAY
    tss = list(range(ts_start, ts_end + SECONDS_IN_DAY, SECONDS_IN_DAY))
    # Only the label lookup can raise KeyError, so only it is guarded.
    try:
        s = df.loc[tss].prices
    except KeyError:
        return np.nan
    # get daily log returns in this period
    return np.log(s.shift(-1) / s).iloc[:-1].mean()
    
def mean_daily_log_return(df, days, threshold=0.75):
    """Return the mean log return over consecutive rows of *df*.

    Each row is paired with the row that follows it. Pairs whose timestamps
    are 172800 seconds (two days) or more apart are discarded. If fewer than
    ``days * threshold`` pairs remain, NaN is returned.

    Parameters
    ----------
    df : DataFrame with ``unixtime`` and ``prices`` columns.
    days : nominal number of days, used only for the minimum-pair check.
    threshold : required fraction of *days* that must be covered by pairs.
    """
    quotes = df[['unixtime', 'prices']]
    earlier = quotes.iloc[:-1].reset_index(drop=True)
    later = quotes.iloc[1:].reset_index(drop=True)

    pairs = pd.concat([earlier, later], axis=1)
    pairs.columns = ['unixtime0', 'prices0', 'unixtime1', 'prices1']

    # keep only pairs less than two days apart
    gap = pairs.unixtime1 - pairs.unixtime0
    pairs = pairs[gap < 172800]

    if len(pairs) < days * threshold:
        return np.nan

    return np.log(pairs.prices1 / pairs.prices0).mean()

In [87]:
def compute_returns(df):
    """Add the cosine regressor and n-day return columns to one ticker's rows.

    Parameters
    ----------
    df : DataFrame for a single ticker with ``unixtime``, ``prices`` and
        ``days_after_new_moon`` columns.

    Returns
    -------
    A filtered copy of *df* (rows with ``days_after_new_moon <= 28``) with a
    ``cos_trans`` column and, for each window in (15, 30, 60) days, the
    columns ``{n}_day_cumulative_return`` and ``{n}_day_mean_daily_log_return``.
    """
    # remove rows where `days_after_new_moon` >= 29 since data is much sparser for these values
    # .copy() so the new columns are written to an independent frame, not a slice
    df = df[df.days_after_new_moon <= 28].copy()
    # cosine transform
    df["cos_trans"] = np.cos(2 * np.pi * df["days_after_new_moon"] / 29.53)

    # Build the unixtime-indexed lookup once; it used to be rebuilt for every
    # row inside the lambda.
    by_unixtime = df.set_index("unixtime")

    for days in tqdm([15, 30, 60]):
        for fn in [cumulative_return, mean_daily_log_return]:
            col = '{}_day_{}'.format(days, fn.__name__)
            df[col] = df.apply(
                lambda row: fn(row, by_unixtime, days),
                axis=1
            )

    return df

In [88]:
df = DF_MAIN.groupby('ticker').apply(compute_returns)

100%|█████████████████████████████████████████████| 3/3 [00:05<00:00,  1.89s/it]
100%|█████████████████████████████████████████████| 3/3 [00:03<00:00,  1.22s/it]
100%|█████████████████████████████████████████████| 3/3 [00:03<00:00,  1.31s/it]
100%|█████████████████████████████████████████████| 3/3 [00:02<00:00,  1.39it/s]
100%|█████████████████████████████████████████████| 3/3 [00:06<00:00,  2.09s/it]
100%|█████████████████████████████████████████████| 3/3 [00:05<00:00,  1.89s/it]
100%|█████████████████████████████████████████████| 3/3 [00:11<00:00,  3.72s/it]
100%|█████████████████████████████████████████████| 3/3 [00:04<00:00,  1.37s/it]
100%|█████████████████████████████████████████████| 3/3 [00:11<00:00,  3.86s/it]
100%|█████████████████████████████████████████████| 3/3 [00:02<00:00,  1.33it/s]
100%|█████████████████████████████████████████████| 3/3 [00:07<00:00,  2.66s/it]
100%|█████████████████████████████████████████████| 3/3 [00:09<00:00,  3.12s/it]
100%|███████████████████████

In [90]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,unixtime,date,ticker,prices,market_caps,total_volumes,moon_cycle,days_after_new_moon,cos_trans,15_day_cumulative_return,15_day_mean_daily_log_return,30_day_cumulative_return,30_day_mean_daily_log_return,60_day_cumulative_return,60_day_mean_daily_log_return
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
ADA,14299,1508284800,2017-10-18 02:00:00,ADA,0.027,696021404.308,2351678.122,0,28,0.947,-0.206,,-0.035,,,
ADA,14301,1508457600,2017-10-20 02:00:00,ADA,0.030,785580027.642,8883473.082,1,0,1.000,-0.280,-0.022,-0.085,-0.003,17.080,
ADA,14302,1508544000,2017-10-21 02:00:00,ADA,0.029,741202123.880,5308856.917,1,1,0.977,-0.230,-0.017,0.049,0.002,16.220,
ADA,14303,1508630400,2017-10-22 02:00:00,ADA,0.028,720669848.472,2901876.122,1,2,0.911,-0.198,-0.015,0.029,0.001,16.914,
ADA,14304,1508716800,2017-10-23 02:00:00,ADA,0.026,675273816.053,3505126.543,1,3,0.803,-0.144,-0.010,0.103,0.003,14.919,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
XRP,6271,1658793600,2022-07-26 02:00:00,XRP,0.337,16267799275.739,12266428974.382,106,27,0.859,,,,,,
XRP,6272,1658880000,2022-07-27 02:00:00,XRP,0.337,16280339620.383,28749929057.470,106,28,0.947,,,,,,
XRP,6274,1659052800,2022-07-29 02:00:00,XRP,0.372,17993565318.244,15143779787.628,107,0,1.000,,,,,,
XRP,6275,1659139200,2022-07-30 02:00:00,XRP,0.370,18046047855.846,9366962075.838,107,1,0.977,,,,,,


In [346]:
g = sns.FacetGrid(df, col="ticker", sharey=False, col_wrap=5)
g.map(sns.lineplot, "days_after_new_moon", "30_day_cumulative_return")

KeyError: 'ticker'

In [166]:
def mean_daily_log_return(row, df, days, threshold=.5):
    """Debug variant: mean daily log return over a fully populated window.

    Prints *row* on entry and ``row.name`` whenever the ``.loc`` lookup raises
    ``KeyError`` for a missing timestamp, in which case NaN is returned.

    Removed from the earlier draft: an intermediate price slice bounded by
    ``ts_start + days`` (which mixed seconds with days) and a shifted-price
    frame built from it. Neither result was ever used.

    Parameters
    ----------
    row : record exposing an integer ``unixtime``.
    df : DataFrame indexed by unixtime with a ``prices`` column.
    days : window length in days.
    threshold : currently unused; kept so existing calls keep working.
    """
    print(row)
    ts_start = row.unixtime
    ts_end = ts_start + days * SECONDS_IN_DAY
    tss = list(range(ts_start, ts_end + SECONDS_IN_DAY, SECONDS_IN_DAY))
    # Only the label lookup can raise KeyError, so only it is guarded.
    try:
        s = df.loc[tss].prices
    except KeyError:
        print(row.name)
        return np.nan
    # get daily log returns in this period
    return np.log(s.shift(-1) / s).iloc[:-1].mean()

In [173]:
df = DF_MAIN[DF_MAIN.ticker == 'BTC']
df

Unnamed: 0,unixtime,date,ticker,prices,market_caps,total_volumes,moon_cycle,days_after_new_moon
0,1388102400,2013-12-27 01:00:00,BTC,734.270,8944473292.000,62881800.000,0,25
1,1388188800,2013-12-28 01:00:00,BTC,738.810,9002769255.000,28121600.000,0,26
2,1388275200,2013-12-29 01:00:00,BTC,726.470,8855251580.000,27018300.000,0,27
3,1388361600,2013-12-30 01:00:00,BTC,760.520,9270681761.000,24717100.000,0,28
4,1388448000,2013-12-31 01:00:00,BTC,755.160,9205343763.000,21903500.000,0,29
...,...,...,...,...,...,...,...,...
3133,1658880000,2022-07-27 02:00:00,BTC,21235.612,405692421501.527,53995620035.181,106,28
3134,1658966400,2022-07-28 02:00:00,BTC,22908.884,437193517021.845,54498281944.452,106,29
3135,1659052800,2022-07-29 02:00:00,BTC,23822.068,454806622614.759,55609627980.657,107,0
3136,1659139200,2022-07-30 02:00:00,BTC,23847.583,459609388579.555,41089335195.640,107,1


In [185]:
'''
df.apply(
    lambda row: mean_daily_log_return(row, df.set_index("unixtime"), 10),
    axis=1
)
'''

'''
df2 = pd.DataFrame({
    0: df.set_index('unixtime').prices,
    1: df.set_index('unixtime').prices.shift(-1)
}).iloc[:-1]
'''

pd.concat(df[['unixtime', 'prices']], df[['unixtime', 'prices']])

  pd.concat(df[['unixtime', 'prices']], df[['unixtime', 'prices']])


TypeError: first argument must be an iterable of pandas objects, you passed an object of type "DataFrame"

In [230]:
# Scratch check on BTC: log returns between consecutive rows.
df = DF_MAIN[DF_MAIN.ticker == 'BTC']

# Put each row (all but the last) next to the row that follows it.
df2 = pd.concat([
    df[['unixtime', 'prices']].iloc[:-1].reset_index(drop=True),
    df[['unixtime', 'prices']].iloc[1:].reset_index(drop=True)
], axis=1)

df2.columns = ['unixtime0', 'prices0', 'unixtime1', 'prices1']

#df2['unixtime'].apply(lambda row: row.iloc[1] - row.iloc[0], axis=1)

# Keep only pairs less than 172800 seconds (two days) apart.
df2 = df2[df2.unixtime1 - df2.unixtime0 < 172800]

# if df2.shape ...

np.log(df2.prices1/df2.prices0)


0       0.006
1      -0.017
2       0.046
3      -0.007
4       0.017
        ...  
3132   -0.004
3133    0.076
3134    0.039
3135    0.001
3136   -0.008
Length: 3136, dtype: float64

In [284]:
df = DF_MAIN[DF_MAIN.ticker == 'BTC']
days = 7

#def compute_returns(df):

# Independent copy of the two columns needed here. It is created before it is
# read below; the earlier draft consulted a `df2` left over from another cell.
df2 = df[['unixtime', 'prices']].copy()

# fill all missing times
ts_start = int(df.iloc[0].unixtime)
ts_end = int(df.iloc[-1].unixtime)

tss = set(range(ts_start, ts_end + SECONDS_IN_DAY, SECONDS_IN_DAY))
missing_ts = tss - set(df2.unixtime)

# Append one NaN-priced row per missing day. Concatenating avoids writing by
# label, which could overwrite an existing row if the labels are not 0..n-1.
if missing_ts:
    filler = pd.DataFrame({'unixtime': sorted(missing_ts), 'prices': np.nan})
    df2 = pd.concat([df2, filler], ignore_index=True)

# Renumber after sorting so a row's label equals its position; the windowed
# helper defined next slices with `.iloc` using the row label.
df2 = df2.sort_values('unixtime').reset_index(drop=True)

df2["daily_log_return"] = np.log(df2.shift(-1).prices / df2.prices)

def _mean_daily_returns(row, days, threshold=0.75):
    """Return the mean of ``daily_log_return`` over the *days* rows from *row*.

    The window is the positional slice ``df2.iloc[row.name: row.name + days]``
    of the notebook-level frame ``df2``. NaN is returned when the window holds
    fewer than ``days * threshold`` rows.
    """
    window = df2.iloc[row.name: row.name + days]
    if window.shape[0] < days * threshold:
        return np.nan
    return window.daily_log_return.mean()
    
pd.DataFrame({
    "cumulative_return": (df2.shift(-days).prices - df2.prices) / df2.prices,
    "mean_daily_log": df2.apply(lambda row: _mean_daily_returns(row, days), axis=1)
})

df2

Unnamed: 0,unixtime,prices,daily_log_return
0,1388102400,734.270,0.006
1,1388188800,738.810,-0.017
2,1388275200,726.470,0.046
3,1388361600,760.520,-0.007
4,1388448000,755.160,0.017
...,...,...,...
3133,1658880000,21235.612,0.076
3134,1658966400,22908.884,0.039
3135,1659052800,23822.068,0.001
3136,1659139200,23847.583,-0.008


In [270]:
(df2.shift(-days).prices - df2.prices) / df2.prices

0      0.124
1      0.149
2      0.266
3      0.231
4      0.094
        ... 
3133     NaN
3134     NaN
3135     NaN
3136     NaN
3137     NaN
Name: prices, Length: 3138, dtype: float64

In [271]:
df2.apply(
    lambda row: df2.iloc[row.name: row.name + days].daily_log_return.mean(),
    axis=1
)

0       0.017
1       0.020
2       0.034
3       0.030
4       0.013
        ...  
3133    0.027
3134    0.011
3135   -0.004
3136   -0.008
3137      NaN
Length: 3138, dtype: float64