In [2]:
%load_ext autoreload
%autoreload 2

import os
import sys
# Put the parent directory on sys.path (once) so that packages located there
# can be imported by the cells below.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
from AnalysisUtils.analysis_utils import *

### Interpretation of log-returns

In [3]:
# Compare three views of the same balance path: absolute change, simple rate
# of return, and log-return.
balance = pd.Series(np.array([100, 150, 75, 75, 75]))

# Absolute change between consecutive balances (one element shorter than balance).
# np.diff is used here in place of the project helper `difference`.
ret = np.diff(balance)
# Rate of change per step, computed by the project helper.
ret_rate = difference_rate(balance)

# Log return = ln(final value) - ln(initial value) = ln(final / initial).
# The first entry is NaN because the first balance has no predecessor.
log_ret = np.log(balance / balance.shift(1)).to_numpy()

print("balance: ", balance.to_numpy())
print("ret: ", ret)
print("ret_rate: ", np.asarray(ret_rate))
print("log_ret: ", log_ret)

balance:  [100 150  75  75  75]
ret:  [ 50 -75   0   0]
ret_rate:  [ nan  0.5 -0.5  0.   0. ]
log_ret:  [        nan  0.40546511 -0.69314718  0.          0.        ]


### Interpretation of log_diff and reverse procedure

In [4]:
# Step 1: build a reproducible series of 10 uniform samples from [0, 1).
np.random.seed(0)  # fixed seed so the values below are the same on every run
s = pd.Series(np.random.random(size=10))

print(s.to_numpy())

# Expected values:
# [ 0.5488135   0.71518937  0.60276338  0.54488318  0.4236548   0.64589411
#   0.43758721  0.891773    0.96366276  0.38344152]

[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548  0.64589411
 0.43758721 0.891773   0.96366276 0.38344152]


In [5]:
# Caution: np.diff returns one element fewer than its input. Prepending (or
# appending) a NaN pads the result back to the input length -- 10 here.
np.diff(s, prepend=np.nan).shape

(10,)

In [6]:
# Log-differences of s: ln(s[i]) - ln(s[i-1]); the first entry is NaN.
(
    np.log(s)
    .diff()
    .to_numpy()
)

array([        nan,  0.26478867, -0.17102265, -0.10095328, -0.25165246,
        0.42171661, -0.38935955,  0.71193559,  0.07752978, -0.92155428])

In [7]:
# Invert the log-diff transform to recover the original values.
# One value from the original series "s" is required: its first element, whose
# logarithm replaces the NaN that diff() leaves at position 0 of "t".
t = np.log(s).diff()
t.iloc[0] = np.log(s.iloc[0])
# cumsum rebuilds ln(s) step by step; exp then undoes the logarithm.
res = t.cumsum().pipe(np.exp)

print(res.to_numpy())

# Expected values (identical to s):
# [ 0.5488135   0.71518937  0.60276338  0.54488318  0.4236548   0.64589411
#   0.43758721  0.891773    0.96366276  0.38344152]

[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548  0.64589411
 0.43758721 0.891773   0.96366276 0.38344152]


### pandas DataFrame and pandas Series (`pandas.Series`)

In [8]:
from Utils.ta import *

def Rule16(period, balance):
    """Score a Bollinger-band reversal rule on a balance/price series.

    The signal is -1 where the series is above the upper band, +1 where it is
    below the lower band and 0 otherwise, delayed by one step with ``shift(1)``
    (so its first entry is NaN).

    Args:
        period: Window length, passed to the Bollinger band helpers as ``n``.
        balance: Values the rule is evaluated on. A pandas Series is assumed,
            because the combined comparison result is shifted with ``.shift``.

    Returns:
        Tuple ``(score, signal)``: ``score`` is the absolute value of the sum
        of ``signal * change`` and ``signal`` is the shifted position series.
    """
    close = balance
    # NOTE: despite the name, this is the plain first difference of `close`,
    # not a log-return. It is rebuilt on close's own index (when it has one) so
    # that `signal * logr` pairs rows one-to-one even for a non-default index;
    # with a fresh RangeIndex the product would align on labels and turn NaN.
    logr = pd.Series(np.diff(close, prepend=[np.nan]),
                     index=getattr(close, "index", None))
    # Band helpers are not defined in this notebook -- presumably they come
    # from the wildcard import of Utils.ta.
    upper_band = bollinger_hband(close, n=period)
    lower_band = bollinger_lband(close, n=period)
    # Short (-1) above the upper band, long (+1) below the lower band; the
    # shift makes each position depend on the previous row's comparison.
    signal = (-1 * (close > upper_band) + 1 * (close < lower_band)).shift(1)
    port_logr = signal * logr
    return (abs(port_logr.sum()), signal)

# Small balance path used to exercise Rule16.
balance = pd.Series(np.array([100, 150, 75, 75, 75]))
# First differences, NaN-padded at the front so the length matches balance.
ret = np.diff(balance, prepend=np.nan)
df = pd.DataFrame(data={"logr": ret})
# Rule16 returns (score, signal); only the signal series is kept here.
r1 = Rule16(period=17, balance=balance)[1]

### Use the following snippet to assign a pandas Series to a pandas DataFrame column: rebuild the Series on the DataFrame's index first

In [9]:
# Re-wrap the signal values on df's own index so that the column assignment
# lines up row by row instead of being aligned on the signal's index labels.
r1_s = pd.Series(data=r1.to_numpy(), index=df.index)
df["rule1"] = r1_s

In [10]:
# Show the inputs next to the frame holding the differences and the rule signal.
print("balance: ", balance.to_numpy())
print("ret: ", np.asarray(ret))
print(df)

balance:  [100 150  75  75  75]
ret:  [ nan  50. -75.   0.   0.]
   logr  rule1
0   NaN    NaN
1  50.0    0.0
2 -75.0    0.0
3   0.0    0.0
4   0.0    0.0


### Differences between @ and \* in numpy 

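`@` is matrix multiplication (矩阵乘法); for two 1-D arrays it returns their inner (dot) product, a scalar.

`*` is element-wise multiplication (逐元素相乘); for two equal-shaped arrays it returns an array of that same shape.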

In [11]:
# A 1-D array is its own transpose, so `@` needs no `.T` on either operand.
print(np.array([1, 3]) @ np.array([3, 2]))  # inner product of the two vectors
print(np.array([1, 3]) * np.array([3, 2]))  # element-wise product

9
[3 6]


In [12]:
# Small integer array 1..4 used by the next cell.
arr = np.arange(1, 5)
arr

array([1, 2, 3, 4])

In [13]:
# Positions of every element equal to the maximum, returned as a tuple holding
# one index array per dimension.
np.nonzero(arr == arr.max())

(array([3]),)

### Testing the DataFrame.set_index 

In [18]:
# 3x3 integer frame; column "b" deliberately contains a repeated value (4).
df2 = pd.DataFrame(
    data=np.array([[1, 2, 3], [4, 4, 6], [7, 4, 9]]),
    columns=list("abc"),
)

In [19]:
# Display the frame as constructed (default integer index).
df2

Unnamed: 0,a,b,c
0,1,2,3
1,4,4,6
2,7,4,9


In [None]:
# Use column "b" as the index while also keeping it as a regular column
# (drop=False). The result is assigned back to df2 instead of mutating the
# frame with inplace=True, which pandas discourages and which offers no
# performance benefit.
df2 = df2.set_index(keys=["b"], drop=False)
df2