In [68]:
%load_ext autoreload
%autoreload 2

import os
import sys
# Make the parent directory importable (added only once); presumably this is
# what lets the project-local packages imported below resolve.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import pandas as pd
import numpy as np
from AnalysisUtils.analysis_utils import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Interpretation of log-returns

In [69]:
# Simple returns vs. log-returns on a small balance curve

balance = pd.Series(np.array([100, 150, 75, 75, 75]))

# Absolute step-to-step change. np.diff adds no leading NaN, so `ret` has one
# element fewer than the two NaN-prefixed series computed below.
ret = np.diff(balance.to_numpy())
# difference_rate is presumably provided by the star import from
# AnalysisUtils.analysis_utils; it is not defined in this notebook.
ret_rate = difference_rate(balance)

# Log return = ln(final value) - ln(initial value) = ln(final / initial)
log_ret = np.log(balance.div(balance.shift(1))).to_numpy()

print("balance: ", balance.to_numpy())
print("ret: ", ret)
print("ret_rate: ", np.array(ret_rate))
print("log_ret: ", log_ret)

balance:  [100 150  75  75  75]
ret:  [ 50 -75   0   0]
ret_rate:  [ nan  0.5 -0.5  0.   0. ]
log_ret:  [        nan  0.40546511 -0.69314718  0.          0.        ]


### Interpretation of log_diff and reverse procedure

In [70]:
# Step 1: build a reproducible series of 10 uniform random numbers.
# Seeding the legacy global generator pins the values shown below.
np.random.seed(0)

s = pd.Series(np.random.random(size=10))

print(s.to_numpy())

# Expected values:
# [ 0.5488135   0.71518937  0.60276338  0.54488318  0.4236548   0.64589411
#   0.43758721  0.891773    0.96366276  0.38344152]

[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548  0.64589411
 0.43758721 0.891773   0.96366276 0.38344152]


In [71]:
# Be careful when padding a differenced series with NaN: with `prepend`,
# np.diff returns as many elements as its input instead of one fewer.
np.diff(s.to_numpy(), prepend=np.nan).shape

(10,)

In [72]:
# Log-difference: ln(s[i]) - ln(s[i-1]), with NaN in the first position
np.diff(np.log(s.to_numpy()), prepend=np.nan)

array([        nan,  0.26478867, -0.17102265, -0.10095328, -0.25165246,
        0.42171661, -0.38935955,  0.71193559,  0.07752978, -0.92155428])

In [73]:
# Undo the log-difference to recover the original values.
# Besides the differenced series "t", only the FIRST value of "s" is needed:
# it replaces the leading NaN so that the cumulative sum rebuilds ln(s).
log_s = np.log(s)
t = log_s.diff()
t.iat[0] = log_s.iat[0]
res = np.exp(t.cumsum())

print(res.to_numpy())

# Expected values (identical to the original series):
# [ 0.5488135   0.71518937  0.60276338  0.54488318  0.4236548   0.64589411
#   0.43758721  0.891773    0.96366276  0.38344152]

[0.5488135  0.71518937 0.60276338 0.54488318 0.4236548  0.64589411
 0.43758721 0.891773   0.96366276 0.38344152]


### Pandas DataFrame and Pandas.core.Series

In [74]:
from Utils.ta import *

def Rule16(period, balance):
    """Band-breakout rule: trade against moves outside the Bollinger bands.

    The signal is -1 where the series is above the value returned by
    ``bollinger_hband`` and +1 where it is below the value returned by
    ``bollinger_lband``, delayed by one step so that each position is applied
    to the following step's change.

    Parameters
    ----------
    period : int
        Window length, forwarded as ``n`` to the two band helpers.
    balance : pandas.Series or array-like
        Series the rule is evaluated on. NOTE(review): the band helpers come
        from ``Utils.ta`` and presumably require a pandas Series - confirm.

    Returns
    -------
    tuple
        ``(abs(total), signal)`` where ``total`` is the sum of
        ``signal * step change`` and ``signal`` is the shifted position series.
    """
    close = balance
    # Step-to-step change with a leading NaN. Series.diff keeps the index of
    # `close`, so the product below aligns row by row even when the input is
    # not indexed 0..n-1 (rebuilding the series from np.diff silently reset
    # the index and made the label-aligned product mostly NaN in that case).
    # NOTE: despite the variable name this is a plain difference, not a
    # log-return.
    logr = pd.Series(close).diff()
    upper_band = bollinger_hband(close, n=period)
    lower_band = bollinger_lband(close, n=period)
    # -1 above the upper band, +1 below the lower band, 0 otherwise;
    # shift(1) delays the position by one step.
    signal = (-1 * (close > upper_band) + 1 * (close < lower_band)).shift(1)
    port_logr = signal * logr
    return (abs(port_logr.sum()), signal)

balance = pd.Series(np.array([100, 150, 75, 75, 75]))
# NaN-prefixed differences, so `ret` has exactly one entry per balance row
ret = np.diff(balance.to_numpy(), prepend=np.nan)
df = pd.DataFrame(data={"logr": ret})
# Rule16 returns (score, signal); only the signal series is kept here
r1 = Rule16(period=17, balance=balance)[1]

### Use the following snippet to assign a pandas Series to a pandas DataFrame column (rebuild the Series on the DataFrame's index first)

In [75]:
# Rebuild the signal from its raw values on df's own index, so the column
# assignment lines up by position rather than by the signal's original labels.
r1_s = pd.Series(data=r1.to_numpy(), index=df.index)
df["rule1"] = r1_s

In [76]:
# Show the inputs next to the frame that now carries the rule signal
print("balance: ", balance.to_numpy())
print("ret: ", ret)
print(df)

balance:  [100 150  75  75  75]
ret:  [ nan  50. -75.   0.   0.]
   logr  rule1
0   NaN    NaN
1  50.0    0.0
2 -75.0    0.0
3   0.0    0.0
4   0.0    0.0


### Differences between @ and \* in numpy 

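`@` — matrix multiplication (矩阵乘法); for two 1-D arrays it returns their inner (dot) product (点乘), not a cross product (叉乘)

\* — element-wise multiplication (逐元素相乘)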

In [77]:
# `@` on two 1-D arrays is the inner product: 1*3 + 3*2 = 9
# (transposing a 1-D array changes nothing, so no .T is needed)
print(np.array([1, 3]) @ np.array([3, 2]))
# `*` multiplies element by element: [1*3, 3*2] = [3, 6]
print(np.array([1, 3]) * np.array([3, 2]))

9
[3 6]


In [78]:
# Small integer array used by the argmax lookup in the next cell
arr = np.array(range(1, 5))
arr

array([1, 2, 3, 4])

In [79]:
# Positions of the maximum: np.where with a lone condition returns a tuple
# holding one index array per dimension.
np.where(arr == arr.max())

(array([3]),)

### Testing the DataFrame.set_index 

In [80]:
# 3x3 integer frame; column "b" deliberately repeats the value 4
df2 = pd.DataFrame(
    data=np.array([[1, 2, 3], [4, 4, 6], [7, 4, 9]]),
    columns=list("abc"),
)

In [81]:
df2

Unnamed: 0,a,b,c
0,1,2,3
1,4,4,6
2,7,4,9


In [82]:
# Index the frame by column "b" while keeping "b" as a regular column
# (drop=False). The repeated value 4 shows the new index need not be unique.
df2.set_index("b", drop=False, inplace=True)
df2

Unnamed: 0_level_0,a,b,c
b,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,1,2,3
4,4,4,6
4,7,4,9


### pandas squeeze

In [83]:
primes = pd.Series([2, 3, 5, 7])
# A boolean mask keeps the result a Series even when only one element survives
is_even = (primes % 2) == 0
even_primes = primes[is_even]
print(type(even_primes))
even_primes

<class 'pandas.core.series.Series'>


0    2
dtype: int64

In [84]:
# Series -> scalar: squeezing a one-element Series returns the element itself
# (the printed type here is numpy.int64, not pandas Series).
print(type(even_primes.squeeze()))
even_primes.squeeze()

<class 'numpy.int64'>


2

In [85]:
# 2x2 integer frame for the squeeze examples below
# (this rebinds the name `df` used earlier in the notebook)
df = pd.DataFrame({"a": [1, 3], "b": [2, 4]})
df

Unnamed: 0,a,b
0,1,2
1,3,4


In [86]:
# Selecting with a list of labels keeps the result a one-column DataFrame
df_a = df.loc[:, ['a']]
df_a

Unnamed: 0,a
0,1
1,3


In [87]:
# Squeezing along the columns turns the one-column frame into a Series
print(type(df_a.squeeze(axis='columns')))
df_a.squeeze(axis='columns')

<class 'pandas.core.series.Series'>


0    1
1    3
Name: a, dtype: int64

In [88]:
# `df` has two rows, so there is nothing to squeeze along the rows:
# the result is still a DataFrame.
print(type(df.squeeze(axis='rows')))
df.squeeze(axis='rows')

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,a,b
0,1,2
1,3,4


In [89]:
# One row (index label below 1) and one column: a 1x1 DataFrame
is_first_row = df.index < 1
df_0a = df.loc[is_first_row, ['a']]
df_0a

Unnamed: 0,a
0,1


###              ----------------------------------

In [90]:
# Squeezing the single row of the 1x1 frame leaves a Series keyed by column
print(type(df_0a.squeeze(axis='rows')))
df_0a.squeeze(axis='rows')

<class 'pandas.core.series.Series'>


a    1
Name: 0, dtype: int64

In [91]:
# A second squeeze collapses that one-element Series down to the scalar
print(type(df_0a.squeeze(axis='rows').squeeze()))
df_0a.squeeze(axis='rows').squeeze()

<class 'numpy.int64'>


1