#### 資料預處理
使用hw1的爬蟲程式後會得到截至目前為止的ETF資料，每檔ETF為一個csv檔，全部放在datadir資料夾底下。接著把資料處理成3年期間內每隔一週取一筆資料和每隔一月取一筆資料的形式；另外，將原本的價格資料轉化為與上期相比的變動比例，以便後續計算。

In [7]:
import os, pickle
import pandas as pd
# Keep only CSV files: anything else in the folder (checkpoint folders,
# OS metadata files, ...) would make read_csv fail.
etf_list = [fname for fname in os.listdir('datadir/') if fname.lower().endswith('.csv')]
years = 3
days_of_week = 5     # rows stepped per weekly sample
weeks_of_year = 52
days_of_month = 21   # rows stepped per monthly sample
months_of_year = 12


def _period_returns(prices, step, n_samples, label=''):
    """Return relative changes between prices sampled from the tail of *prices*.

    ``n_samples`` prices spaced ``step`` rows apart are taken from the last
    ``step * n_samples`` rows, and each one (except the first) is converted
    to ``(current - previous) / previous``.

    Args:
        prices: pandas Series of prices, oldest row first.
        step: number of rows between two consecutive samples.
        n_samples: number of prices to sample.
        label: name used in the error message (e.g. the file name).

    Returns:
        A list with ``n_samples - 1`` relative changes.

    Raises:
        ValueError: if *prices* has fewer than ``step * n_samples`` rows.
    """
    window = step * n_samples
    if len(prices) < window:
        raise ValueError(
            '{}: need at least {} rows, got {}'.format(label, window, len(prices))
        )
    # .iloc is positional; plain prices[i] is a label lookup and only works
    # by coincidence when the index is 0..n-1.
    sampled = [prices.iloc[i] for i in range(len(prices) - window, len(prices), step)]
    return [(cur - prev) / prev for prev, cur in zip(sampled, sampled[1:])]


etf_dict = {}
for fname in etf_list:
    etf_name = fname.split('.')[0]
    data = pd.read_csv('datadir/' + fname, parse_dates = True)['Adj Close']
    week_data = _period_returns(data, days_of_week, weeks_of_year * years, fname)
    month_data = _period_returns(data, days_of_month, months_of_year * years, fname)
    etf_dict[etf_name] = {'week_data': week_data, 'month_data': month_data}

# Cache the processed series so later cells can reload them without re-reading the CSVs.
with open('etf_dict.pkl', 'wb') as f:
    pickle.dump(etf_dict, f)

In [8]:
import os, pickle
# Reload the cached per-ETF weekly/monthly change series from disk.
with open('etf_dict.pkl', 'rb') as pickle_file:
    etf_dict = pickle.load(pickle_file)

#### 三種指標的計算

In [226]:
def evaluate_A(X, is_month=True, risk_free=None):
    """Return the skewness-adjusted Sharpe ratio (ASSR) of a return series.

    Computes ``SR = (mean - r) / std * sqrt(dt)`` with ``r = rate * dt`` and
    returns ``SR * sqrt(1 + b3 * skew / 3 * SR)`` with ``b3 = 2``.

    Args:
        X: sequence of per-period relative returns.
        is_month: True for monthly returns (dt = 1/12), False for weekly
            returns (dt = 1/52).
        risk_free: risk-free rate before scaling by ``dt``. When None, the
            module-level ``rf`` is used, as before.

    Returns:
        The ASSR as a float. The result is nan when the term under the
        square root is negative.
    """
    annual_rate = rf if risk_free is None else risk_free
    X = np.array(X)
    mu, sigma, skew = np.mean(X), np.std(X), scipy.stats.skew(X)
    dt = 1 / 12 if is_month else 1 / 52
    r = annual_rate * dt
    # NOTE(review): annualising a per-period Sharpe ratio usually multiplies
    # by sqrt(1 / dt); this multiplies by sqrt(dt). Kept as-is -- confirm intent.
    SR = (mu - r) / sigma * dt ** 0.5
    b3 = 2
    return SR * (1 + b3 * skew / 3 * SR) ** 0.5
def evaluate_B(X, is_month=True, risk_free=None):
    """Return the excess mean return divided by the average shortfall below r.

    With ``r = rate * dt``, the result is
    ``(mean(X) - r) / (sum(r - x for x in X if x < r) / len(X))``.

    Args:
        X: sequence of per-period relative returns.
        is_month: True for monthly returns (dt = 1/12), False for weekly
            returns (dt = 1/52).
        risk_free: risk-free rate before scaling by ``dt``. When None, the
            module-level ``rf`` is used, as before.

    Returns:
        The ratio as a float. When no return lies below ``r`` the
        denominator is zero and numpy yields inf (or nan) with a warning.
    """
    annual_rate = rf if risk_free is None else risk_free
    dt = 1 / 12 if is_month else 1 / 52
    r = annual_rate * dt
    returns = np.array(X)
    # Boolean mask selects the returns that fall short of the threshold.
    below = returns[returns < r]
    avg_shortfall = (r * len(below) - np.sum(below)) / len(returns)
    return (np.mean(returns) - r) / avg_shortfall
def f(x, g, r):
    """Return sum(exp(-x * (g_i - r))) over g, minus the number of items in g.

    x = 0 always gives 0, because every exponential term is then 1.
    """
    total = 0
    for g_i in g:
        total = total + np.exp(-x * (g_i - r))
    return total - len(g)
def evaluate_C(X, is_month=True, etf='', risk_free=None):
    """Return exp(-x*) where x* is a root of ``f(x, X, r)`` found by fsolve.

    Args:
        X: sequence of per-period relative returns.
        is_month: True for monthly returns (dt = 1/12), False for weekly
            returns (dt = 1/52).
        etf: unused; kept so existing callers that pass it keep working.
        risk_free: risk-free rate before scaling by ``dt``. When None, the
            module-level ``rf`` is used, as before.

    Returns:
        ``np.exp(-x*)`` as a float.
    """
    annual_rate = rf if risk_free is None else risk_free
    dt = 1 / 12 if is_month else 1 / 52
    r = annual_rate * dt
    # f(0) is always 0, so a search that ends at or below ~0 has not produced
    # a usable positive root; in that case search again from a negative start.
    solution = fsolve(f, [15], (X, r))[0]
    # "not (> 1e-5)" rather than "<= 1e-5" so that a nan result also retries.
    if not solution > 1e-5:
        solution = fsolve(f, [-15], (X, r))[0]
    # NOTE(review): fsolve's convergence status is not checked; a failed
    # solve returns its last iterate -- confirm whether that is acceptable.
    return np.exp(-solution)

In [227]:
import numpy as np
import scipy.stats
from scipy.optimize import root,fsolve
import matplotlib.pyplot as plt
# Risk-free rate; evaluate_A/B/C scale it by 1/12 or 1/52 per period.
rf = 0.0243
etf_list = list(etf_dict)
score = {'week': {}, 'month': {}}
for etf, series in etf_dict.items():
    weekly = series['week_data']
    monthly = series['month_data']
    # Weekly series: pass is_month=False so the weekly time step is used.
    score['week'][etf] = {
        'A': evaluate_A(weekly, False),
        'B': evaluate_B(weekly, False),
        'C': evaluate_C(weekly, False, etf),
    }
    # Monthly series: the evaluators default to is_month=True.
    score['month'][etf] = {
        'A': evaluate_A(monthly),
        'B': evaluate_B(monthly),
        'C': evaluate_C(monthly),
    }
score['week']

{'NFRA': {'A': 0.008323470543617147,
  'B': 0.17338379213769908,
  'C': 0.0003920036072456576},
 'DOL': {'A': 0.01148105178809248,
  'B': 0.25585355215750955,
  'C': 0.00010218643455262517},
 'CGW': {'A': 0.016867309735854166,
  'B': 0.38711041253935236,
  'C': 7.808057022575456e-07},
 'SMH': {'A': 0.02522092663946969,
  'B': 0.6183294300189554,
  'C': 6.400351108172568e-06},
 'JXI': {'A': 0.012015579341412157,
  'B': 0.2589053714202352,
  'C': 2.2974160473128374e-05},
 'EFA': {'A': 0.012076702059478563,
  'B': 0.2699380518040314,
  'C': 7.407539588619816e-05},
 'EFV': {'A': 0.011408281725466237,
  'B': 0.26109316812668365,
  'C': 0.00020128006958852144},
 'HEFA': {'A': 0.015624767409516738,
  'B': 0.3505321304599574,
  'C': 2.9508361113433935e-06},
 'GDX': {'A': 0.007765804812899182,
  'B': 0.15362405254853292,
  'C': 0.07240156171414239},
 'VT': {'A': 0.0152557869160234,
  'B': 0.34394100061780697,
  'C': 3.6808367645698757e-06},
 'VEA': {'A': 0.012760753015594647,
  'B': 0.284766890

將ETF照指標結果排序，其中A和B指標越大越好，C指標是risk index，越小越好。

In [228]:
import pandas as pd
# Order the ETFs for every interval/metric pair. A and B are sorted in
# descending order (larger first); C is sorted in ascending order.
ranking = {
    interval: {
        evl: sorted(
            etf_list,
            key=lambda symbol: score[interval][symbol][evl],
            reverse=(evl != 'C'),
        )
        for evl in ('A', 'B', 'C')
    }
    for interval in ('week', 'month')
}

#### 週分析資料

In [229]:
# One single-column table per interval/metric, indexed by rank 1..N.
df = {}
for interval in ('week', 'month'):
    tables = {}
    for evl in ('A', 'B', 'C'):
        table = pd.DataFrame(
            data=ranking[interval][evl],
            index=list(range(1, len(etf_list) + 1)),
            columns=['ETF Symbol'],
        )
        table.index.name = 'Ranking'
        table.columns.name = evl + '指標'
        tables[evl] = table
    df[interval] = tables

# Show the weekly tables for metrics A, B and C in that order.
for evl in ('A', 'B', 'C'):
    display(df['week'][evl])

A指標,ETF Symbol
Ranking,Unnamed: 1_level_1
1,SMH
2,XT
3,PICK
4,ROBO
5,IOO
6,ACWV
7,WOOD
8,ACWI
9,GUNR
10,CRBN


B指標,ETF Symbol
Ranking,Unnamed: 1_level_1
1,SMH
2,XT
3,ACWV
4,ROBO
5,WOOD
6,IOO
7,PICK
8,ACWI
9,CRBN
10,GUNR


C指標,ETF Symbol
Ranking,Unnamed: 1_level_1
1,ACWV
2,XT
3,RODM
4,IOO
5,ACWI
6,CRBN
7,IDLV
8,GII
9,IGF
10,WDIV


#### 月分析資料

In [230]:
# Show the monthly tables for metrics A, B and C in that order.
for evl in ('A', 'B', 'C'):
    display(df['month'][evl])

A指標,ETF Symbol
Ranking,Unnamed: 1_level_1
1,XT
2,SMH
3,MOO
4,IOO
5,ACWI
6,CRBN
7,ROBO
8,ACWV
9,WOOD
10,GUNR


B指標,ETF Symbol
Ranking,Unnamed: 1_level_1
1,XT
2,SMH
3,MOO
4,IOO
5,ACWI
6,ROBO
7,CRBN
8,CGW
9,ACWV
10,GUNR


C指標,ETF Symbol
Ranking,Unnamed: 1_level_1
1,XT
2,MOO
3,IOO
4,ACWV
5,CRBN
6,ACWI
7,RODM
8,WDIV
9,URTH
10,CGW


#### 相似度分析
此部分的數據會在README.md中用來分析評比結果的相似程度。

In [231]:
def reversed_pairs(A):
    """Count the pairs (i, j) with i < j whose values satisfy A[i] > A[j]."""
    return sum(
        1
        for pos, earlier in enumerate(A)
        for later in A[pos + 1:]
        if earlier > later
    )
def similarity(A, B):
    """Return the fraction of item pairs that B orders differently from A.

    Each item of B is replaced by its 1-based position in A, and the
    out-of-order pairs in that rank sequence are counted and divided by the
    total number of pairs. For rankings of the same distinct items, 0.0
    means B has the same order as A and 1.0 means B is A reversed, so a
    smaller value means more similar rankings.

    Args:
        A: reference ranking; must provide ``.index`` (e.g. a list).
        B: ranking to compare; every item must occur in A. It is not modified.

    Returns:
        The disagreement fraction as a float; 0.0 when B has fewer than two
        items, because there are no pairs to disagree on.

    Raises:
        ValueError: if an item of B is not present in A (from ``A.index``).
    """
    # Build a new list instead of overwriting B, so callers need no defensive copy.
    ranks = [A.index(item) + 1 for item in B]
    n_pairs = len(ranks) * (len(ranks) - 1) / 2
    if n_pairs == 0:
        return 0.0
    return reversed_pairs(ranks) / n_pairs

In [232]:
# Pairwise comparison of the three weekly rankings: A-B, B-C, A-C.
# Copies are passed so the stored rankings are never modified.
week_rank = ranking['week']
for left, right in (('A', 'B'), ('B', 'C'), ('A', 'C')):
    print(similarity(list(week_rank[left]), list(week_rank[right])))

0.03268641470888662
0.1481103166496425
0.14198161389172625


In [233]:
# Pairwise comparison of the three monthly rankings: A-B, B-C, A-C.
# Copies are passed so the stored rankings are never modified.
month_rank = ranking['month']
for left, right in (('A', 'B'), ('B', 'C'), ('A', 'C')):
    print(similarity(list(month_rank[left]), list(month_rank[right])))

0.018896833503575076
0.12538304392236976
0.1274259448416752


In [234]:
# Compare the weekly ranking with the monthly ranking for each metric.
# Copies are passed so the stored rankings are never modified.
for evl in ('A', 'B', 'C'):
    print(similarity(list(ranking['week'][evl]), list(ranking['month'][evl])))

0.05669050051072523
0.06128702757916241
0.1008682328907048
