In [1]:
import pandas as pd
import numpy as np
import operator
from sklearn.model_selection import train_test_split

In [2]:
# Path to the Excel workbook, relative to the notebook's working directory.
datafile = '../data/IBEX35(201301-201512).xlsx'
# Open the workbook once so its sheets can be listed here and parsed in a later cell.
xl = pd.ExcelFile(datafile)
# Bare last expression: the cell displays the workbook's sheet names.
xl.sheet_names

[u'Sheet1']

In [3]:
# Load the workbook's 'Sheet1' sheet into a DataFrame.
df = xl.parse(u'Sheet1')
# Preview the first rows to check the columns that were read.
df.head()

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,2013-01-02,8447.6,8337.9,8447.6,8286.7,280.53M,0.0343
1,2013-01-03,8403.4,8375.0,8422.6,8334.3,182.28M,-0.0052
2,2013-01-04,8435.8,8411.7,8447.0,8386.7,230.12M,0.0039
3,2013-01-07,8419.0,8434.3,8485.6,8406.3,289.12M,-0.002
4,2013-01-08,8453.0,8388.2,8493.5,8374.7,335.52M,0.004


In [4]:
# Make sure 'Date' holds datetimes so it can be compared against a date string
# when the data is split below. NOTE: this overwrites the column of `df` in place.
df['Date'] = pd.to_datetime(df['Date'])
# Summary statistics (count, mean, std, quartiles, ...) of the frame's columns.
df.describe()

Unnamed: 0,Price,Open,High,Low,Change %
count,792.0,792.0,792.0,792.0,792.0
mean,9963.164141,9965.216667,10037.700379,9878.218687,0.000415
std,1040.412907,1042.242568,1042.808739,1037.357084,0.01215
min,7553.2,7636.6,7674.6,7508.4,-0.0501
25%,9348.35,9334.3,9416.875,9264.625,-0.0068
50%,10223.25,10227.0,10310.5,10129.35,0.001
75%,10691.6,10700.725,10761.575,10611.2,0.0076
max,11866.4,11798.5,11884.6,11760.8,0.0387


In [5]:
# Chronological split: every row dated on or before `split_date` is training
# data, every later row is test data.
split_date = '2014-12-31'
train_data = df[df['Date'] <= split_date]
test_data = df[df['Date'] > split_date]
# Formatted through str.format so the line is valid, and prints the same text,
# on both Python 2.7 and Python 3 (the bare `print a, b` statement is
# Python-2-only).
print('{} {}'.format(train_data.shape, test_data.shape))

(557, 7) (235, 7)


In [6]:
def universe_partition(data, d1=10, d2=10):
    """Cut the universe of discourse of ``data`` into equal-width intervals.

    The universe starts at ``min(data) - d1`` and is bounded above by
    ``max(data) + d2``. Cutting points are spaced one tenth of the standard
    deviation of ``data`` apart; because ``np.arange`` excludes its stop
    value, the last cutting point lies strictly below ``max(data) + d2``.

    Parameters
    ----------
    data : object exposing ``max``, ``min`` and ``std`` accepting ``axis=0``
        (e.g. a 1-D numpy array or a pandas Series).
    d1 : number
        Margin subtracted from the minimum of ``data``.
    d2 : number
        Margin added to the maximum of ``data``.

    Returns
    -------
    list of (start, end) tuples
        Consecutive intervals; each interval's end is the next one's start.
        A real list is returned (not a lazy ``zip`` object) so callers can
        take its ``len`` and index into it on Python 3 as well.

    Raises
    ------
    ValueError
        If the standard deviation of ``data`` is zero or not finite, since
        no positive interval width can be derived from it.
    """
    x_max, x_min = data.max(axis=0), data.min(axis=0)
    len_val = data.std(axis=0) / 10.0  # interval width
    if not np.isfinite(len_val) or len_val <= 0:
        raise ValueError(
            "cannot partition the universe: the standard deviation of the "
            "data must be finite and positive"
        )
    u_max, u_min = x_max + d2, x_min - d1  # bounds of the universe of discourse
    u_b = np.arange(u_min, u_max, step=len_val)  # cutting points
    # Pair every cutting point with its successor: (start, end) of each interval.
    return list(zip(u_b[:-1], u_b[1:]))

In [7]:
# Partition the training prices into intervals. d1/d2 are the margins taken
# below the minimum and above the maximum price.
# TODO: document how the values 953 and 812 were chosen.
u_discourse = universe_partition(train_data['Price'], d1=953, d2=812)
# Parenthesised single-argument print behaves identically on Python 2 and 3.
print(len(u_discourse))

53


In [8]:
def set_fuzzy_numbers(u_discourse):
    """Build one trapezoidal fuzzy number for every interior interval.

    For each interval that has both a left and a right neighbour, the fuzzy
    number is the 4-tuple ``(A_l, u_1, u_2, A_r)`` where ``u_1``/``u_2`` are
    the interval's own bounds, ``A_l`` is the mean of the left neighbour's
    bounds and ``A_r`` is the mean of the right neighbour's bounds. The first
    and the last interval get no fuzzy number of their own.

    Parameters
    ----------
    u_discourse : iterable of (start, end) pairs
        Any iterable is accepted (list, generator, Python 3 ``zip`` object);
        it is materialized once internally.

    Returns
    -------
    list of (A_l, u_1, u_2, A_r) tuples
        ``len(u_discourse) - 2`` entries, or an empty list when fewer than
        three intervals are given.
    """
    intervals = list(u_discourse)
    fuzzy_numbers = []
    # Walk over (left neighbour, interval, right neighbour) triples; zip stops
    # at the shortest slice, which skips the first and last interval.
    for left, current, right in zip(intervals, intervals[1:], intervals[2:]):
        fuzzy_numbers.append(
            (np.mean(left), current[0], current[1], np.mean(right))
        )
    return fuzzy_numbers

In [9]:
# Fuzzy numbers derived from the intervals of the universe of discourse.
A = set_fuzzy_numbers(u_discourse)
# Parenthesised single-argument print behaves identically on Python 2 and 3.
print(len(A))

51


In [10]:
def membership_evaluation(value, fuzzy_number):
    """Return the membership degree of ``value`` in a trapezoidal fuzzy number.

    Parameters
    ----------
    value : scalar number
        The observation to evaluate.
    fuzzy_number : tuple ``(A_l, u_1, u_2, A_r)``
        ``[u_1, u_2]`` is the core (membership 1); ``A_l`` and ``A_r`` are
        the left and right feet (membership 0).

    Returns
    -------
    float
        ``1.0`` inside the core, a value rising linearly from 0 to 1 on
        ``[A_l, u_1)``, a value falling linearly from 1 to 0 on
        ``(u_2, A_r]``, and ``0.0`` everywhere else.
    """
    A_l, u_1, u_2, A_r = fuzzy_number
    if u_1 <= value <= u_2:
        return 1.0
    if A_l <= value < u_1:
        # Left shoulder. The guard implies u_1 > A_l, so the denominator is
        # strictly positive; float() avoids integer floor division on Python 2.
        return float(value - A_l) / (u_1 - A_l)
    if u_2 < value <= A_r:
        # Right shoulder: membership must DEcrease as value moves away from
        # the core, hence (A_r - value). The guard implies A_r > u_2.
        return float(A_r - value) / (A_r - u_2)
    return 0.0

In [11]:
def membership_assignement(price_time_series, fuzzy_numbers):
    """Label every price with the index of its best-matching fuzzy number.

    For each price, the membership degree in every fuzzy number is computed
    with ``membership_evaluation`` and the position of the largest degree is
    kept. When several fuzzy numbers tie, the lowest index wins.

    Parameters
    ----------
    price_time_series : iterable of numbers
    fuzzy_numbers : sized sequence of fuzzy numbers

    Returns
    -------
    list of int
        One index into ``fuzzy_numbers`` per price, in input order.
    """
    set_count = len(fuzzy_numbers)
    labels = []
    for observed in price_time_series:
        degrees = [membership_evaluation(observed, candidate)
                   for candidate in fuzzy_numbers]
        # max() returns the first maximal element, so ties resolve to the
        # smallest index.
        winner = max(range(set_count), key=degrees.__getitem__)
        labels.append(winner)
    return labels

In [12]:
# Fuzzify the training prices: one fuzzy-number index per observation.
train_data_membership_series = membership_assignement(train_data['Price'], fuzzy_numbers=A)
# Parenthesised single-argument print behaves identically on Python 2 and 3.
print(len(train_data_membership_series))

557


In [13]:
# Time elapsed since the first training observation.
# .iloc[0] takes the first row by position; plain [0] is a label lookup and
# only works while the index happens to contain the label 0.
first_date = train_data['Date'].iloc[0]
# Vectorized datetime subtraction instead of a row-by-row apply().
train_data_days = train_data['Date'] - first_date
# Parenthesised single-argument print behaves identically on Python 2 and 3.
print(train_data_days.describe())

count                         557
mean     372 days 01:40:49.551166
std      203 days 14:54:05.164563
min               0 days 00:00:00
25%             198 days 00:00:00
50%             398 days 00:00:00
75%             531 days 00:00:00
max             728 days 00:00:00
Name: Date, dtype: object


In [16]:
def FLR(membership_time_series):
    """List the transitions between consecutive observations.

    Parameters
    ----------
    membership_time_series : iterable
        Fuzzified observations in chronological order. Any iterable is
        accepted (list, generator, pandas Series); elements are taken in
        iteration order, never looked up by index or label.

    Returns
    -------
    list of (Ai, Aj) tuples
        One pair per consecutive couple of observations, ``Ai`` preceding
        ``Aj``. Has ``n - 1`` entries for ``n`` observations and is empty
        for fewer than two observations.
    """
    states = list(membership_time_series)
    # Pair each state with its successor.
    return list(zip(states[:-1], states[1:]))

In [18]:
# Transitions between consecutive fuzzified training observations.
transition_FLR = FLR(train_data_membership_series)
# Parenthesised single-argument print behaves identically on Python 2 and 3.
print(len(transition_FLR))

556


In [19]:
def FLR_weight(transitions):
    """Placeholder for computing the jump frequency by FLR.

    Not implemented yet: ``transitions`` is ignored and ``None`` is returned.

    TODO: implement the frequency computation.
    """
    return None