In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import pandas_datareader.data as web
import os
import matplotlib.pyplot as plt
from multiprocessing import Pool, cpu_count
from pandas.tseries.offsets import CDay, DateOffset
from utils import ASXTradingCalendar

In [2]:
# Custom business-day offset driven by the project-local ASXTradingCalendar (imported from utils).
# find_div_prob multiplies this offset by a day count to step back from each dividend date.
# NOTE(review): presumably the calendar encodes ASX trading holidays — confirm in utils.
asx_dayoffset = CDay(calendar=ASXTradingCalendar())

In [3]:
# Data locations. Set the ASX_PRICE_PATH / ASX_DIV_PATH environment variables to run the
# notebook on another machine; the defaults are the original hard-coded local directories.
# price_path holds the yearly '<year>price.csv' files, div_path the per-ticker '<code>.csv'
# dividend-history files (both are joined with os.path.join, so a trailing slash is optional).
price_path = os.environ.get(
    'ASX_PRICE_PATH',
    '/Users/feizhan/Dropbox/Project2M/ASXYearlyCompanyConsolidation/',
)
div_path = os.environ.get(
    'ASX_DIV_PATH',
    '/Users/feizhan/Dropbox/Project2M/ASXDividendHistory',
)

In [4]:
def _empty_div_prob(name):
    """Return the one-row all-zero result used when `name` has no usable dividend data."""
    return pd.DataFrame({'total': [0], 'inc_prob': [0], 'dec_prob': [0]}, index=[name])


def find_div_prob(kwargs):
    """Measure how often a price moved by more than `perc` in a window before each dividend.

    The arguments arrive as a single dict so the function can be mapped over a list of
    work items (see `find_prob_total`). The dividend history is read from
    `<div_path>/<name>.csv`; `div_path` and `asx_dayoffset` are module-level globals.

    Expected keys in `kwargs`:
        series: price Series looked up by date (`series.loc[date]`).
        n_days_start: number of `asx_dayoffset` steps before the dividend date at which
            the window opens; the price on that day is the reference price.
        n_days_end: number of `asx_dayoffset` steps before the dividend date at which
            the window closes.
        perc: relative move threshold (0.1 means 10%).
        name: ticker code; used for the CSV file name and as the result's index label.
        start_year: only dividend rows dated on or after this value are considered.

    Returns:
        A one-row DataFrame indexed by `name` with columns
        `total` (number of DIVIDEND rows on/after `start_year`),
        `inc_prob` (share of evaluated windows whose max exceeded start * (1 + perc)) and
        `dec_prob` (share of evaluated windows whose min fell below start * (1 - perc)).
        All three are 0 when the CSV cannot be opened or holds no matching dividends.

    A window is "evaluated" only when `series` has a price on its start date. Windows
    without a start price are skipped and no longer count toward the probability
    denominator (previously they did, which pulled both probabilities toward zero).
    """
    series = kwargs['series']
    n_days_start = kwargs['n_days_start']
    n_days_end = kwargs['n_days_end']
    perc = kwargs['perc']
    name = kwargs['name']
    start_year = kwargs['start_year']

    # A missing or unreadable dividend file simply means "no dividends" for this ticker.
    try:
        div_dat = pd.read_csv(os.path.join(div_path, name + '.csv'), index_col=[0])
    except OSError:
        return _empty_div_prob(name)

    div_dat.index = pd.to_datetime(div_dat.index)
    div_dat = div_dat.sort_index().loc[start_year:]
    div_dat = div_dat.loc[(div_dat.action == 'DIVIDEND').values]
    total = div_dat.shape[0]
    if total == 0:
        return _empty_div_prob(name)

    inc_op = 0
    dec_op = 0
    evaluated = 0
    for div_date in div_dat.index:
        start_date = div_date - n_days_start * asx_dayoffset
        end_date = div_date - n_days_end * asx_dayoffset
        try:
            start_price = series.loc[start_date]
        except KeyError:
            # No price on the window's first day: this dividend cannot be evaluated.
            continue
        evaluated += 1
        window = series.loc[start_date:end_date]
        if window.max() > start_price * (1 + perc):
            inc_op += 1
        if window.min() < start_price * (1 - perc):
            dec_op += 1

    # Guard the division: every window may have been skipped.
    inc_prob = inc_op / evaluated if evaluated else 0.0
    dec_prob = dec_op / evaluated if evaluated else 0.0
    return pd.DataFrame(
        {'total': [total], 'inc_prob': [inc_prob], 'dec_prob': [dec_prob]},
        index=[name],
    )

In [13]:
def find_prob_total(gpd, func, n_days_start=30, n_days_end=5, perc=.1, start_year=pd.Timestamp(2010, 1, 1)):
    """Run `func` once per group in a process pool and stack the results.

    Args:
        gpd: iterable of `(name, group)` pairs (e.g. a DataFrame groupby); each group
            must support `group['Close']`.
        func: callable taking one dict with keys `n_days_start`, `n_days_end`, `perc`,
            `series`, `name`, `start_year` and returning a DataFrame. It is sent to
            worker processes, so it has to be picklable.
        n_days_start, n_days_end, perc, start_year: forwarded unchanged to every call.
            The `start_year` default is a `pd.Timestamp` because `pd.datetime` no longer
            exists in pandas 2.0+.

    Returns:
        The per-group DataFrames concatenated with `pd.concat`, or an empty DataFrame
        when `gpd` yields no groups.
    """
    tasks = [
        {
            'n_days_start': n_days_start,
            'n_days_end': n_days_end,
            'perc': perc,
            'series': group['Close'],
            'name': name,
            'start_year': start_year,
        }
        for name, group in gpd
    ]
    # pd.concat([]) raises, and there is no point starting workers for nothing.
    if not tasks:
        return pd.DataFrame()
    with Pool(cpu_count()) as p:
        ret_list = p.map(func, tasks)
    return pd.concat(ret_list)

In [6]:
years = [2012, 2013, 2014, 2015, 2016]
# Read every yearly price file and stack them in one pd.concat call. DataFrame.append was
# removed in pandas 2.0, and appending inside a loop re-copies the accumulated frame each time.
dat = pd.concat(
    [pd.read_csv(os.path.join(price_path, str(year) + 'price.csv')) for year in years]
)

In [7]:
# Replace the whole column so it takes the datetime64 dtype. Since pandas 2.0,
# `dat.loc[:, 'Date'] = ...` writes into the existing column in place and can leave it
# as object dtype.
dat['Date'] = pd.to_datetime(dat['Date'])

In [8]:
# Move the parsed dates into the index so prices can be looked up and sliced by date.
# Not re-runnable on its own: once 'Date' is the index, a second run raises KeyError.
dat = dat.set_index(keys='Date')

In [9]:
# Preview the first five rows to check the Date index and the price columns.
dat.head(n=5)

Unnamed: 0_level_0,Adj Close,Close,High,Low,Open,Volume,code,is_last_11_day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2012-01-02,0.86,0.043,0.043,0.043,0.043,0.0,1PG.AX,
2012-01-03,0.86,0.043,0.043,0.043,0.043,0.0,1PG.AX,
2012-01-04,0.86,0.043,0.043,0.043,0.043,0.0,1PG.AX,
2012-01-05,0.86,0.043,0.043,0.043,0.043,0.0,1PG.AX,
2012-01-06,0.86,0.043,0.043,0.043,0.043,0.0,1PG.AX,


In [10]:
# One group per ticker code; find_prob_total iterates these (name, group) pairs.
gpd = dat.groupby(by='code')

In [14]:
%time div_ops = find_prob_total(gpd, find_div_prob, n_days_start=100, n_days_end=10, perc=.1, start_year=pd.datetime(2013, 1, 1))

CPU times: user 1.82 s, sys: 183 ms, total: 2 s
Wall time: 2.73 s


In [15]:
# Show the last five tickers of the result table as a sanity check.
div_ops.tail(n=5)

Unnamed: 0,dec_prob,inc_prob,total
ZNZ.AX,0.0,0.666667,6
ZRL.AX,0.0,0.0,0
ZTA.AX,0.0,0.0,0
ZYB.AX,0.0,0.0,0
ZYL.AX,0.0,0.0,0


In [16]:
# Inspect the result row for a single ticker.
div_ops.loc['NEC.AX', :]

dec_prob    1.00
inc_prob    0.25
total       4.00
Name: NEC.AX, dtype: float64

In [126]:
# Download the price history and corporate actions for one ticker, then keep dividends only.
start_year = 2000
end_year = 2016
# pd.Timestamp replaces pd.datetime, which was removed in pandas 2.0.
start = pd.Timestamp(start_year, 1, 1)
end = pd.Timestamp(end_year, 12, 31)
code = 'ARF.AX'
# NOTE(review): these calls need network access and a working pandas_datareader Yahoo
# backend — confirm the 'yahoo' / 'yahoo-actions' sources still respond before relying on them.
code_dat = web.DataReader(code, 'yahoo', start, end)
div_dat = web.DataReader(code, 'yahoo-actions', start, end)
# 'yahoo-actions' rows carry an `action` column; drop everything that is not a dividend.
div_dat = div_dat.loc[(div_dat.action == 'DIVIDEND').values]