## Goal of this notebook
Build composite (all-stock) models for two-week, three-week, two-month, and three-month momentum, volume, and intra-day range, and compare them against each other and against local (single-stock) models.

In [1]:
from stock_utils import *

In [2]:
# Load the NVDA price history. DataFrame.from_csv was deprecated in pandas 0.21
# and removed in 1.0; read_csv with index_col=0 and parse_dates=True is the
# documented drop-in replacement.
df = pd.read_csv('stock_data/nvda.csv', index_col=0, parse_dates=True)

# Price movements over period=7 ("weekly") and period=30 ("monthly"), each
# bucketed into 4 categories.
weekly_movements = get_price_movements(df, period=7)
weekly_movement_categories = categorize_movements(weekly_movements, n_cats=4)
monthly_movements = get_price_movements(df, period=30)
monthly_movement_categories = categorize_movements(monthly_movements, n_cats=4)

# Trends over 2 and 3 consecutive categories for each time scale
# (second argument presumably the trend length -- confirm in stock_utils).
two_week_movement_trends = get_trends(weekly_movement_categories, 2)
three_week_movement_trends = get_trends(weekly_movement_categories, 3)
two_month_movement_trends = get_trends(monthly_movement_categories, 2)
three_month_movement_trends = get_trends(monthly_movement_categories, 3)

# Category labels handed to get_trends_all_stocks in later cells
# (presumably big drop / small drop / small gain / big gain -- TODO confirm).
movement_category_types = ['bd', 'sd', 'sg', 'bg']

In [3]:
# Local (single-stock) model probabilities for the weekly categories.
# The one-period model is built with an empty trend list.
local_two_week_probs = build_model_probabilities(
    weekly_movement_categories,
    two_week_movement_trends,
    2)
local_one_week_probs = build_model_probabilities(
    weekly_movement_categories,
    [],
    1)

# Same pair of models for the monthly categories.
local_two_month_probs = build_model_probabilities(
    monthly_movement_categories,
    two_month_movement_trends,
    2)
local_one_month_probs = build_model_probabilities(
    monthly_movement_categories,
    [],
    1)

## Local 1 week model v.s. Local 2 week model

In [4]:
# Head-to-head: local one-week model vs. local two-week model on the weekly
# categories. The third return value (draws) is captured but not reported.
m1_wins, m2_wins, n_draws = random_sample_tests_m1_m2(
    weekly_movement_categories,
    local_one_week_probs, 1,
    local_two_week_probs, 2,
    sample_size=20)

print('Local one week model won {} times'.format(m1_wins))
print('Local two week model won {} times'.format(m2_wins))

Local one week model won 4843 times
Local two week model won 4683 times


## Composite 1 week model v.s. Composite 2 week model

In [6]:
# Pool every stock: get_trends_all_stocks hands back the two-week trends, an
# unused value, the composite one-week probabilities and the pooled weekly
# categories. (7, 2) presumably mean period=7 and trend length 2 -- confirm
# in stock_utils.
(all_two_week_trends,
 _,
 composite_one_week_probs,
 all_weekly_movement_categories) = get_trends_all_stocks(
    7, 2, movement_category_types, n_cats=4)

# Composite two-week model built from the pooled categories and trends.
composite_two_week_probs = build_model_probabilities(
    all_weekly_movement_categories,
    all_two_week_trends,
    2)

In [8]:
# Head-to-head: composite one-week model vs. composite two-week model,
# evaluated on the single-stock weekly categories.
m1_wins, m2_wins, n_draws = random_sample_tests_m1_m2(
    weekly_movement_categories,
    composite_one_week_probs, 1,
    composite_two_week_probs, 2,
    sample_size=20)

print('Composite one week model won {} times'.format(m1_wins))
print('Composite two week model won {} times'.format(m2_wins))

Composite one week model won 313 times
Composite two week model won 9278 times


## Local 2 week model v.s. Composite 2 week model
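The local model's large winning margin is likely due to overfitting: it is tested on the same weekly categories (`weekly_movement_categories`) it was built from.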

In [9]:
# Head-to-head: local two-week model vs. composite two-week model, evaluated
# on the same weekly categories the local model was built from.
m1_wins, m2_wins, n_draws = random_sample_tests_m1_m2(
    weekly_movement_categories,
    local_two_week_probs, 2,
    composite_two_week_probs, 2,
    sample_size=20)

print('Local two week model won {} times'.format(m1_wins))
print('Composite two week model won {} times'.format(m2_wins))

Local two week model won 9065 times
Composite two week model won 796 times


## Composite 2 week model v.s. Composite 3 week model

In [10]:
# Only the first of the four returned values (the pooled three-week trends)
# is needed here; the rest are discarded.
(all_three_week_trends, _, _, _) = get_trends_all_stocks(
    7, 3, movement_category_types, n_cats=4)

# Composite three-week model; reuses the pooled weekly categories computed
# alongside the two-week trends.
composite_three_week_probs = build_model_probabilities(
    all_weekly_movement_categories,
    all_three_week_trends,
    3)

In [11]:
# Head-to-head: composite two-week model vs. composite three-week model.
# Note the sample size here is 25, not the 20 used for the other weekly tests.
m1_wins, m2_wins, n_draws = random_sample_tests_m1_m2(
    weekly_movement_categories,
    composite_two_week_probs, 2,
    composite_three_week_probs, 3,
    sample_size=25)

print('Composite two week model won {} times'.format(m1_wins))
print('Composite three week model won {} times'.format(m2_wins))

Composite two week model won 2470 times
Composite three week model won 7196 times


## Composite 1 month model v.s. Composite 2 month model

In [12]:
# Monthly counterpart of the pooled weekly build: two-month trends, an unused
# value, composite one-month probabilities and the pooled monthly categories.
(all_two_month_trends,
 _,
 composite_one_month_probs,
 all_monthly_movement_categories) = get_trends_all_stocks(
    30, 2, movement_category_types, n_cats=4)

# Composite two-month model built from the pooled categories and trends.
composite_two_month_probs = build_model_probabilities(
    all_monthly_movement_categories,
    all_two_month_trends,
    2)

In [13]:
# Head-to-head: composite one-month model vs. composite two-month model,
# evaluated on the single-stock monthly categories.
m1_wins, m2_wins, n_draws = random_sample_tests_m1_m2(
    monthly_movement_categories,
    composite_one_month_probs, 1,
    composite_two_month_probs, 2,
    sample_size=10)

print('Composite one month model won {} times'.format(m1_wins))
print('Composite two month model won {} times'.format(m2_wins))

Composite one month model won 6081 times
Composite two month model won 2155 times


## Composite 2 month model v.s. Composite 3 month model

In [14]:
# Only the pooled three-month trends are kept from the four returned values.
(all_three_month_trends, _, _, _) = get_trends_all_stocks(
    30, 3, movement_category_types, n_cats=4)

# Composite three-month model; reuses the pooled monthly categories computed
# alongside the two-month trends.
composite_three_month_probs = build_model_probabilities(
    all_monthly_movement_categories,
    all_three_month_trends,
    3)

In [15]:
# Head-to-head: composite two-month model vs. composite three-month model.
m1_wins, m2_wins, n_draws = random_sample_tests_m1_m2(
    monthly_movement_categories,
    composite_two_month_probs, 2,
    composite_three_month_probs, 3,
    sample_size=10)

print('Composite two month model won {} times'.format(m1_wins))
print('Composite three month model won {} times'.format(m2_wins))

Composite two month model won 1598 times
Composite three month model won 7860 times


## Composite 1 month model v.s. Composite 3 month model

In [16]:
# Head-to-head: composite one-month model vs. composite three-month model.
m1_wins, m2_wins, n_draws = random_sample_tests_m1_m2(
    monthly_movement_categories,
    composite_one_month_probs, 1,
    composite_three_month_probs, 3,
    sample_size=10)

print('Composite one month model won {} times'.format(m1_wins))
print('Composite three month model won {} times'.format(m2_wins))

Composite one month model won 1425 times
Composite three month model won 8422 times


# All Stocks

In [17]:
# Every per-stock CSV. glob.glob returns matches in arbitrary order, so sort
# them to make the per-stock reports come out in the same order on every run.
g = sorted(glob.glob('stock_data/*.csv'))

In [18]:
## Get composite weekly probabilities
all_two_week_trends, _, composite_one_week_probs, all_weekly_movement_categories = \
    get_trends_all_stocks(7, 2, movement_category_types, n_cats=4)
all_three_week_trends, _, _, _ = get_trends_all_stocks(7, 3, movement_category_types, n_cats=4)
composite_two_week_probs = build_model_probabilities(all_weekly_movement_categories, all_two_week_trends, 2)
composite_three_week_probs = build_model_probabilities(all_weekly_movement_categories, all_three_week_trends, 3)

## Get composite monthly probabilities
all_two_month_trends, _, composite_one_month_probs, all_monthly_movement_categories = \
    get_trends_all_stocks(30, 2, movement_category_types, n_cats=4)
all_three_month_trends, _, _, _ = get_trends_all_stocks(30, 3, movement_category_types, n_cats=4)
composite_two_month_probs = build_model_probabilities(all_monthly_movement_categories, all_two_month_trends, 2)
composite_three_month_probs = build_model_probabilities(all_monthly_movement_categories, all_three_month_trends, 3)

# Banner lines printed around the ticker and around each comparison heading.
ticker_rule = '=-=-=-=-=-=-=-=-=-=-=-=-=-=-='
section_rule = '--------------------------------------------------'

## Run every momentum comparison for each stock
for filename in g:
    ticker = ticker_from_csv(filename)
    # DataFrame.from_csv was removed in pandas 1.0; this read_csv call is the
    # documented equivalent (first column as a date-parsed index).
    df = pd.read_csv(filename, index_col=0, parse_dates=True)
    weekly_movements = get_price_movements(df, period=7)
    weekly_movement_categories = categorize_movements(weekly_movements, n_cats=4)
    monthly_movements = get_price_movements(df, period=30)
    monthly_movement_categories = categorize_movements(monthly_movements, n_cats=4)

    # NOTE(review): the three-period trends and the local monthly models are
    # not used by any comparison in this cell; they are still computed so the
    # notebook-global names keep the values later cells may rely on.
    two_week_movement_trends = get_trends(weekly_movement_categories, 2)
    three_week_movement_trends = get_trends(weekly_movement_categories, 3)
    two_month_movement_trends = get_trends(monthly_movement_categories, 2)
    three_month_movement_trends = get_trends(monthly_movement_categories, 3)

    local_two_week_probs = build_model_probabilities(weekly_movement_categories, two_week_movement_trends, 2)
    local_one_week_probs = build_model_probabilities(weekly_movement_categories, [], 1)
    local_two_month_probs = build_model_probabilities(monthly_movement_categories, two_month_movement_trends, 2)
    local_one_month_probs = build_model_probabilities(monthly_movement_categories, [], 1)

    print(ticker_rule)
    print('             ' + ticker)
    print(ticker_rule)
    print()

    # One row per comparison:
    # (heading, test categories,
    #  model 1 label, model 1 probabilities, model 1 period count,
    #  model 2 label, model 2 probabilities, model 2 period count,
    #  sample size)
    comparisons = [
        ('Local 1 week model v.s. Local 2 week model', weekly_movement_categories,
         'Local one week', local_one_week_probs, 1,
         'Local two week', local_two_week_probs, 2,
         20),
        ('Composite 1 week model v.s. Composite 2 week model', weekly_movement_categories,
         'Composite one week', composite_one_week_probs, 1,
         'Composite two week', composite_two_week_probs, 2,
         20),
        ('Local 2 week model v.s. Composite 2 week model', weekly_movement_categories,
         'Local two week', local_two_week_probs, 2,
         'Composite two week', composite_two_week_probs, 2,
         20),
        ('Composite 2 week model v.s. Composite 3 week model', weekly_movement_categories,
         'Composite two week', composite_two_week_probs, 2,
         'Composite three week', composite_three_week_probs, 3,
         25),
        ('Composite 1 month model v.s. Composite 2 month model', monthly_movement_categories,
         'Composite one month', composite_one_month_probs, 1,
         'Composite two month', composite_two_month_probs, 2,
         10),
        ('Composite 2 month model v.s. Composite 3 month model', monthly_movement_categories,
         'Composite two month', composite_two_month_probs, 2,
         'Composite three month', composite_three_month_probs, 3,
         10),
        ('Composite 1 month model v.s. Composite 3 month model', monthly_movement_categories,
         'Composite one month', composite_one_month_probs, 1,
         'Composite three month', composite_three_month_probs, 3,
         10),
    ]

    for (heading, test_categories,
         m1_label, m1_probs, m1_periods,
         m2_label, m2_probs, m2_periods,
         n_samples) in comparisons:
        print(section_rule)
        print(heading)
        print(section_rule)
        m1_wins, m2_wins, n_draws = random_sample_tests_m1_m2(
            test_categories, m1_probs, m1_periods, m2_probs, m2_periods, sample_size=n_samples)

        print(m1_label + ' model won ' + str(m1_wins) + ' times')
        print(m2_label + ' model won ' + str(m2_wins) + ' times')
        print()

    # Extra blank line between stocks.
    print()

=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
             BAC
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

--------------------------------------------------
Local 1 week model v.s. Local 2 week model
--------------------------------------------------
Local one week model won 780 times
Local two week model won 8940 times

--------------------------------------------------
Composite 1 week model v.s. Composite 2 week model
--------------------------------------------------
Composite one week model won 207 times
Composite two week model won 9762 times

--------------------------------------------------
Local 2 week model v.s. Composite 2 week model
--------------------------------------------------
Local two week model won 10000 times
Composite two week model won 0 times

--------------------------------------------------
Composite 2 week model v.s. Composite 3 week model
--------------------------------------------------
Composite two week model won 220 times
Composite three week model won 9587 times

-------------

=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
             MU
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

--------------------------------------------------
Local 1 week model v.s. Local 2 week model
--------------------------------------------------
Local one week model won 2070 times
Local two week model won 7475 times

--------------------------------------------------
Composite 1 week model v.s. Composite 2 week model
--------------------------------------------------
Composite one week model won 572 times
Composite two week model won 9323 times

--------------------------------------------------
Local 2 week model v.s. Composite 2 week model
--------------------------------------------------
Local two week model won 8690 times
Composite two week model won 1081 times

--------------------------------------------------
Composite 2 week model v.s. Composite 3 week model
--------------------------------------------------
Composite two week model won 1704 times
Composite three week model won 7733 times

----------

=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
             INTC
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

--------------------------------------------------
Local 1 week model v.s. Local 2 week model
--------------------------------------------------
Local one week model won 2662 times
Local two week model won 6520 times

--------------------------------------------------
Composite 1 week model v.s. Composite 2 week model
--------------------------------------------------
Composite one week model won 2089 times
Composite two week model won 7734 times

--------------------------------------------------
Local 2 week model v.s. Composite 2 week model
--------------------------------------------------
Local two week model won 9182 times
Composite two week model won 668 times

--------------------------------------------------
Composite 2 week model v.s. Composite 3 week model
--------------------------------------------------
Composite two week model won 3388 times
Composite three week model won 6020 times

--------

# Volume Models

In [27]:
# Reload the NVDA price history. DataFrame.from_csv was deprecated in pandas
# 0.21 and removed in 1.0; read_csv with index_col=0 and parse_dates=True is
# the documented drop-in replacement.
df = pd.read_csv('stock_data/nvda.csv', index_col=0, parse_dates=True)

# Relative volume against two different look-back windows (25 and 50), then
# bucketed into volume categories.
daily_rel_volumes_25 = get_relative_volume(df, relative_period=25)
daily_rel_volumes_50 = get_relative_volume(df, relative_period=50)
daily_rel_volume_categories_25 = categorize_volumes(daily_rel_volumes_25)
daily_rel_volume_categories_50 = categorize_volumes(daily_rel_volumes_50)

# Price movements at get_price_movements' default period, bucketed into 4
# categories.
daily_movements = get_price_movements(df)
daily_movement_categories = categorize_movements(daily_movements, n_cats=4)

In [28]:
# Two-day volume trends for each relative-volume window, pairing the volume
# categories with the daily movement categories.
local_two_day_vol_trends_25 = get_two_day_volume_trends(
    daily_rel_volume_categories_25,
    daily_movement_categories)
local_two_day_vol_trends_50 = get_two_day_volume_trends(
    daily_rel_volume_categories_50,
    daily_movement_categories)

In [29]:
# Local two-day volume models, one per relative-volume window. The preceding
# period is described by a volume category rather than a movement category,
# hence the explicit previous_category_types.
local_two_day_probs_25 = build_model_probabilities(
    daily_movement_categories,
    local_two_day_vol_trends_25,
    2,
    previous_category_types=['vl', 'l', 'h', 'vh'])
local_two_day_probs_50 = build_model_probabilities(
    daily_movement_categories,
    local_two_day_vol_trends_50,
    2,
    previous_category_types=['vl', 'l', 'h', 'vh'])

## Local 2 day Relative Volume Model: RP=25 v.s. RP=50

In [30]:
# Head-to-head: relative-volume window 25 vs. 50, using the default sample
# size of random_sample_tests_m1_m2.
m1_wins, m2_wins, n_draws = random_sample_tests_m1_m2(
    daily_movement_categories,
    local_two_day_probs_25, 2,
    local_two_day_probs_50, 2)

print('RP=25 won {} times'.format(m1_wins))
print('RP=50 won {} times'.format(m2_wins))

RP=25 won 7380 times
RP=50 won 2399 times


#### All stocks -- Local
The sweet spot seems to be a relative-volume window of about 20 days.

In [34]:
# For every stock, build local two-day relative-volume models at look-back
# windows of 10, 20, 30, 40 and 50, then compare each window with the next.
for filename in g:
    ticker = ticker_from_csv(filename)
    # DataFrame.from_csv was removed in pandas 1.0; this read_csv call is the
    # documented equivalent (first column as a date-parsed index).
    df = pd.read_csv(filename, index_col=0, parse_dates=True)

    daily_rel_volumes_10 = get_relative_volume(df, relative_period=10)
    daily_rel_volumes_20 = get_relative_volume(df, relative_period=20)
    daily_rel_volumes_30 = get_relative_volume(df, relative_period=30)
    daily_rel_volumes_40 = get_relative_volume(df, relative_period=40)
    daily_rel_volumes_50 = get_relative_volume(df, relative_period=50)
    daily_rel_volume_categories_10 = categorize_volumes(daily_rel_volumes_10)
    daily_rel_volume_categories_20 = categorize_volumes(daily_rel_volumes_20)
    daily_rel_volume_categories_30 = categorize_volumes(daily_rel_volumes_30)
    daily_rel_volume_categories_40 = categorize_volumes(daily_rel_volumes_40)
    daily_rel_volume_categories_50 = categorize_volumes(daily_rel_volumes_50)

    daily_movements = get_price_movements(df)
    daily_movement_categories = categorize_movements(daily_movements, n_cats=4)

    local_two_day_vol_trends_10 = get_two_day_volume_trends(daily_rel_volume_categories_10, daily_movement_categories)
    local_two_day_vol_trends_20 = get_two_day_volume_trends(daily_rel_volume_categories_20, daily_movement_categories)
    local_two_day_vol_trends_30 = get_two_day_volume_trends(daily_rel_volume_categories_30, daily_movement_categories)
    local_two_day_vol_trends_40 = get_two_day_volume_trends(daily_rel_volume_categories_40, daily_movement_categories)
    local_two_day_vol_trends_50 = get_two_day_volume_trends(daily_rel_volume_categories_50, daily_movement_categories)

    # The per-window models stay in individually named notebook globals so
    # later cells can still refer to e.g. local_two_day_probs_20.
    local_two_day_probs_10 = build_model_probabilities(daily_movement_categories, local_two_day_vol_trends_10, 2,
                                               previous_category_types=['vl', 'l', 'h', 'vh'])
    local_two_day_probs_20 = build_model_probabilities(daily_movement_categories, local_two_day_vol_trends_20, 2,
                                               previous_category_types=['vl', 'l', 'h', 'vh'])
    local_two_day_probs_30 = build_model_probabilities(daily_movement_categories, local_two_day_vol_trends_30, 2,
                                               previous_category_types=['vl', 'l', 'h', 'vh'])
    local_two_day_probs_40 = build_model_probabilities(daily_movement_categories, local_two_day_vol_trends_40, 2,
                                               previous_category_types=['vl', 'l', 'h', 'vh'])
    local_two_day_probs_50 = build_model_probabilities(daily_movement_categories, local_two_day_vol_trends_50, 2,
                                               previous_category_types=['vl', 'l', 'h', 'vh'])

    # The original ran an extra test here against local_two_day_probs_25, a
    # model this loop never builds (it was left over from the NVDA-only cell)
    # and whose result was overwritten before being printed. It is removed.

    print('=-=-=-=-=-=-=-=-=-=-=-=-=-=-=')
    print('             ' + ticker)
    print('=-=-=-=-=-=-=-=-=-=-=-=-=-=-=')
    print()

    # Compare each window with the next larger one: 10v20, 20v30, 30v40, 40v50.
    rp_models = [
        (10, local_two_day_probs_10),
        (20, local_two_day_probs_20),
        (30, local_two_day_probs_30),
        (40, local_two_day_probs_40),
        (50, local_two_day_probs_50),
    ]
    for (rp_a, probs_a), (rp_b, probs_b) in zip(rp_models, rp_models[1:]):
        print('--------------------------------------------------')
        print('Local 2 day Relative Volume Model: RP=' + str(rp_a) + ' v.s. RP=' + str(rp_b))
        print('--------------------------------------------------')
        m1_wins, m2_wins, n_draws = \
            random_sample_tests_m1_m2(daily_movement_categories, probs_a, 2, probs_b, 2)
        print('RP=' + str(rp_a) + ' won ' + str(m1_wins) + ' times')
        print('RP=' + str(rp_b) + ' won ' + str(m2_wins) + ' times')
        print()

    # Extra blank line between stocks.
    print()

=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
             BAC
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

--------------------------------------------------
Local 2 day Relative Volume Model: RP=10 v.s. RP=20
--------------------------------------------------
RP=10 won 8137 times
RP=20 won 1774 times

--------------------------------------------------
Local 2 day Relative Volume Model: RP=20 v.s. RP=30
--------------------------------------------------
RP=20 won 9366 times
RP=30 won 556 times

--------------------------------------------------
Local 2 day Relative Volume Model: RP=30 v.s. RP=40
--------------------------------------------------
RP=30 won 6891 times
RP=40 won 2763 times

--------------------------------------------------
Local 2 day Relative Volume Model: RP=40 v.s. RP=50
--------------------------------------------------
RP=40 won 1876 times
RP=50 won 7805 times


=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
             NVDA
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

--------------------------------------------------
Loca

RP=20 won 7390 times
RP=30 won 2411 times

--------------------------------------------------
Local 2 day Relative Volume Model: RP=30 v.s. RP=40
--------------------------------------------------
RP=30 won 1987 times
RP=40 won 7897 times

--------------------------------------------------
Local 2 day Relative Volume Model: RP=40 v.s. RP=50
--------------------------------------------------
RP=40 won 943 times
RP=50 won 8995 times


=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
             INTC
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

--------------------------------------------------
Local 2 day Relative Volume Model: RP=10 v.s. RP=20
--------------------------------------------------
RP=10 won 2259 times
RP=20 won 7588 times

--------------------------------------------------
Local 2 day Relative Volume Model: RP=20 v.s. RP=30
--------------------------------------------------
RP=20 won 8640 times
RP=30 won 1211 times

--------------------------------------------------
Local 2 day Relative Volume Model: RP=3

## Local 2 day Relative Volume Model (RP=20) v.s. Composite 2 day Relative Volume Model 