In [1]:
import hana_ml.dataframe as dataframe
import pandas as pd

In [2]:
cc = dataframe.ConnectionContext(userkey = 'hana_ml_sl2044', encrypt = 'true', sslValidateCertificate = 'false')

In [3]:
# create the dataframe containing sales_document and material for any sale that contains a door (excluding components)
df_remote = cc.sql('SELECT SALES_DOCUMENT, MATERIAL \
                    FROM SALES_SC_LEVEL \
                    WHERE COMPONENT_FLAG <> 1 \
                    AND SALES_DOCUMENT IN (\
                        SELECT DISTINCT SALES_DOCUMENT \
                        FROM SALES_SC_LEVEL \
                        WHERE COMPONENT_FLAG <> 1 \
                        AND CATEGORY = \'D\' ) ')

In [4]:
# create a dictionary of material descriptions to help with a more meaningful
df_remote_desc = cc.table('MATERIAL_DESC')
desc_lookup = df_remote_desc.collect()[["MATERIAL","TXTMD"]].set_index('MATERIAL').T.to_dict('records')[0]

In [5]:
# show the top 3 items in our source dataset
df_remote.head(3).collect()

Unnamed: 0,SALES_DOCUMENT,MATERIAL
0,F150118160,LAL0130
1,E260199614,SNK2143
2,F270075868,LAL1211


In [6]:
# perform a statistical analysis of the data
df_remote.describe().collect()

Unnamed: 0,column,count,unique,nulls,mean,std,min,max,median,25_percent_cont,25_percent_disc,50_percent_cont,50_percent_disc,75_percent_cont,75_percent_disc
0,SALES_DOCUMENT,4010599,1074022,0,,,,,,,,,,,
1,MATERIAL,4010599,39765,0,,,,,,,,,,,


### DEFINITION

Association rules analysis is a technique to uncover how items are associated to each other. There are three common ways to measure association.

**Measure 1: Support.** This says how popular an itemset is, as measured by the proportion of transactions in which an itemset appears. In Table 1 below, the support of {apple} is 4 out of 8, or 50%. Itemsets can also contain multiple items. For instance, the support of {apple, beer, rice} is 2 out of 8, or 25%.

<div style='text-align: center'>
    <img src='association-rule-support-eqn.png' alt="Association Rule Support eqn" width="165" height="45">
</div>


<div style='text-align: center'>
    <img src='association-rule-support-table.png' alt="association-rule-support-table" width="326" height="289">    
    <p style='text-align: center'>Table 1. Example Transactions</p>
</div>

If you discover that sales of items beyond a certain proportion tend to have a significant impact on your profits, you might consider using that proportion as your support threshold. You may then identify itemsets with support values above this threshold as significant itemsets.

**Measure 2: Confidence.** This says how likely item Y is purchased when item X is purchased, expressed as {X -> Y}. This is measured by the proportion of transactions with item X, in which item Y also appears. In Table 1, the confidence of {apple -> beer} is 3 out of 4, or 75%.

<div style='text-align: center'>
    <img src='association-rule-confidence-eqn.png' alt="Association Rule Confidence eqn" width="351" height="51">
</div>

One drawback of the confidence measure is that it might misrepresent the importance of an association. This is because it only accounts for how popular apples are, but not beers. If beers are also very popular in general, there will be a higher chance that a transaction containing apples will also contain beers, thus inflating the confidence measure. To account for the base popularity of both constituent items, we use a third measure called lift.

**Measure 3: Lift.** This says how likely item Y is purchased when item X is purchased, while controlling for how popular item Y is. In Table 1, the lift of {apple -> beer} is 1, which implies no association between items. A lift value greater than 1 means that item Y is likely to be bought if item X is bought, while a value less than 1 means that item Y is unlikely to be bought if item X is bought.

<div style='text-align: center'>
    <img src='association-rule-lift-eqn.png' alt="Association Rule Lift eqn" width="377" height="52">
</div>


In [7]:
# use the Apriori algorithm
from hana_ml.algorithms.pal.association import Apriori

In [8]:
# set up the association model
ap = Apriori( min_support= 0.0001,
              min_confidence=0.5,
              relational=False,
              min_lift=1,
              max_conseq=1,
              max_len=5,
              ubiquitous=1.0,
              use_prefix_tree=False,
              thread_ratio=0,
              timeout=3600,
              pmml_export='single-row')

In [9]:
# run the model on the dataset
ap.fit(data=df_remote)

In [10]:
# collect the results into a dataframe for manipulation
df_result = ap.result_.collect()

In [11]:
# top 10 results by confidence (the likelihood of this happening based on the number of transactions)
# The antecedent is the list of items in the basket that appear before (or predict) the consequent item
df_result.sort_values(by=['CONFIDENCE'], ascending=False).head(10)

Unnamed: 0,ANTECEDENT,CONSEQUENT,SUPPORT,CONFIDENCE,LIFT
6049,KIT0007&KIT0006&KSC0102,KIT0001,0.000262,1.0,2926.490463
4570,HKB1188&TAP3303&HYH8400,GIR0119,0.000101,1.0,320.030393
4566,HKB1188&RKC0156&HYH8400,GIR0119,0.000102,1.0,320.030393
499,KSC0112,KSC0102,0.000205,1.0,3356.31875
4562,HKB1188&RKC0156&TAP3303,GIR0119,0.000103,1.0,320.030393
497,KSC0110,KSC0102,0.000237,1.0,3356.31875
493,KSC0110,KSC0114,0.000237,1.0,3509.875817
3876,SRA0005&NWL0037,SRA0004,0.000122,1.0,1245.965197
489,KSC0112,KIT0006,0.000205,1.0,2942.526027
487,KSC0110,KIT0006,0.000237,1.0,2942.526027


In [12]:
# top 10 results by confidence (the likelihood of this happening based on the antecedent occurring to the same consequent)
df_result.sort_values(by=['LIFT'], ascending=False).head(10)

Unnamed: 0,ANTECEDENT,CONSEQUENT,SUPPORT,CONFIDENCE,LIFT
3382,WHT4160&GNO2420,GNO2129,0.00012,0.934783,6519.331733
3303,SNK5176&GNO2129,GNO2420,0.000103,0.840909,6497.517005
5781,SNK5176&WHT4160&GNO2129,GNO2420,0.000103,0.840909,6497.517005
5789,WHT4160&GNO2920&GNO2129,GNO2420,0.000102,0.839695,6488.133341
3410,GNO2920&GNO2129,GNO2420,0.000102,0.839695,6488.133341
3383,WHT4160&GNO2129,GNO2420,0.00012,0.837662,6472.430066
126,GNO2129,GNO2420,0.00012,0.837662,6472.430066
125,GNO2420,GNO2129,0.00012,0.928058,6472.430066
3302,SNK5176&GNO2420,GNO2129,0.000103,0.925,6451.106169
5780,SNK5176&WHT4160&GNO2420,GNO2129,0.000103,0.925,6451.106169


In [13]:
# split the antecedent into columns to enable us to show the SKU description for analysis
df_result = df_result.join( df_result['ANTECEDENT'].str.split('&',expand=True).rename(columns={0:'ANTECEDENT_1', 1:'ANTECEDENT_2', 2:'ANTECEDENT_3', 3:'ANTECEDENT_4', 4:'ANTECEDENT_5'} ) )
df_result.sort_values(by=['LIFT'], ascending=False).head(10)

Unnamed: 0,ANTECEDENT,CONSEQUENT,SUPPORT,CONFIDENCE,LIFT,ANTECEDENT_1,ANTECEDENT_2,ANTECEDENT_3,ANTECEDENT_4
3382,WHT4160&GNO2420,GNO2129,0.00012,0.934783,6519.331733,WHT4160,GNO2420,,
3303,SNK5176&GNO2129,GNO2420,0.000103,0.840909,6497.517005,SNK5176,GNO2129,,
5781,SNK5176&WHT4160&GNO2129,GNO2420,0.000103,0.840909,6497.517005,SNK5176,WHT4160,GNO2129,
5789,WHT4160&GNO2920&GNO2129,GNO2420,0.000102,0.839695,6488.133341,WHT4160,GNO2920,GNO2129,
3410,GNO2920&GNO2129,GNO2420,0.000102,0.839695,6488.133341,GNO2920,GNO2129,,
3383,WHT4160&GNO2129,GNO2420,0.00012,0.837662,6472.430066,WHT4160,GNO2129,,
126,GNO2129,GNO2420,0.00012,0.837662,6472.430066,GNO2129,,,
125,GNO2420,GNO2129,0.00012,0.928058,6472.430066,GNO2420,,,
3302,SNK5176&GNO2420,GNO2129,0.000103,0.925,6451.106169,SNK5176,GNO2420,,
5780,SNK5176&WHT4160&GNO2420,GNO2129,0.000103,0.925,6451.106169,SNK5176,WHT4160,GNO2420,


In [14]:
df_basket_analysis = dataframe.create_dataframe_from_pandas(cc, df_result, 'Basket_Analysis_2019', force=True, replace=True)

100%|██████████| 1/1 [00:00<00:00,  2.19it/s]


In [15]:
df_result.to_csv('df_basket_analysis.csv', index=False)

In [16]:
# add sku descriptions to items to make it more readable
df_result['CONSEQUENT_DESC'] = df_result['CONSEQUENT'].map(desc_lookup).fillna(df_result['CONSEQUENT'])
df_result['ANTECEDENT_1_DESC'] = df_result['ANTECEDENT_1'].map(desc_lookup).fillna(df_result['ANTECEDENT_1'])
df_result['ANTECEDENT_2_DESC'] = df_result['ANTECEDENT_2'].map(desc_lookup).fillna(df_result['ANTECEDENT_2'])
df_result['ANTECEDENT_3_DESC'] = df_result['ANTECEDENT_3'].map(desc_lookup).fillna(df_result['ANTECEDENT_3'])
df_result['ANTECEDENT_4_DESC'] = df_result['ANTECEDENT_4'].map(desc_lookup).fillna(df_result['ANTECEDENT_4'])
#df_result['ANTECEDENT_5_DESC'] = df_result['ANTECEDENT_5'].map(desc_lookup).fillna(df_result['ANTECEDENT_5'])

# create a new dataframe of the result using sku descriptions
df_result_desc = df_result[['ANTECEDENT_1_DESC','ANTECEDENT_2_DESC','ANTECEDENT_3_DESC','ANTECEDENT_4_DESC','CONSEQUENT_DESC','SUPPORT','CONFIDENCE','LIFT']]

In [17]:
df_result_desc.to_csv('df_basket_analysis_desc.csv', index=False)

In [18]:
# show the result (ordered by lift) using SKU descriptions
df_result_desc.sort_values(by=['LIFT'], ascending=False).head(10)

Unnamed: 0,ANTECEDENT_1_DESC,ANTECEDENT_2_DESC,ANTECEDENT_3_DESC,ANTECEDENT_4_DESC,CONSEQUENT_DESC,SUPPORT,CONFIDENCE,LIFT
3277,Blk Gran Effect 3m 600 38 8mm rad W/top,Gno Lo Nh 170D 500 Full Ht Wall,,,Gno Lo 170D 500 4 Dwr Base,0.00012,0.934783,6519.331733
5785,Blk Gran Effect 3m 600 38 8mm rad W/top,Gno Lo 170D 500 4 Dwr Base,Drayton 1.5 bowl inc waste,,Gno Lo Nh 170D 500 Full Ht Wall,0.000103,0.840909,6497.517005
3405,Gno Lo 170D 500 4 Dwr Base,Drayton 1.5 bowl inc waste,,,Gno Lo Nh 170D 500 Full Ht Wall,0.000103,0.840909,6497.517005
3402,Gno Lo 170D 500 4 Dwr Base,Gno Continuous Plinth,,,Gno Lo Nh 170D 500 Full Ht Wall,0.000102,0.839695,6488.133341
5781,Blk Gran Effect 3m 600 38 8mm rad W/top,Gno Lo 170D 500 4 Dwr Base,Gno Continuous Plinth,,Gno Lo Nh 170D 500 Full Ht Wall,0.000102,0.839695,6488.133341
3278,Blk Gran Effect 3m 600 38 8mm rad W/top,Gno Lo 170D 500 4 Dwr Base,,,Gno Lo Nh 170D 500 Full Ht Wall,0.00012,0.837662,6472.430066
112,Gno Lo 170D 500 4 Dwr Base,,,,Gno Lo Nh 170D 500 Full Ht Wall,0.00012,0.837662,6472.430066
111,Gno Lo Nh 170D 500 Full Ht Wall,,,,Gno Lo 170D 500 4 Dwr Base,0.00012,0.928058,6472.430066
3404,Gno Lo Nh 170D 500 Full Ht Wall,Drayton 1.5 bowl inc waste,,,Gno Lo 170D 500 4 Dwr Base,0.000103,0.925,6451.106169
5784,Blk Gran Effect 3m 600 38 8mm rad W/top,Gno Lo Nh 170D 500 Full Ht Wall,Drayton 1.5 bowl inc waste,,Gno Lo 170D 500 4 Dwr Base,0.000103,0.925,6451.106169


In [19]:
# show the result (ordered by confidence) using SKU descriptions
df_result_desc.sort_values(by=['CONFIDENCE'], ascending=False).head(10)

Unnamed: 0,ANTECEDENT_1_DESC,ANTECEDENT_2_DESC,ANTECEDENT_3_DESC,ANTECEDENT_4_DESC,CONSEQUENT_DESC,SUPPORT,CONFIDENCE,LIFT
6173,88x2070x25 Kit Std Primed-Wh Hinges-826,88x2070x25 Kit Std Primed-Wh Hinges-626,125x2070x25 Kit Std Primed-Wh Hinges-826,,826 6PG Std Doorkit Leaf,0.000154,1.0,2942.526027
5695,NP Tubular econ 2.5 mort.latch,Blk Gran Effect 3m 600 38 8mm rad W/top,Drayton 1.5 bowl inc waste,,S/S Effect T Bar Handle - 230mm,0.000106,1.0,169.725348
5787,Gno Lo 170D 500 4 Dwr Base,Black 38mm End Cap,Light Oak plinth corner strips,,Blk Gran Effect 3m 600 38 8mm rad W/top,0.000103,1.0,212.299269
6230,Stainless Steel Effect D Handle,Blk Gran Effect 3m 600 38 8mm rad W/top,HD Leg Plinth Return Pack,Lamona Chrome Effect Monobloc Tap,Black 38mm End Cap,0.000102,1.0,320.030393
6090,826 6PG Bath Doorkit Leaf,88x2070x25 Kit Std Primed-Wh Hinges-826,88x2070x25 Kit Std Primed-Wh Hinges-626,,826 6PG Std Doorkit Leaf,0.000237,1.0,2942.526027
4555,Stainless Steel Effect D Handle,Blk Gran Effect 3m 600 38 8mm rad W/top,Lamona Chrome Effect Monobloc Tap,,Black 38mm End Cap,0.000103,1.0,320.030393
4151,826 6PG Bath Doorkit Leaf,125x2070x25 Kit Std Primed-Wh Hinges-826,,,88x2070x25 Kit Bath Primed-Wh Hnges-826,0.000205,1.0,3356.31875
4150,88x2070x25 Kit Bath Primed-Wh Hnges-826,125x2070x25 Kit Std Primed-Wh Hinges-826,,,826 6PG Bath Doorkit Leaf,0.000205,1.0,2926.490463
5789,Blk Gran Effect 3m 600 38 8mm rad W/top,Gno Lo 170D 500 4 Dwr Base,Light Oak plinth corner strips,,Black 38mm End Cap,0.000103,1.0,320.030393
5790,Blk Gran Effect 3m 600 38 8mm rad W/top,Gno Lo 170D 500 4 Dwr Base,Black 38mm End Cap,,Light Oak plinth corner strips,0.000103,1.0,3051.198864


In [20]:
from hana_ml.algorithms.pal.association import FPGrowth

In [21]:
fpg = FPGrowth(
                   min_support=0.0001,
                   min_confidence=0.5,
                   relational=False,
                   min_lift=1.0,
                   max_conseq=1,
                   max_len=5,
                   ubiquitous=1.0,
                   thread_ratio=0,
                   timeout=3600)

In [22]:
fpg.fit(data=df_remote)

In [23]:
df_fpg_result = fpg.result_.collect()
df_fpg_result.sort_values(by=['LIFT'], ascending=False).head(10)

Unnamed: 0,ANTECEDENT,CONSEQUENT,SUPPORT,CONFIDENCE,LIFT
29537,HNG0025&DCL0007&DCL0007&SCF0009,GIR0038,0.013218,14.0,51318.457338
28963,HNG0025&DXE0001&DXE0001&LAL0505,GIR0038,0.015421,14.0,51318.457338
30422,HNG0025&LAL0230&LAL0230&DFU0205,GIR0038,0.015056,14.0,51318.457338
29121,HNG0025&DXE0001&DCL0007&DFU0205,GIR0038,0.026096,14.0,51318.457338
29156,HNG0025&DXE0001&SCF0009&SCF0009,GIR0038,0.014234,14.0,51318.457338
29275,HNG0025&DXE0001&DFR0111&LAL0505,GIR0038,0.030841,14.0,51318.457338
29295,HNG0025&DXE0001&DFR0111&DFU0205,GIR0038,0.026096,14.0,51318.457338
29337,HNG0025&DXE0001&LAL0505&LAL0505,GIR0038,0.014234,14.0,51318.457338
29372,HNG0025&DXE0001&LAL0230&LAL0230,GIR0038,0.019162,14.0,51318.457338
29606,HNG0025&DCL0007&DCL0007&DFU0205,GIR0038,0.011184,14.0,51318.457338


In [24]:
df_fpg_result = df_fpg_result.join( df_fpg_result['ANTECEDENT'].str.split('&',expand=True).rename(columns={0:'ANTECEDENT_1', 1:'ANTECEDENT_2', 2:'ANTECEDENT_3', 3:'ANTECEDENT_4', 4:'ANTECEDENT_5'} ) )
df_fpg_result.sort_values(by=['LIFT'], ascending=False).head(10)

Unnamed: 0,ANTECEDENT,CONSEQUENT,SUPPORT,CONFIDENCE,LIFT,ANTECEDENT_1,ANTECEDENT_2,ANTECEDENT_3,ANTECEDENT_4
29537,HNG0025&DCL0007&DCL0007&SCF0009,GIR0038,0.013218,14.0,51318.457338,HNG0025,DCL0007,DCL0007,SCF0009
28963,HNG0025&DXE0001&DXE0001&LAL0505,GIR0038,0.015421,14.0,51318.457338,HNG0025,DXE0001,DXE0001,LAL0505
30422,HNG0025&LAL0230&LAL0230&DFU0205,GIR0038,0.015056,14.0,51318.457338,HNG0025,LAL0230,LAL0230,DFU0205
29121,HNG0025&DXE0001&DCL0007&DFU0205,GIR0038,0.026096,14.0,51318.457338,HNG0025,DXE0001,DCL0007,DFU0205
29156,HNG0025&DXE0001&SCF0009&SCF0009,GIR0038,0.014234,14.0,51318.457338,HNG0025,DXE0001,SCF0009,SCF0009
29275,HNG0025&DXE0001&DFR0111&LAL0505,GIR0038,0.030841,14.0,51318.457338,HNG0025,DXE0001,DFR0111,LAL0505
29295,HNG0025&DXE0001&DFR0111&DFU0205,GIR0038,0.026096,14.0,51318.457338,HNG0025,DXE0001,DFR0111,DFU0205
29337,HNG0025&DXE0001&LAL0505&LAL0505,GIR0038,0.014234,14.0,51318.457338,HNG0025,DXE0001,LAL0505,LAL0505
29372,HNG0025&DXE0001&LAL0230&LAL0230,GIR0038,0.019162,14.0,51318.457338,HNG0025,DXE0001,LAL0230,LAL0230
29606,HNG0025&DCL0007&DCL0007&DFU0205,GIR0038,0.011184,14.0,51318.457338,HNG0025,DCL0007,DCL0007,DFU0205


In [25]:
df_fpg_result['CONSEQUENT_DESC'] = df_fpg_result['CONSEQUENT'].map(desc_lookup).fillna(df_fpg_result['CONSEQUENT'])
df_fpg_result['ANTECEDENT_1_DESC'] = df_fpg_result['ANTECEDENT_1'].map(desc_lookup).fillna(df_fpg_result['ANTECEDENT_1'])
df_fpg_result['ANTECEDENT_2_DESC'] = df_fpg_result['ANTECEDENT_2'].map(desc_lookup).fillna(df_fpg_result['ANTECEDENT_2'])
df_fpg_result['ANTECEDENT_3_DESC'] = df_fpg_result['ANTECEDENT_3'].map(desc_lookup).fillna(df_fpg_result['ANTECEDENT_3'])
df_fpg_result['ANTECEDENT_4_DESC'] = df_fpg_result['ANTECEDENT_4'].map(desc_lookup).fillna(df_fpg_result['ANTECEDENT_4'])

df_fpg_result_desc = df_fpg_result[['ANTECEDENT_1_DESC','ANTECEDENT_2_DESC','ANTECEDENT_3_DESC','ANTECEDENT_4_DESC','CONSEQUENT_DESC','SUPPORT','CONFIDENCE','LIFT']]

In [26]:
df_fpg_result_desc.sort_values(by=['LIFT'], ascending=False).head(10)

Unnamed: 0,ANTECEDENT_1_DESC,ANTECEDENT_2_DESC,ANTECEDENT_3_DESC,ANTECEDENT_4_DESC,CONSEQUENT_DESC,SUPPORT,CONFIDENCE,LIFT
29537,"4"" SSS Ball Bearing Hinge Grade 11",Silver briton 2003 dr closer,Silver briton 2003 dr closer,Era chrome door viewer,933mm Aluminium Threshex,0.013218,14.0,51318.457338
28963,"4"" SSS Ball Bearing Hinge Grade 11",Hardwood weather bar,Hardwood weather bar,Nightlatch yale 89 gry case brs cyl,933mm Aluminium Threshex,0.015421,14.0,51318.457338
30422,"4"" SSS Ball Bearing Hinge Grade 11","Euro/oval cyl deadlck case 3"" satin","Euro/oval cyl deadlck case 3"" satin",Vict'n c/plated cylinder pull,933mm Aluminium Threshex,0.015056,14.0,51318.457338
29121,"4"" SSS Ball Bearing Hinge Grade 11",Hardwood weather bar,Silver briton 2003 dr closer,Vict'n c/plated cylinder pull,933mm Aluminium Threshex,0.026096,14.0,51318.457338
29156,"4"" SSS Ball Bearing Hinge Grade 11",Hardwood weather bar,Era chrome door viewer,Era chrome door viewer,933mm Aluminium Threshex,0.014234,14.0,51318.457338
29275,"4"" SSS Ball Bearing Hinge Grade 11",Hardwood weather bar,2'6 Unass FD30 No sill Ex Door Fr,Nightlatch yale 89 gry case brs cyl,933mm Aluminium Threshex,0.030841,14.0,51318.457338
29295,"4"" SSS Ball Bearing Hinge Grade 11",Hardwood weather bar,2'6 Unass FD30 No sill Ex Door Fr,Vict'n c/plated cylinder pull,933mm Aluminium Threshex,0.026096,14.0,51318.457338
29337,"4"" SSS Ball Bearing Hinge Grade 11",Hardwood weather bar,Nightlatch yale 89 gry case brs cyl,Nightlatch yale 89 gry case brs cyl,933mm Aluminium Threshex,0.014234,14.0,51318.457338
29372,"4"" SSS Ball Bearing Hinge Grade 11",Hardwood weather bar,"Euro/oval cyl deadlck case 3"" satin","Euro/oval cyl deadlck case 3"" satin",933mm Aluminium Threshex,0.019162,14.0,51318.457338
29606,"4"" SSS Ball Bearing Hinge Grade 11",Silver briton 2003 dr closer,Silver briton 2003 dr closer,Vict'n c/plated cylinder pull,933mm Aluminium Threshex,0.011184,14.0,51318.457338


In [27]:
df_result_desc.sort_values(by=['LIFT'], ascending=False).head(10)

Unnamed: 0,ANTECEDENT_1_DESC,ANTECEDENT_2_DESC,ANTECEDENT_3_DESC,ANTECEDENT_4_DESC,CONSEQUENT_DESC,SUPPORT,CONFIDENCE,LIFT
3277,Blk Gran Effect 3m 600 38 8mm rad W/top,Gno Lo Nh 170D 500 Full Ht Wall,,,Gno Lo 170D 500 4 Dwr Base,0.00012,0.934783,6519.331733
5785,Blk Gran Effect 3m 600 38 8mm rad W/top,Gno Lo 170D 500 4 Dwr Base,Drayton 1.5 bowl inc waste,,Gno Lo Nh 170D 500 Full Ht Wall,0.000103,0.840909,6497.517005
3405,Gno Lo 170D 500 4 Dwr Base,Drayton 1.5 bowl inc waste,,,Gno Lo Nh 170D 500 Full Ht Wall,0.000103,0.840909,6497.517005
3402,Gno Lo 170D 500 4 Dwr Base,Gno Continuous Plinth,,,Gno Lo Nh 170D 500 Full Ht Wall,0.000102,0.839695,6488.133341
5781,Blk Gran Effect 3m 600 38 8mm rad W/top,Gno Lo 170D 500 4 Dwr Base,Gno Continuous Plinth,,Gno Lo Nh 170D 500 Full Ht Wall,0.000102,0.839695,6488.133341
3278,Blk Gran Effect 3m 600 38 8mm rad W/top,Gno Lo 170D 500 4 Dwr Base,,,Gno Lo Nh 170D 500 Full Ht Wall,0.00012,0.837662,6472.430066
112,Gno Lo 170D 500 4 Dwr Base,,,,Gno Lo Nh 170D 500 Full Ht Wall,0.00012,0.837662,6472.430066
111,Gno Lo Nh 170D 500 Full Ht Wall,,,,Gno Lo 170D 500 4 Dwr Base,0.00012,0.928058,6472.430066
3404,Gno Lo Nh 170D 500 Full Ht Wall,Drayton 1.5 bowl inc waste,,,Gno Lo 170D 500 4 Dwr Base,0.000103,0.925,6451.106169
5784,Blk Gran Effect 3m 600 38 8mm rad W/top,Gno Lo Nh 170D 500 Full Ht Wall,Drayton 1.5 bowl inc waste,,Gno Lo 170D 500 4 Dwr Base,0.000103,0.925,6451.106169


In [28]:
#from hana_ml.algorithms.pal.association import KORD

In [29]:
#krd =  KORD(k=50,measure='lift',min_support=0.001,min_confidence=0.5,epsilon=0.1,use_epsilon=False)

In [30]:
#krd.fit(data=df_remote, transaction='SALES_DOCUMENT' , item='MATERIAL')
#krd.result_.head(5).collect()