# HOSE Real-Estate Stock Market Analysis Code

# Step 1: Install packages

In [1]:
# mlxtend provides the apriori / association_rules functions imported in Step 2.
# Uncomment the next line to install it if it is missing from the environment.
#!pip install mlxtend

# Step 2: Load packages

In [2]:
import pandas as pd

from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Step 3: Load dataset 

In [3]:
# Read the per-stock, per-year indicator sheet; the sheet's first column becomes the row index.
real_estate_xlsx = r'D:\HCMUT\212\Data Mining\presentation\Real Demo\VN-Index\real-estate.xlsx'
df = pd.read_excel(real_estate_xlsx, index_col=0)
df.head()

Unnamed: 0_level_0,Stock code,Year,Stock's return (%),Receivable turnover (RT) (times),Asset turnover (AT) (times),Current Ratio (CR) (times),Quick Ratio (QR) (times),Debt to Equity ratio (DE) (%),Return on equity (ROE) (%),Return on assets (ROA) (%),Net profit margin (NP) (%),Price Earning (P/E) (times),Operating cash flow/Revenue (OCF/R) (times),Investing cash flow/Revenue (ICF/R) (times),Financial cash flow/Revenue (FCF/R) (times)
No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,AGG,2018,-,15.62,0.47,1.37,1.33,45.37,60.2,12.34,50.28,-,0.655723,-0.916593,0.301686
1,AGG,2019,-,3.39,0.1,1.57,0.69,73.65,28.99,8.49,86.64,-,-0.041558,-0.225974,-0.236364
2,AGG,2020,5.23,8.61,0.23,2.02,0.74,107.0,21.98,5.47,25.36,5.34,0.015393,-0.124287,0.326112
3,AGG,2021,81.18,7.18,0.16,1.83,0.72,92.46,16.79,3.76,23.29,10.26,0.479384,-0.206157,-0.339747
4,BCE,2018,1.8,0.45,0.28,1.22,0.78,23.0,10.25,3.02,10.83,4.62,0.093567,-0.073099,0.067251


In [4]:
# Number of rows available for each year in the raw table.
df.loc[:, 'Year'].value_counts()

2018    60
2019    60
2020    60
2021    60
Name: Year, dtype: int64

In [5]:
# Read the yearly benchmark sheet (one row per year); the sheet's first column becomes the row index.
vn_index_xlsx = r'D:\HCMUT\212\Data Mining\presentation\Real Demo\VN-Index\VN-Index.xlsx'
df1 = pd.read_excel(vn_index_xlsx, index_col=0)
df1

Unnamed: 0_level_0,Year,Percent change of VN-Index (%),Average Receivable turnover (RT) (times),Average Asset turnover (AT) (times),Average Return on equity (ROE) (%),Average Return on assets (ROA) (%),Average Net profit margin (NP) (%),Average Price Earning (P/E) (times),Average Operating cash flow/Revenue (OCF/R) (times),Average Investing cash flow/Revenue (ICF/R) (times),Average Financial cash flow/Revenue (FCF/R) (times)
No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,2018,-9.32,6.0073,0.299277,11.230886,4.994135,20.857722,69.021378,-0.392861,-0.278091,0.675654
1,2019,7.67,6.030506,0.29834,11.020253,4.974346,20.775527,68.838761,-0.39424,-0.273829,0.672111
2,2020,14.87,6.072447,0.298851,10.933629,4.965105,20.530633,68.667533,-0.39195,-0.275663,0.674768
3,2021,35.73,6.061181,0.298511,10.859367,4.956287,20.517806,68.87326,-0.390753,-0.274208,0.671263


# Step 4: Data Pre-processing

In [6]:
# Initialise the frame that will hold the binary encoding.
#
# Layout after this cell: 'Stock code', 'Year', then one complementary column
# pair per measure -- ("Non-outperforming stock", "Outperforming stock") for the
# stock's return and ("<code>=0", "<code>=1") for each indicator below.
# The second column of every pair starts out holding the raw value copied from
# df; the first column of every pair starts out as the '-' placeholder.
indicator_codes = ['RT', 'AT', 'CR', 'QR', 'DE', 'ROE', 'ROA', 'NP', 'P/E', 'OCF/R', 'ICF/R', 'FCF/R']

encoded_df = df.copy(deep=True)
encoded_df.columns = ['Stock code', 'Year', 'Outperforming stock'] + [f'{code}=1' for code in indicator_codes]

# The raw value columns are later overwritten with True/False. Any of them that
# was read with a numeric dtype would need a silent dtype upcast for that, which
# pandas >= 2.1 deprecates, so store them as generic objects from the start.
# 'Stock code' and 'Year' keep their original dtypes.
value_columns = list(encoded_df.columns[2:])
encoded_df = encoded_df.astype({name: object for name in value_columns})

# Insert the "=0" half of each pair immediately before its "=1" counterpart.
encoded_df.insert(2, "Non-outperforming stock", '-')
for pair_number, code in enumerate(indicator_codes):
    encoded_df.insert(4 + 2 * pair_number, f"{code}=0", '-')

In [7]:
# Encode every measure of every row of df as a complementary pair of flags in
# encoded_df: when the measure's rule holds, the second column of the pair
# ("Outperforming stock" / "<code>=1") is True and the first column
# ("Non-outperforming stock" / "<code>=0") is False; otherwise the reverse.
# A raw value of '-' leaves both columns of the pair as '-'.
#
# Benchmarks come from df1. Each row of df is matched to its benchmark row
# through the Year column instead of the row label modulo 4, so the pairing
# stays correct if a stock lacks a year or the rows are reordered.
_benchmark_row = pd.Index(df1['Year']).get_indexer(df['Year'])
if (_benchmark_row < 0).any():
    _unmatched_years = sorted(set(df.loc[_benchmark_row < 0, 'Year']))
    raise ValueError(f"df1 has no benchmark row for year(s): {_unmatched_years}")


def _benchmark(column_position):
    """Return, for every row of df, the value in column `column_position` of df1 for that row's Year."""
    return df1.iloc[_benchmark_row, column_position].to_numpy()


def _above_benchmark(column_position):
    """Build a rule that holds where the value is strictly greater than the same-year benchmark."""
    reference = _benchmark(column_position)
    return lambda values: values > reference


def _between(lower, upper):
    """Build a rule that holds where lower <= value <= upper."""
    return lambda values: (values >= lower) & (values <= upper)


# One rule per measure, in the column order of df starting at position 2.
# The integer passed to _above_benchmark / _benchmark is a column position in df1.
_rules = [
    _above_benchmark(1),    # stock's return vs. percent change of VN-Index
    _above_benchmark(2),    # RT
    _above_benchmark(3),    # AT
    _between(1.5, 3),       # CR
    lambda values: values >= 1,  # QR
    _between(100, 150),     # DE
    _above_benchmark(4),    # ROE
    _above_benchmark(5),    # ROA
    _above_benchmark(6),    # NP
    _above_benchmark(7),    # P/E
    _above_benchmark(8),    # OCF/R
    # ICF/R: must be negative and also below the same-year benchmark
    lambda values, reference=_benchmark(9): (values < 0) & (values < reference),
    _above_benchmark(10),   # FCF/R
]

for _offset, _rule in enumerate(_rules):
    _raw = df.iloc[:, 2 + _offset]
    _present = _raw != '-'
    # '-' becomes NaN for the comparison; those rows are reset to '-' below.
    # Any other non-numeric text makes pd.to_numeric raise.
    _numeric = pd.to_numeric(_raw.where(_present))
    _holds = _rule(_numeric)

    _first_name = encoded_df.columns[2 + 2 * _offset]   # "Non-outperforming stock" / "<code>=0"
    _second_name = encoded_df.columns[3 + 2 * _offset]  # "Outperforming stock" / "<code>=1"
    # Assign whole columns (as plain arrays) so no per-cell write has to change a column's dtype.
    encoded_df[_first_name] = (~_holds).astype(object).where(_present, '-').to_numpy()
    encoded_df[_second_name] = _holds.astype(object).where(_present, '-').to_numpy()
encoded_df

Unnamed: 0_level_0,Stock code,Year,Non-outperforming stock,Outperforming stock,RT=0,RT=1,AT=0,AT=1,CR=0,CR=1,...,NP=0,NP=1,P/E=0,P/E=1,OCF/R=0,OCF/R=1,ICF/R=0,ICF/R=1,FCF/R=0,FCF/R=1
No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,AGG,2018,-,-,False,True,False,True,True,False,...,False,True,-,-,False,True,False,True,True,False
1,AGG,2019,-,-,True,False,True,False,False,True,...,False,True,-,-,False,True,True,False,True,False
2,AGG,2020,True,False,False,True,True,False,False,True,...,False,True,True,False,False,True,True,False,True,False
3,AGG,2021,False,True,False,True,True,False,False,True,...,False,True,True,False,False,True,True,False,True,False
4,BCE,2018,False,True,True,False,True,False,True,False,...,True,False,True,False,False,True,True,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,VRC,2021,False,True,True,False,-,-,True,False,...,True,False,False,True,True,False,-,-,False,True
236,VRE,2018,True,False,False,True,True,False,True,False,...,False,True,True,False,False,True,True,False,True,False
237,VRE,2019,False,True,False,True,True,False,True,False,...,False,True,True,False,False,True,True,False,True,False
238,VRE,2020,True,False,False,True,True,False,False,True,...,False,True,True,False,False,True,False,True,True,False


In [8]:
# Row count per year after encoding.
encoded_df.loc[:, 'Year'].value_counts()

2018    60
2019    60
2020    60
2021    60
Name: Year, dtype: int64

## Separate encoded_df into sr_df and fi_cf_df, then combine years 2018, 2019, 2020 of fi_cf_df with years 2019, 2020, 2021 of sr_df respectively

In [9]:
# Identifier columns plus the stock-return flag pair (the first four columns of encoded_df).
sr_df = encoded_df.loc[:, 'Stock code':'Outperforming stock']
sr_df

Unnamed: 0_level_0,Stock code,Year,Non-outperforming stock,Outperforming stock
No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,AGG,2018,-,-
1,AGG,2019,-,-
2,AGG,2020,True,False
3,AGG,2021,False,True
4,BCE,2018,False,True
...,...,...,...,...
235,VRC,2021,False,True
236,VRE,2018,True,False
237,VRE,2019,False,True
238,VRE,2020,True,False


In [10]:
# Discard the 2018 rows and renumber the remaining rows from 0.
not_2018 = sr_df["Year"].ne(2018)
sr_df = sr_df[not_2018].reset_index(drop=True)
sr_df

Unnamed: 0,Stock code,Year,Non-outperforming stock,Outperforming stock
0,AGG,2019,-,-
1,AGG,2020,True,False
2,AGG,2021,False,True
3,BCE,2019,False,True
4,BCE,2020,False,True
...,...,...,...,...
175,VRC,2020,True,False
176,VRC,2021,False,True
177,VRE,2019,False,True
178,VRE,2020,True,False


In [11]:
# Identifier columns (positions 0-1) plus every indicator flag column (positions 4-27);
# positions 2-3, the stock-return flags, are left out.
indicator_positions = [0, 1, *range(4, 28)]
fi_cf_df = encoded_df.iloc[:, indicator_positions]
fi_cf_df

Unnamed: 0_level_0,Stock code,Year,RT=0,RT=1,AT=0,AT=1,CR=0,CR=1,QR=0,QR=1,...,NP=0,NP=1,P/E=0,P/E=1,OCF/R=0,OCF/R=1,ICF/R=0,ICF/R=1,FCF/R=0,FCF/R=1
No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,AGG,2018,False,True,False,True,True,False,False,True,...,False,True,-,-,False,True,False,True,True,False
1,AGG,2019,True,False,True,False,False,True,True,False,...,False,True,-,-,False,True,True,False,True,False
2,AGG,2020,False,True,True,False,False,True,True,False,...,False,True,True,False,False,True,True,False,True,False
3,AGG,2021,False,True,True,False,False,True,True,False,...,False,True,True,False,False,True,True,False,True,False
4,BCE,2018,True,False,True,False,True,False,True,False,...,True,False,True,False,False,True,True,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,VRC,2021,True,False,-,-,True,False,False,True,...,True,False,False,True,True,False,-,-,False,True
236,VRE,2018,False,True,True,False,True,False,True,False,...,False,True,True,False,False,True,True,False,True,False
237,VRE,2019,False,True,True,False,True,False,True,False,...,False,True,True,False,False,True,True,False,True,False
238,VRE,2020,False,True,True,False,False,True,False,True,...,False,True,True,False,False,True,False,True,True,False


In [12]:
# Discard the 2021 rows and renumber the remaining rows from 0.
not_2021 = fi_cf_df["Year"].ne(2021)
fi_cf_df = fi_cf_df[not_2021].reset_index(drop=True)
fi_cf_df

Unnamed: 0,Stock code,Year,RT=0,RT=1,AT=0,AT=1,CR=0,CR=1,QR=0,QR=1,...,NP=0,NP=1,P/E=0,P/E=1,OCF/R=0,OCF/R=1,ICF/R=0,ICF/R=1,FCF/R=0,FCF/R=1
0,AGG,2018,False,True,False,True,True,False,False,True,...,False,True,-,-,False,True,False,True,True,False
1,AGG,2019,True,False,True,False,False,True,True,False,...,False,True,-,-,False,True,True,False,True,False
2,AGG,2020,False,True,True,False,False,True,True,False,...,False,True,True,False,False,True,True,False,True,False
3,BCE,2018,True,False,True,False,True,False,True,False,...,True,False,True,False,False,True,True,False,True,False
4,BCE,2019,True,False,False,True,False,True,False,True,...,True,False,True,False,False,True,True,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,VRC,2019,True,False,True,False,True,False,True,False,...,False,True,True,False,True,False,True,False,False,True
176,VRC,2020,True,False,True,False,True,False,True,False,...,True,False,False,True,True,False,True,False,False,True
177,VRE,2018,False,True,True,False,True,False,True,False,...,False,True,True,False,False,True,True,False,True,False
178,VRE,2019,False,True,True,False,True,False,True,False,...,False,True,True,False,False,True,True,False,True,False


In [13]:
# Attach to each indicator row (stock, year Y) the return flags of the same
# stock in year Y + 1.
#
# The pairing is done by key -- (Stock code, Year) against (Stock code, Year - 1)
# of sr_df -- rather than by row position, so a missing stock-year cannot shift
# the labels of every following row. Indicator rows without a next-year label
# are left out of final_df. validate="one_to_one" makes duplicate
# (Stock code, Year) keys on either side raise instead of multiplying rows.
# fi_cf_df itself is not modified, so the cell can be re-run safely.
next_year_names = {
    "Non-outperforming stock": "Non-outperforming stock in the next year",
    "Outperforming stock": "Outperforming stock in the next year",
}
next_year_labels = (
    sr_df
    .assign(Year=sr_df["Year"] - 1)   # key the label by the year it is predicted FROM
    .rename(columns=next_year_names)
)
final_df = fi_cf_df.merge(
    next_year_labels,
    on=["Stock code", "Year"],
    how="inner",
    validate="one_to_one",
)

# Put the two label columns right after the identifiers (positions 2 and 3).
leading_columns = ["Stock code", "Year", *next_year_names.values()]
final_df = final_df[leading_columns + [name for name in final_df.columns if name not in leading_columns]]
final_df

Unnamed: 0,Stock code,Year,Non-outperforming stock in the next year,Outperforming stock in the next year,RT=0,RT=1,AT=0,AT=1,CR=0,CR=1,...,NP=0,NP=1,P/E=0,P/E=1,OCF/R=0,OCF/R=1,ICF/R=0,ICF/R=1,FCF/R=0,FCF/R=1
0,AGG,2018,-,-,False,True,False,True,True,False,...,False,True,-,-,False,True,False,True,True,False
1,AGG,2019,True,False,True,False,True,False,False,True,...,False,True,-,-,False,True,True,False,True,False
2,AGG,2020,False,True,False,True,True,False,False,True,...,False,True,True,False,False,True,True,False,True,False
3,BCE,2018,False,True,True,False,True,False,True,False,...,True,False,True,False,False,True,True,False,True,False
4,BCE,2019,False,True,True,False,False,True,False,True,...,True,False,True,False,False,True,True,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,VRC,2019,True,False,True,False,True,False,True,False,...,False,True,True,False,True,False,True,False,False,True
176,VRC,2020,False,True,True,False,True,False,True,False,...,True,False,False,True,True,False,True,False,False,True
177,VRE,2018,False,True,False,True,True,False,True,False,...,False,True,True,False,False,True,True,False,True,False
178,VRE,2019,True,False,False,True,True,False,True,False,...,False,True,True,False,False,True,True,False,True,False


# Step 5: Data Cleaning

Removing rows having '-' cell(s)

In [14]:
# Keep only the rows in which no cell is the '-' placeholder.
# A boolean mask replaces the row-by-row loop, which dropped rows from the
# frame while iterating over it. The original row labels are preserved.
contains_placeholder = final_df.eq('-').any(axis=1)
# .copy() makes final_df an independent frame so later in-place edits are safe.
final_df = final_df.loc[~contains_placeholder].copy()

final_df

Unnamed: 0,Stock code,Year,Non-outperforming stock in the next year,Outperforming stock in the next year,RT=0,RT=1,AT=0,AT=1,CR=0,CR=1,...,NP=0,NP=1,P/E=0,P/E=1,OCF/R=0,OCF/R=1,ICF/R=0,ICF/R=1,FCF/R=0,FCF/R=1
2,AGG,2020,False,True,False,True,True,False,False,True,...,False,True,True,False,False,True,True,False,True,False
3,BCE,2018,False,True,True,False,True,False,True,False,...,True,False,True,False,False,True,True,False,True,False
4,BCE,2019,False,True,True,False,False,True,False,True,...,True,False,True,False,False,True,True,False,True,False
5,BCE,2020,False,True,True,False,False,True,True,False,...,True,False,True,False,False,True,True,False,True,False
6,BCM,2018,False,True,True,False,True,False,True,False,...,False,True,True,False,False,True,True,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,VRC,2019,True,False,True,False,True,False,True,False,...,False,True,True,False,True,False,True,False,False,True
176,VRC,2020,False,True,True,False,True,False,True,False,...,True,False,False,True,True,False,True,False,False,True
177,VRE,2018,False,True,False,True,True,False,True,False,...,False,True,True,False,False,True,True,False,True,False
178,VRE,2019,True,False,False,True,True,False,True,False,...,False,True,True,False,False,True,True,False,True,False


Delete the columns "Stock code" and "Year"

In [15]:
# Remove the identifier columns so that only the flag columns remain, and
# renumber the rows from 0. errors="ignore" lets the cell be re-run after the
# columns have already been removed.
final_df = (
    final_df
    .drop(columns=["Stock code", "Year"], errors="ignore")
    .reset_index(drop=True)
)

# apriori expects a boolean table. The flag columns are stored with object
# dtype, so verify that nothing but True/False is left (a stray '-' would
# otherwise be cast to True) and then convert to a real bool dtype.
if not final_df.isin([True, False]).to_numpy().all():
    raise ValueError("final_df still contains values other than True/False; run the cleaning step first")
final_df = final_df.astype(bool)
final_df

Unnamed: 0,Non-outperforming stock in the next year,Outperforming stock in the next year,RT=0,RT=1,AT=0,AT=1,CR=0,CR=1,QR=0,QR=1,...,NP=0,NP=1,P/E=0,P/E=1,OCF/R=0,OCF/R=1,ICF/R=0,ICF/R=1,FCF/R=0,FCF/R=1
0,False,True,False,True,True,False,False,True,True,False,...,False,True,True,False,False,True,True,False,True,False
1,False,True,True,False,True,False,True,False,True,False,...,True,False,True,False,False,True,True,False,True,False
2,False,True,True,False,False,True,False,True,False,True,...,True,False,True,False,False,True,True,False,True,False
3,False,True,True,False,False,True,True,False,False,True,...,True,False,True,False,False,True,True,False,True,False
4,False,True,True,False,True,False,True,False,True,False,...,False,True,True,False,False,True,True,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136,True,False,True,False,True,False,True,False,True,False,...,False,True,True,False,True,False,True,False,False,True
137,False,True,True,False,True,False,True,False,True,False,...,True,False,False,True,True,False,True,False,False,True
138,False,True,False,True,True,False,True,False,True,False,...,False,True,True,False,False,True,True,False,True,False
139,True,False,False,True,True,False,True,False,True,False,...,False,True,True,False,False,True,True,False,True,False


In [16]:
# Optional export: uncomment to write final_df to a CSV file (without the row index).
# final_df.to_csv(r'D:\HCMUT\212\Data Mining\presentation\Real Demo\VN-Index\final_df.csv', index = False)

# Step 6: Basket Creation

Now that the data is structured properly, generate the frequent itemsets with support ≥ 0.1. In Step 7, association rules with confidence ≥ 0.849 and lift > 1 are selected from these itemsets.

In [17]:
# Mine every itemset whose support is at least 0.1; use_colnames=True reports
# items by column name instead of column position.
min_support_share = 0.1
frequent_itemsets = apriori(final_df, use_colnames=True, min_support=min_support_share)

In [18]:
# Display the mined itemsets together with their support.
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.382979,(Non-outperforming stock in the next year)
1,0.617021,(Outperforming stock in the next year)
2,0.695035,(RT=0)
3,0.304965,(RT=1)
4,0.58156,(AT=0)
...,...,...
13176,0.106383,"(NP=0, DE=0, OCF/R=1, ROE=0, QR=0, ROA=0, RT=0..."
13177,0.106383,"(DE=0, P/E=0, OCF/R=1, ROE=0, QR=0, ROA=0, RT=..."
13178,0.106383,"(NP=0, DE=0, P/E=0, OCF/R=1, ROA=0, QR=0, RT=0..."
13179,0.134752,"(NP=0, DE=0, P/E=0, OCF/R=1, ROE=0, ROA=0, RT=..."


# Step 7: Generate Rules

In [34]:
# Derive association rules from the frequent itemsets, keeping those whose
# confidence reaches the threshold.
# NOTE(review): 0.849 rather than 0.85 is presumably meant to keep rules whose
# confidence is 0.85 up to floating-point rounding -- confirm with the author.
confidence_floor = 0.849
rules = association_rules(frequent_itemsets, min_threshold=confidence_floor, metric="confidence")
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Non-outperforming stock in the next year),(DE=0),0.382979,0.936170,0.368794,0.962963,1.028620,0.010261,1.723404
1,(Non-outperforming stock in the next year),(P/E=0),0.382979,0.929078,0.361702,0.944444,1.016539,0.005885,1.276596
2,(Non-outperforming stock in the next year),(OCF/R=1),0.382979,0.886525,0.340426,0.888889,1.002667,0.000905,1.021277
3,(Non-outperforming stock in the next year),(FCF/R=0),0.382979,0.907801,0.347518,0.907407,0.999566,-0.000151,0.995745
4,(Outperforming stock in the next year),(DE=0),0.617021,0.936170,0.567376,0.919540,0.982236,-0.010261,0.793313
...,...,...,...,...,...,...,...,...,...
59515,"(NP=0, P/E=0, ROE=0, QR=0, FCF/R=0, CR=0)","(ICF/R=0, OCF/R=1, DE=0, ROA=0)",0.163121,0.354610,0.141844,0.869565,2.452174,0.084000,4.947991
59516,"(NP=0, P/E=0, ROE=0, QR=0, ICF/R=0, CR=0)","(OCF/R=1, FCF/R=0, DE=0, ROA=0)",0.148936,0.411348,0.141844,0.952381,2.315271,0.080579,12.361702
59517,"(NP=0, OCF/R=1, ROE=0, QR=0, ICF/R=0, CR=0)","(P/E=0, FCF/R=0, DE=0, ROA=0)",0.163121,0.390071,0.141844,0.869565,2.229249,0.078215,4.676123
59518,"(NP=0, ROE=0, QR=0, FCF/R=0, ICF/R=0, CR=0)","(P/E=0, OCF/R=1, DE=0, ROA=0)",0.163121,0.382979,0.141844,0.869565,2.270531,0.079372,4.730496


In [35]:
# Keep only the rules whose lift is strictly greater than 1.
lift_above_one = rules["lift"].gt(1)
rules1 = rules.loc[lift_above_one]
rules1

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Non-outperforming stock in the next year),(DE=0),0.382979,0.936170,0.368794,0.962963,1.028620,0.010261,1.723404
1,(Non-outperforming stock in the next year),(P/E=0),0.382979,0.929078,0.361702,0.944444,1.016539,0.005885,1.276596
2,(Non-outperforming stock in the next year),(OCF/R=1),0.382979,0.886525,0.340426,0.888889,1.002667,0.000905,1.021277
7,(Outperforming stock in the next year),(FCF/R=0),0.617021,0.907801,0.560284,0.908046,1.000269,0.000151,1.002660
8,(RT=0),(DE=0),0.695035,0.936170,0.659574,0.948980,1.013683,0.008903,1.251064
...,...,...,...,...,...,...,...,...,...
59515,"(NP=0, P/E=0, ROE=0, QR=0, FCF/R=0, CR=0)","(ICF/R=0, OCF/R=1, DE=0, ROA=0)",0.163121,0.354610,0.141844,0.869565,2.452174,0.084000,4.947991
59516,"(NP=0, P/E=0, ROE=0, QR=0, ICF/R=0, CR=0)","(OCF/R=1, FCF/R=0, DE=0, ROA=0)",0.148936,0.411348,0.141844,0.952381,2.315271,0.080579,12.361702
59517,"(NP=0, OCF/R=1, ROE=0, QR=0, ICF/R=0, CR=0)","(P/E=0, FCF/R=0, DE=0, ROA=0)",0.163121,0.390071,0.141844,0.869565,2.229249,0.078215,4.676123
59518,"(NP=0, ROE=0, QR=0, FCF/R=0, ICF/R=0, CR=0)","(P/E=0, OCF/R=1, DE=0, ROA=0)",0.163121,0.382979,0.141844,0.869565,2.270531,0.079372,4.730496


In [36]:
# Keep the rules whose consequent is exactly the single item
# 'Outperforming stock in the next year'.
target_consequent = frozenset({'Outperforming stock in the next year'})
has_target_consequent = rules1["consequents"] == target_consequent
rules2 = rules1.loc[has_target_consequent]
rules2

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
6302,"(QR=0, ROE=1, OCF/R=1, RT=0)",(Outperforming stock in the next year),0.141844,0.617021,0.120567,0.85,1.377586,0.033047,2.553191
17819,"(P/E=0, ROE=1, OCF/R=1, QR=0, RT=0)",(Outperforming stock in the next year),0.141844,0.617021,0.120567,0.85,1.377586,0.033047,2.553191
17825,"(ROE=1, OCF/R=1, QR=0, FCF/R=0, RT=0)",(Outperforming stock in the next year),0.141844,0.617021,0.120567,0.85,1.377586,0.033047,2.553191
34168,"(P/E=0, ROE=1, OCF/R=1, QR=0, RT=0, FCF/R=0)",(Outperforming stock in the next year),0.141844,0.617021,0.120567,0.85,1.377586,0.033047,2.553191


In [37]:
# Reduce the selected rules to the columns shown in the final result table.
report_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
res = rules2.loc[:, report_columns]
res

Unnamed: 0,antecedents,consequents,support,confidence,lift
6302,"(QR=0, ROE=1, OCF/R=1, RT=0)",(Outperforming stock in the next year),0.120567,0.85,1.377586
17819,"(P/E=0, ROE=1, OCF/R=1, QR=0, RT=0)",(Outperforming stock in the next year),0.120567,0.85,1.377586
17825,"(ROE=1, OCF/R=1, QR=0, FCF/R=0, RT=0)",(Outperforming stock in the next year),0.120567,0.85,1.377586
34168,"(P/E=0, ROE=1, OCF/R=1, QR=0, RT=0, FCF/R=0)",(Outperforming stock in the next year),0.120567,0.85,1.377586
