<a href="https://colab.research.google.com/github/eceyucesoy/Sales-Fact/blob/main/question4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
!pip install apyori
from apyori import apriori
from IPython.display import display

# Notebook display setting: with 'all', IPython shows the value of every
# top-level expression statement in a cell instead of only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

def convertToTransactions(data):
  """Convert a DataFrame into a list of transactions (lists of item strings).

  Each row becomes one transaction. Every cell is converted with ``str`` and
  cells whose string form is ``'nan'`` are left out of the transaction.

  Cells are addressed by position, so the frame may carry any row index and
  any column labels (label-based ``.loc[i, j]`` only worked for frames with a
  default integer index and integer column labels).

  Args:
    data: pandas DataFrame holding one transaction per row.

  Returns:
    A list with one list of strings per row, in row order.
  """
  row_count, column_count = data.shape

  transactions = []
  for i in range(row_count):
    # .iat is positional scalar access, independent of index/column labels.
    cells = (str(data.iat[i, j]) for j in range(column_count))
    transactions.append([cell for cell in cells if cell != 'nan'])
  return transactions

# Flattens apriori result records into a pandas DataFrame with one row per rule, optionally sorted
def convertToDataFrame(results, sortBy=None):
  """Build a rule table from apriori result records.

  Every entry of ``results`` must expose ``support`` and ``ordered_statistics``
  and carry its itemset at position 0; every ordered statistic must expose
  ``items_base``, ``items_add``, ``confidence`` and ``lift``.

  Args:
    results: iterable of result records.
    sortBy: optional column name; when given, rows are sorted by that column
      in descending order.

  Returns:
    DataFrame with columns 'items', 'support', 'lhs', 'direction', 'rhs',
    'confidence' and 'lift'. 'lhs' and 'rhs' hold the string form of the
    item lists.
  """
  column_names = ['items', 'support', 'lhs', 'direction', 'rhs', 'confidence', 'lift']
  rows = [
      [
          record[0],
          record.support,
          str(list(stat.items_base)),
          " -> ",
          str(list(stat.items_add)),
          stat.confidence,
          stat.lift,
      ]
      for record in results
      for stat in record.ordered_statistics
  ]
  table = pd.DataFrame(rows, columns=column_names)

  if not sortBy:
    return table
  return table.sort_values(by=[sortBy], ascending=False)

# Returns only the frequent itemsets (no rules), optionally sorted
def getFrequentItemset(transactions, support, length, sortBy=None):
  """Mine frequent itemsets with apriori and return them as a DataFrame.

  Args:
    transactions: list of transactions (each a list of items), or a pandas
      DataFrame, which is first converted with convertToTransactions.
    support: passed to apriori as ``min_support``.
    length: passed to apriori as ``max_length``.
    sortBy: optional column name ('items' or 'support'); when given, rows are
      sorted by that column in descending order.

  Returns:
    DataFrame with columns 'items' and 'support', one row per itemset.
  """
  if isinstance(transactions, pd.DataFrame):
    transactions = convertToTransactions(transactions)

  records = apriori(transactions, min_support=support, max_length=length)

  # One row per result record. Iterating over record.ordered_statistics here
  # would repeat the same itemset once for every rule derived from it.
  itemsets = [[record.items, record.support] for record in records]
  output = pd.DataFrame(itemsets, columns=['items', 'support'])

  if sortBy:
    output = output.sort_values(by=[sortBy], ascending=False)
  return output

# Example data set: each inner list is one transaction (a basket of item labels).
transactions = [
    ['K', 'A', 'D', 'B'],
    ['D', 'A', 'C', 'E', 'B'],
    ['C', 'A', 'B', 'E'],
    ['B', 'A', 'D'],
]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# **Minimum Support = 0.6**
# **Minimum Confidence = 0.8**

In [None]:
# Encode the transactions with TransactionEncoder and wrap the result in a
# DataFrame whose columns are the encoder's columns_.
# NOTE(review): transactions_dataFrame is not used by the apriori call below,
# which receives the raw list of lists - confirm whether it is needed elsewhere.
model = TransactionEncoder()
encoded = model.fit_transform(transactions)
transactions_dataFrame = pd.DataFrame(encoded, columns=model.columns_)
# Run apriori with minimum support 0.6 and minimum confidence 0.8
records = apriori(transactions, min_support = 0.6, min_confidence = 0.8)

# Materialize the records so they can be both displayed and passed to the helper below
result = list(records)

# Remove the column-width limit so long cell contents are not truncated when displayed
pd.set_option('max_colwidth', None)

# Raw view: one row per result record
display(pd.DataFrame(result))

# Use the convertToDataFrame helper to show one row per rule (lhs -> rhs with support, confidence and lift).
# No sortBy argument is passed, so the rows are NOT sorted; pass sortBy='lift' to order them by lift.
resultDataFrame = convertToDataFrame(result)
display(resultDataFrame)

Unnamed: 0,items,support,ordered_statistics
0,(A),1.0,"[((), (A), 1.0, 1.0)]"
1,(B),1.0,"[((), (B), 1.0, 1.0)]"
2,"(A, B)",1.0,"[((), (A, B), 1.0, 1.0), ((A), (B), 1.0, 1.0), ((B), (A), 1.0, 1.0)]"
3,"(D, A)",0.75,"[((D), (A), 1.0, 1.0)]"
4,"(D, B)",0.75,"[((D), (B), 1.0, 1.0)]"
5,"(D, A, B)",0.75,"[((D), (A, B), 1.0, 1.0), ((D, A), (B), 1.0, 1.0), ((D, B), (A), 1.0, 1.0)]"


Unnamed: 0,items,support,lhs,direction,rhs,confidence,lift
0,(A),1.0,[],->,['A'],1.0,1.0
1,(B),1.0,[],->,['B'],1.0,1.0
2,"(A, B)",1.0,[],->,"['A', 'B']",1.0,1.0
3,"(A, B)",1.0,['A'],->,['B'],1.0,1.0
4,"(A, B)",1.0,['B'],->,['A'],1.0,1.0
5,"(D, A)",0.75,['D'],->,['A'],1.0,1.0
6,"(D, B)",0.75,['D'],->,['B'],1.0,1.0
7,"(D, A, B)",0.75,['D'],->,"['A', 'B']",1.0,1.0
8,"(D, A, B)",0.75,"['D', 'A']",->,['B'],1.0,1.0
9,"(D, A, B)",0.75,"['D', 'B']",->,['A'],1.0,1.0


# **Minimum Support = 0.4**
# **Minimum Confidence = 0.8**

In [None]:
# Run apriori with minimum support 0.4 and minimum confidence 0.8
records = apriori(transactions, min_support = 0.4, min_confidence = 0.8)

# Materialize the records so they can be both displayed and passed to the helper below
result = list(records)

# Remove the column-width limit so long cell contents are not truncated when displayed
pd.set_option('max_colwidth', None)

# Raw view: one row per result record
display(pd.DataFrame(result))

# Use the convertToDataFrame helper to show one row per rule (lhs -> rhs with support, confidence and lift).
# No sortBy argument is passed, so the rows are NOT sorted; pass sortBy='lift' to order them by lift.
resultDataFrame = convertToDataFrame(result)
display(resultDataFrame)


Unnamed: 0,items,support,ordered_statistics
0,(A),1.0,"[((), (A), 1.0, 1.0)]"
1,(B),1.0,"[((), (B), 1.0, 1.0)]"
2,"(A, B)",1.0,"[((), (A, B), 1.0, 1.0), ((A), (B), 1.0, 1.0), ((B), (A), 1.0, 1.0)]"
3,"(A, C)",0.5,"[((C), (A), 1.0, 1.0)]"
4,"(D, A)",0.75,"[((D), (A), 1.0, 1.0)]"
5,"(A, E)",0.5,"[((E), (A), 1.0, 1.0)]"
6,"(C, B)",0.5,"[((C), (B), 1.0, 1.0)]"
7,"(D, B)",0.75,"[((D), (B), 1.0, 1.0)]"
8,"(E, B)",0.5,"[((E), (B), 1.0, 1.0)]"
9,"(C, E)",0.5,"[((C), (E), 1.0, 2.0), ((E), (C), 1.0, 2.0)]"


Unnamed: 0,items,support,lhs,direction,rhs,confidence,lift
0,(A),1.0,[],->,['A'],1.0,1.0
1,(B),1.0,[],->,['B'],1.0,1.0
2,"(A, B)",1.0,[],->,"['A', 'B']",1.0,1.0
3,"(A, B)",1.0,['A'],->,['B'],1.0,1.0
4,"(A, B)",1.0,['B'],->,['A'],1.0,1.0
5,"(A, C)",0.5,['C'],->,['A'],1.0,1.0
6,"(D, A)",0.75,['D'],->,['A'],1.0,1.0
7,"(A, E)",0.5,['E'],->,['A'],1.0,1.0
8,"(C, B)",0.5,['C'],->,['B'],1.0,1.0
9,"(D, B)",0.75,['D'],->,['B'],1.0,1.0


# **Minimum Support = 0.4**
# **Minimum Confidence = 0.4**

In [None]:
# Run apriori with minimum support 0.4 and minimum confidence 0.4
records = apriori(transactions, min_support = 0.4, min_confidence = 0.4)

# Materialize the records so they can be both displayed and passed to the helper below
result = list(records)

# Remove the column-width limit so long cell contents are not truncated when displayed
pd.set_option('max_colwidth', None)

# Raw view: one row per result record
display(pd.DataFrame(result))

# Use the convertToDataFrame helper to show one row per rule (lhs -> rhs with support, confidence and lift).
# No sortBy argument is passed, so the rows are NOT sorted; pass sortBy='lift' to order them by lift.
resultDataFrame = convertToDataFrame(result)
display(resultDataFrame)


Unnamed: 0,items,support,ordered_statistics
0,(A),1.0,"[((), (A), 1.0, 1.0)]"
1,(B),1.0,"[((), (B), 1.0, 1.0)]"
2,(C),0.5,"[((), (C), 0.5, 1.0)]"
3,(D),0.75,"[((), (D), 0.75, 1.0)]"
4,(E),0.5,"[((), (E), 0.5, 1.0)]"
5,"(A, B)",1.0,"[((), (A, B), 1.0, 1.0), ((A), (B), 1.0, 1.0), ((B), (A), 1.0, 1.0)]"
6,"(A, C)",0.5,"[((), (A, C), 0.5, 1.0), ((A), (C), 0.5, 1.0), ((C), (A), 1.0, 1.0)]"
7,"(D, A)",0.75,"[((), (D, A), 0.75, 1.0), ((A), (D), 0.75, 1.0), ((D), (A), 1.0, 1.0)]"
8,"(A, E)",0.5,"[((), (A, E), 0.5, 1.0), ((A), (E), 0.5, 1.0), ((E), (A), 1.0, 1.0)]"
9,"(C, B)",0.5,"[((), (C, B), 0.5, 1.0), ((B), (C), 0.5, 1.0), ((C), (B), 1.0, 1.0)]"


Unnamed: 0,items,support,lhs,direction,rhs,confidence,lift
0,(A),1.00,[],->,['A'],1.00,1.0
1,(B),1.00,[],->,['B'],1.00,1.0
2,(C),0.50,[],->,['C'],0.50,1.0
3,(D),0.75,[],->,['D'],0.75,1.0
4,(E),0.50,[],->,['E'],0.50,1.0
...,...,...,...,...,...,...,...
74,"(A, B, C, E)",0.50,"['C', 'E']",->,"['A', 'B']",1.00,1.0
75,"(A, B, C, E)",0.50,"['A', 'B', 'C']",->,['E'],1.00,2.0
76,"(A, B, C, E)",0.50,"['A', 'B', 'E']",->,['C'],1.00,2.0
77,"(A, B, C, E)",0.50,"['A', 'C', 'E']",->,['B'],1.00,1.0


# **Minimum Support = 0.8**
# **Minimum Confidence = 0.8**

In [None]:
# Run apriori with minimum support 0.8 and minimum confidence 0.8
records = apriori(transactions, min_support = 0.8, min_confidence = 0.8)

# Materialize the records so they can be both displayed and passed to the helper below
result = list(records)

# Remove the column-width limit so long cell contents are not truncated when displayed
pd.set_option('max_colwidth', None)

# Raw view: one row per result record
display(pd.DataFrame(result))

# Use the convertToDataFrame helper to show one row per rule (lhs -> rhs with support, confidence and lift).
# No sortBy argument is passed, so the rows are NOT sorted; pass sortBy='lift' to order them by lift.
resultDataFrame = convertToDataFrame(result)
display(resultDataFrame)

Unnamed: 0,items,support,ordered_statistics
0,(A),1.0,"[((), (A), 1.0, 1.0)]"
1,(B),1.0,"[((), (B), 1.0, 1.0)]"
2,"(A, B)",1.0,"[((), (A, B), 1.0, 1.0), ((A), (B), 1.0, 1.0), ((B), (A), 1.0, 1.0)]"


Unnamed: 0,items,support,lhs,direction,rhs,confidence,lift
0,(A),1.0,[],->,['A'],1.0,1.0
1,(B),1.0,[],->,['B'],1.0,1.0
2,"(A, B)",1.0,[],->,"['A', 'B']",1.0,1.0
3,"(A, B)",1.0,['A'],->,['B'],1.0,1.0
4,"(A, B)",1.0,['B'],->,['A'],1.0,1.0
