<a href="https://colab.research.google.com/github/intimanjunath/VAR-Silver-Gold-Commodity-Pricing-Predict-Time-Series/blob/main/Market_Basket_Item.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install apyori

Collecting apyori
  Downloading apyori-1.1.2.tar.gz (8.6 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: apyori
  Building wheel for apyori (setup.py) ... [?25l[?25hdone
  Created wheel for apyori: filename=apyori-1.1.2-py3-none-any.whl size=5954 sha256=d56a380c0e3c5c2ae61eb6c63653261e9aabca558ae2d13e080c7d1308681d24
  Stored in directory: /root/.cache/pip/wheels/c4/1a/79/20f55c470a50bb3702a8cb7c94d8ada15573538c7f4baebe2d
Successfully built apyori
Installing collected packages: apyori
Successfully installed apyori-1.1.2


In [25]:
# Step 1: Load the training dataset
# pandas is imported here because no earlier cell brings `pd` into scope;
# without it a fresh "Restart & Run All" stops with a NameError on this cell.
import pandas as pd

# The file holds one row per (order, product) pair; the preview shows the
# order_id, user_id and product_name columns.
train_data = pd.read_csv('/content/TRAIN-ARULES.csv')
print("Training Data Preview:")
print(train_data.head())

Training Data Preview:
   order_id  user_id                              product_name
0      1483       90  Organic Pink Lemonade Bunny Fruit Snacks
1      1483       90                      Dark Chocolate Minis
2      1483       90   Sparkling Water, Natural Mango Essenced
3      1483       90                Peach-Pear Sparkling Water
4      1483       90            Organic Heritage Flakes Cereal


In [26]:
# Build one basket per order: every product_name that shares an order_id is
# collected into a single list, giving a list of lists (one per order).
train_transactions = [
    list(order_products)
    for _, order_products in train_data.groupby('order_id')['product_name']
]

In [28]:
# Step 2: Apply Apriori algorithm on the training data
# Imported here because no earlier cell brings `apriori` into scope.
from apyori import apriori

# apyori reads only min_support, min_confidence, min_lift and max_length from
# its keyword arguments; any other keyword (such as the former `min_length=2`)
# is silently ignored, so it has been removed to avoid suggesting a filter
# that is never applied. Rules with an empty side are discarded later, when
# the ordered statistics are summarised.
#
# apriori() returns a one-shot generator. It is materialised right away so
# that re-running a downstream cell does not see an exhausted iterator and
# report zero rules.
association_rules = list(apriori(
    train_transactions,
    min_support=0.0045,  # Minimum support
    min_confidence=0.2,  # Minimum confidence
    min_lift=3,          # Minimum lift
))

In [29]:
# Collect the mined relation records into a list so they can be counted here
# and iterated again in later cells.
rules = [record for record in association_rules]
rule_count = len(rules)
print(f"Number of rules generated: {rule_count}")

Number of rules generated: 987


In [30]:
# Step 3: Extract and summarize rules
# Column order of the summary table. Passing it explicitly to the DataFrame
# constructor guarantees the columns exist even when no rule qualifies, so the
# sort on 'Lift' below cannot fail with a KeyError on an empty result.
RULE_COLUMNS = ['Base Items', 'Add Items', 'Support', 'Confidence', 'Lift']

rules_summary = []
for rule in rules:
    for ordered_stat in rule.ordered_statistics:
        # Keep only statistics with a non-empty antecedent and consequent;
        # a rule with an empty side cannot be used to recommend anything.
        if not ordered_stat.items_base or not ordered_stat.items_add:
            continue
        rules_summary.append({
            'Base Items': tuple(ordered_stat.items_base),
            'Add Items': tuple(ordered_stat.items_add),
            # Support belongs to the whole itemset; confidence and lift are
            # specific to this base -> add direction.
            'Support': rule.support,
            'Confidence': ordered_stat.confidence,
            'Lift': ordered_stat.lift
        })

# Convert rules summary to a DataFrame, strongest associations (highest lift) first
rules_df = (
    pd.DataFrame(rules_summary, columns=RULE_COLUMNS)
    .sort_values(by='Lift', ascending=False)
)
print("\nTop 5 Association Rules:")
print(rules_df.head())


Top 5 Association Rules:
                                             Base Items  \
198                   (Chocolate Peppermint Stick Bar,)   
199                           (Imported Mineral Water,)   
2172  (Light and Lean Quinoa Black Beans with Butter...   
979         (Strawberry Preserves, Antimo Caputo Flour)   
976                 (Hazelnut Bite Size Wafer Cookies,)   

                                              Add Items   Support  Confidence  \
198                           (Imported Mineral Water,)  0.004937       1.000   
199                   (Chocolate Peppermint Stick Bar,)  0.004937       1.000   
2172  (Organic Strawberry Chia Lowfat 2% Cottage Che...  0.004937       1.000   
979                 (Hazelnut Bite Size Wafer Cookies,)  0.004937       1.000   
976         (Strawberry Preserves, Antimo Caputo Flour)  0.004937       0.875   

            Lift  
198   202.571429  
199   202.571429  
2172  177.250000  
979   177.250000  
976   177.250000  


In [31]:
# Step 4: Load the test dataset
# Each row is one basket spread across Item1..Item5 columns; the preview shows
# unused slots as NaN.
test_csv_path = '/content/testarules.csv'
test_data = pd.read_csv(test_csv_path)
test_preview = test_data.head()
print("\nTest Data Preview:")
print(test_preview)


Test Data Preview:
                  Item1                                     Item2  \
0  Dark Chocolate Minis  Organic Pink Lemonade Bunny Fruit Snacks   

                        Item3  Item4  Item5  
0  Peach-Pear Sparkling Water    NaN    NaN  


In [33]:
# Turn each test row into a transaction: the row's non-null cells, converted
# to strings, in column order. Null cells (empty item slots) are dropped.
test_transactions = [
    [str(cell) for cell in basket_row if pd.notnull(cell)]
    for _, basket_row in test_data.iterrows()
]

first_transactions = test_transactions[:5]
print("\nTransformed Test Transactions:")
print(first_transactions)


Transformed Test Transactions:
[['Dark Chocolate Minis', 'Organic Pink Lemonade Bunny Fruit Snacks', 'Peach-Pear Sparkling Water']]


In [34]:
# Step 5: Predict next items for test transactions
# Convert every rule to a pair of frozensets once, instead of rebuilding the
# sets (and re-walking the DataFrame row by row) for each transaction.
# Iterating the columns keeps the row order of rules_df, which was sorted by
# descending Lift when it was built.
rule_item_sets = [
    (frozenset(base), frozenset(add))
    for base, add in zip(rules_df['Base Items'], rules_df['Add Items'])
]

predictions = []
for transaction in test_transactions:
    transaction_set = set(transaction)
    # A dict is used as an insertion-ordered set: items are recorded the first
    # time a matching rule suggests them, so the final list follows rule order
    # (strongest lift first). Collecting into a plain set would yield an
    # arbitrary order that can change from run to run.
    predicted_items = {}

    for base_items, add_items in rule_item_sets:
        # A rule fires when all of its base items are in the transaction
        if base_items <= transaction_set:
            # Exclude already present items; sort so that items suggested by
            # the same rule also come out in a reproducible order.
            for item in sorted(add_items - transaction_set):
                predicted_items.setdefault(item, None)

    predictions.append(list(predicted_items))

In [35]:
# Step 6: Display predictions (compact form, one line per transaction)
print("\nPredictions for Test Data:")
numbered_results = enumerate(zip(test_transactions, predictions), start=1)
for number, (basket, suggested) in numbered_results:
    print(f"Transaction {number}: Original: {basket} -> Prediction: {suggested}")


Predictions for Test Data:
Transaction 1: Original: ['Dark Chocolate Minis', 'Organic Pink Lemonade Bunny Fruit Snacks', 'Peach-Pear Sparkling Water'] -> Prediction: ['Orange Sparkling Water', 'Sparkling Water, Natural Mango Essenced', 'Crunch White Chocolate Macadamia Nut Granola Bars', 'Organic Graham Crunch Cereal', 'Unsweetened Original Almond Breeze Almond Milk', "Organic Bernie's Farm Fruit Snacks strawberry, raspberry, & orange", 'Organic Heritage Flakes Cereal', 'Dark & Mint Filled Chocolate Squares', 'Twilight Delight 72% Cacao Intense Dark Squares', 'Maple Pumpkin Seeds with Sea Salt Chewy with a Crunch Granola Bars', 'Crunch Chocolate Peanut Butter Granola Bar', 'Crunch Granola Bar Chocolate Chip', 'Crunchy Coconut Granola Bars', 'Lemon Sparkling Water', 'Trail Mix Fruit & Nut Chewy Granola Bars', 'Healthy Grains Granola Bar, Vanilla Blueberry']


In [37]:
# Step 6 (detailed view): Display predictions with one item per line
print("\nPredictions for Test Data:")

for number, (basket, suggested) in enumerate(zip(test_transactions, predictions), start=1):
    print(f"Transaction {number}:")

    print("  Original Items:")
    for product in basket:
        print(f"    - {product}")

    print("  Predicted Items:")
    if not suggested:
        # No rule matched this basket, or every suggestion was already in it
        print("    (No predictions available)")
    else:
        for product in suggested:
            print(f"    - {product}")

    print("-" * 50)  # Separator for readability


Predictions for Test Data:
Transaction 1:
  Original Items:
    - Dark Chocolate Minis
    - Organic Pink Lemonade Bunny Fruit Snacks
    - Peach-Pear Sparkling Water
  Predicted Items:
    - Orange Sparkling Water
    - Sparkling Water, Natural Mango Essenced
    - Crunch White Chocolate Macadamia Nut Granola Bars
    - Organic Graham Crunch Cereal
    - Unsweetened Original Almond Breeze Almond Milk
    - Organic Bernie's Farm Fruit Snacks strawberry, raspberry, & orange
    - Organic Heritage Flakes Cereal
    - Dark & Mint Filled Chocolate Squares
    - Twilight Delight 72% Cacao Intense Dark Squares
    - Maple Pumpkin Seeds with Sea Salt Chewy with a Crunch Granola Bars
    - Crunch Chocolate Peanut Butter Granola Bar
    - Crunch Granola Bar Chocolate Chip
    - Crunchy Coconut Granola Bars
    - Lemon Sparkling Water
    - Trail Mix Fruit & Nut Chewy Granola Bars
    - Healthy Grains Granola Bar, Vanilla Blueberry
--------------------------------------------------
