### Loading Libraries

In [1]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

### Reading of dataset

In [2]:
df = pd.read_csv('scanner_data.csv')

### Most purchased SKU

In [3]:
# Group the DataFrame by SKU and calculate the sum of Quantity for each SKU
grouped_df = df.groupby('SKU')['Quantity'].sum()

# Find the SKU with the maximum total Quantity
most_purchased_sku = grouped_df.idxmax()

print("The most purchased product is:", most_purchased_sku)

The most purchased product is: CKDW0


### Top 3 products purchased after the most purchased SKU

In [4]:
most_purchased_sku = most_purchased_sku  # Replace with the SKU obtained from the previous code

# Filter transactions after the purchase of the most purchased SKU by the customer
after_purchase_df = df[df['Date'] > df[df['SKU'] == most_purchased_sku]['Date'].max()]

# Group the filtered DataFrame by SKU and calculate the sum of Quantity for each SKU
grouped_after_purchase_df = after_purchase_df.groupby('SKU')['Quantity'].sum()

# Find the top 3 products that were most purchased after the purchase of the most purchased SKU
top_3_purchased = grouped_after_purchase_df.nlargest(3).index.tolist()

print("Top 3 products purchased after the most purchased SKU:")
print(top_3_purchased)


Top 3 products purchased after the most purchased SKU:
['DR8BG', 'MAJO7', 'PVMO1']


### Probability of purchasing the top 3 products for each customer who purchased the most_purchased_sku

In [5]:
most_purchased_sku = most_purchased_sku 
top_3_purchased = top_3_purchased 

customer_transactions = df[df['SKU'] == most_purchased_sku]

# Group the customer transactions by Customer_ID and calculate the total number of transactions
customer_transaction_count = customer_transactions.groupby('Customer_ID').size().reset_index(name='Transaction_Count')

# Filter transactions for customers who purchased the top_3_purchased SKUs
conditional_transactions = df[df['SKU'].isin(top_3_purchased)]

# Group the conditional transactions by Customer_ID and calculate the total number of transactions
conditional_transaction_count = conditional_transactions.groupby('Customer_ID').size().reset_index(name='Conditional_Transaction_Count')

# Merge the transaction counts for each customer
customer_probabilities = pd.merge(customer_transaction_count, conditional_transaction_count, on='Customer_ID', how='inner')

# Calculate the probability for each customer
customer_probabilities['Probability'] = customer_probabilities['Conditional_Transaction_Count'] / customer_probabilities['Transaction_Count']

# Sort the customers by their probability in descending order
customer_probabilities = customer_probabilities.sort_values('Probability', ascending=False)

print("Probability of purchasing the top 3 products for each customer who purchased the most_purchased_sku:")
print(customer_probabilities)


Probability of purchasing the top 3 products for each customer who purchased the most_purchased_sku:
   Customer_ID  Transaction_Count  Conditional_Transaction_Count  Probability
0         3595                  1                              1          1.0
1        16783                  2                              1          0.5
