# Data Analysis for Store Dataset

In [2]:

# Import necessary libraries
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Load the dataset
file_path = 'dataset-3.csv'
data = pd.read_csv(file_path)

# Convert InvoiceDate to datetime for temporal analysis
data['InvoiceDate'] = pd.to_datetime(data['InvoiceDate'])

# Drop rows with missing CustomerID and erroneous data (negative Quantity/UnitPrice)
cleaned_data = data.dropna(subset=['CustomerID'])
cleaned_data = cleaned_data[(cleaned_data['Quantity'] > 0) & (cleaned_data['UnitPrice'] > 0)]

# Preliminary analysis: Frequency of items purchased
item_frequency = cleaned_data['StockCode'].value_counts().head(10)
item_frequency
    

StockCode
85123A    2035
22423     1723
85099B    1618
84879     1408
47566     1396
20725     1317
22720     1159
20727     1105
POST      1099
23203     1098
Name: count, dtype: int64

## Monthly Transactions Analysis

In [3]:

# Analyze the number of transactions per month
cleaned_data['Month'] = cleaned_data['InvoiceDate'].dt.month
monthly_transactions = cleaned_data.groupby('Month')['InvoiceNo'].nunique()
monthly_transactions
    

Month
1      987
2      997
3     1321
4     1149
5     1555
6     1393
7     1331
8     1280
9     1755
10    1929
11    2657
12    2178
Name: InvoiceNo, dtype: int64

## Market Basket Analysis

In [4]:

# Prepare data for market basket analysis
basket = cleaned_data.groupby(['InvoiceNo', 'StockCode'])['Quantity'].sum().unstack(fill_value=0)
basket = basket.applymap(lambda x: 1 if x > 0 else 0)

# Perform the Apriori algorithm
frequent_itemsets = apriori(basket, min_support=0.01, use_colnames=True)

# Generate association rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.1)
top_rules = rules.sort_values(by='lift', ascending=False).head(10)
top_rules
    

  basket = basket.applymap(lambda x: 1 if x > 0 else 0)


TypeError: association_rules() missing 1 required positional argument: 'num_itemsets'

## Recommendations

In [None]:

# Provide recommendations based on analysis
recommendations = [
    "Promote high-frequency items such as 85123A and 85099B.",
    "Bundle items frequently purchased together as identified in the market basket analysis.",
    "Schedule promotions during peak shopping hours (12 PM - 1 PM).",
    "Target midweek shoppers with tailored campaigns.",
    "Improve inventory management for high-demand items like 85123A."
]
recommendations
    