In [None]:
import zipfile
import os

# Location of the zipped dataset and the folder it is unpacked into.
zip_file_path = "/content/drive/MyDrive/Supermarket_Sales.zip"
extract_to_folder = "/content/Supermarket_Sales"

# Unpack every archive member; the context manager closes the zip file afterwards.
with zipfile.ZipFile(zip_file_path, mode="r") as zip_ref:
    zip_ref.extractall(path=extract_to_folder)

# Report the absolute location so the reader knows where the files landed.
extracted_path = os.path.abspath(extract_to_folder)
print(f"Dataset extracted to: {extracted_path}")


Dataset extracted to: /content/Supermarket_Sales


In [None]:
import pandas as pd

# Read the extracted sales CSV into the DataFrame used by the cells below.
sales_csv_path = "/content/Supermarket_Sales/supermarket_sales -.csv"
df = pd.read_csv(sales_csv_path)


In [None]:

# Plain-text preview of the first five rows (5 is the default for head()).
print(df.head(n=5))


    Invoice ID Branch       City Customer type  Gender  \
0  750-67-8428      A     Yangon        Member  Female   
1  226-31-3081      C  Naypyitaw        Normal  Female   
2  631-41-3108      A     Yangon        Normal    Male   
3  123-19-1176      A     Yangon        Member    Male   
4  373-73-7910      A     Yangon        Normal    Male   

             Product line  Unit price  Quantity   Tax 5%     Total       Date  \
0       Health and beauty       74.69         7  26.1415  548.9715   1/5/2019   
1  Electronic accessories       15.28         5   3.8200   80.2200   3/8/2019   
2      Home and lifestyle       46.33         7  16.2155  340.5255   3/3/2019   
3       Health and beauty       58.22         8  23.2880  489.0480  1/27/2019   
4       Sports and travel       86.31         7  30.2085  634.3785   2/8/2019   

    Time      Payment    cogs  gross margin percentage  gross income  Rating  
0  13:08      Ewallet  522.83                 4.761905       26.1415     9.1  
1  10:

In [None]:
# Number of rows in the dataset; used as the denominator for the probabilities below.
total_transactions = df.shape[0]


In [None]:
# Event A: transactions whose customer type is "Member".
A = df.loc[df["Customer type"] == "Member"]


In [None]:
# Event B: transactions paid by credit card.
B = df.loc[df["Payment"] == "Credit card"]


In [None]:

# Marginal probabilities: share of all transactions that fall in each event.
P_A = len(A) / total_transactions
P_B = len(B) / total_transactions

# Joint probability: rows that are both "Member" and paid by "Credit card".
member_mask = df["Customer type"] == "Member"
credit_card_mask = df["Payment"] == "Credit card"
P_A_intersection_B = len(df[member_mask & credit_card_mask]) / total_transactions

# Inclusion-exclusion for the union, then the definition of conditional probability.
P_A_union_B = P_A + P_B - P_A_intersection_B
P_A_given_B = P_A_intersection_B / P_B


In [None]:
# Report every probability to four decimal places, one per line.
print(
    f"P(A): {P_A:.4f}",
    f"P(B): {P_B:.4f}",
    f"P(A ∪ B): {P_A_union_B:.4f}",
    f"P(A ∩ B): {P_A_intersection_B:.4f}",
    f"P(A | B): {P_A_given_B:.4f}",
    sep="\n",
)

P(A): 0.5010
P(B): 0.3110
P(A ∪ B): 0.6400
P(A ∩ B): 0.1720
P(A | B): 0.5531


In [None]:
# Share of all transactions whose product line is "Health and beauty".
health_beauty_rows = df.loc[df["Product line"] == "Health and beauty"]
P_health_beauty = health_beauty_rows.shape[0] / total_transactions
print(f"P(Health & Beauty): {P_health_beauty:.4f}")


P(Health & Beauty): 0.1520


In [None]:
# Restrict to Ewallet payments, then measure the share of those with Quantity > 5.
Ewallet_transactions = df.loc[df["Payment"] == "Ewallet"]
ewallet_over_5 = Ewallet_transactions.loc[Ewallet_transactions["Quantity"] > 5]
P_more_than_5_given_Ewallet = ewallet_over_5.shape[0] / Ewallet_transactions.shape[0]
print(f"P(Quantity > 5 | Ewallet): {P_more_than_5_given_Ewallet:.4f}")


P(Quantity > 5 | Ewallet): 0.4870


In [None]:
# Restrict to transactions in Yangon, then measure the share of those paid in cash.
Yangon_transactions = df.loc[df["City"] == "Yangon"]
yangon_cash = Yangon_transactions.loc[Yangon_transactions["Payment"] == "Cash"]
P_cash_given_Yangon = yangon_cash.shape[0] / Yangon_transactions.shape[0]
print(f"P(Cash | Yangon): {P_cash_given_Yangon:.4f}")


P(Cash | Yangon): 0.3235


In [None]:

# Joint probability that a transaction is by a Member and has a rating above 8.
is_member = df["Customer type"] == "Member"
is_high_rating = df["Rating"] > 8
P_member_high_rating = len(df.loc[is_member & is_high_rating]) / total_transactions
print(f"P(Member and Rating > 8): {P_member_high_rating:.4f}")


P(Member and Rating > 8): 0.1570


In [None]:
import math
import pandas as pd

# Re-read the sales CSV so the counting cells below have `df` available.
sales_csv_path = "/content/Supermarket_Sales/supermarket_sales -.csv"
df = pd.read_csv(sales_csv_path)


In [None]:
def factorial(n: int) -> int:
    """Return n! by delegating to ``math.factorial``.

    Any error raised by ``math.factorial`` (for example on a negative
    argument) propagates unchanged.
    """
    result = math.factorial(n)
    return result

In [None]:
def permutation(n, r):
    """Return the number of ordered arrangements of ``r`` items chosen from ``n``.

    Computed as n! / (n - r)! using exact integer arithmetic.

    Args:
        n: Size of the pool to choose from (non-negative integer).
        r: Number of items arranged (integer with 0 <= r <= n).

    Returns:
        The integer count P(n, r).

    Raises:
        ValueError: If ``n`` or ``r`` is negative, or if ``r`` exceeds ``n``.
    """
    # Without this guard a negative r makes (n - r)! larger than n!, so the
    # floor division silently yields 0 instead of signalling bad input.
    if n < 0 or r < 0:
        raise ValueError("n and r must be non-negative")
    # r > n would otherwise surface as an opaque "factorial() not defined for
    # negative values" error; keep the ValueError but state the real cause.
    if r > n:
        raise ValueError("r must not exceed n")
    return math.factorial(n) // math.factorial(n - r)


In [None]:
def combination(n, r):
    """Return the number of unordered selections of ``r`` items from ``n``.

    Computed as n! / (r! * (n - r)!) with integer floor division. Errors
    raised by ``math.factorial`` (for example when ``n``, ``r`` or ``n - r``
    is negative) propagate unchanged.
    """
    numerator = math.factorial(n)
    denominator = math.factorial(r) * math.factorial(n - r)
    return numerator // denominator


In [None]:
# Number of ordered selections of 5 items out of 20.
P_5_from_20 = permutation(n=20, r=5)
print(f"Permutations (P(20,5)): {P_5_from_20}")


Permutations (P(20,5)): 1860480


In [None]:
# Count the distinct product lines in the data, then the number of ways to
# choose 3 of them without regard to order.
unique_product_lines = df["Product line"].nunique()
C_3_from_6 = combination(unique_product_lines, 3)
# The label interpolates the computed count so it stays correct if the
# number of product lines in the data ever differs from 6.
print(f"Combinations (C({unique_product_lines},3)): {C_3_from_6}")


Combinations (C(6,3)): 20


In [None]:
# Count the distinct branches (missing values excluded, the nunique default),
# then compute the factorial of that count.
unique_branches = df["Branch"].nunique(dropna=True)
factorial_branches = factorial(n=unique_branches)
print(f"Factorial of branches ({unique_branches}!): {factorial_branches}")


Factorial of branches (3!): 6


In [None]:
# Number of distinct payment methods present in the data.
payment_methods = df["Payment"].nunique()
if payment_methods < 4:
    # Arranging 4 items needs at least 4 to choose from; report the actual
    # count rather than a hard-coded one.
    print(f"Cannot select 4 payment methods from only {payment_methods} available types.")
else:
    P_4_from_3 = permutation(payment_methods, 4)
    # This branch only runs with 4 or more methods, so the label must show
    # the real pool size instead of a fixed "3".
    print(f"Permutations (P({payment_methods},4)): {P_4_from_3}")


Cannot select 4 payment methods from only 3 available types.


In [None]:
# Number of unordered selections of 5 items out of 15.
C_5_from_15 = combination(n=15, r=5)
print(f"Combinations (C(15,5)): {C_5_from_15}")


Combinations (C(15,5)): 3003


In [None]:
# Number of ordered selections of 6 items out of 26.
P_6_from_26 = permutation(n=26, r=6)
print(f"Permutations (P(26,6)): {P_6_from_26}")


Permutations (P(26,6)): 165765600
