In [15]:
import pandas as pd
!pip install pandas
# Path of the dataset. It is held in one name so that the path passed to
# read_csv and the path reported in the "not found" message cannot disagree.
DATA_PATH = 'supermarket.csv'
try:
  df = pd.read_csv(DATA_PATH)
  print("Dataset loaded successfully.")
  print(df.head())
except FileNotFoundError:
  # Report the file that was actually requested.
  print(f"Error: '{DATA_PATH}' not found. Please provide the correct file path.")
except pd.errors.ParserError:
  print("Error: Unable to parse the CSV file. Please check the file format.")
except Exception as e:
  # NOTE(review): after any of these handlers runs, `df` is left undefined.
  print(f"An unexpected error occurred: {e}")


Dataset loaded successfully.
    Invoice ID Branch       City Customer type  Gender  \
0  750-67-8428      A     Yangon        Member  Female   
1  226-31-3081      C  Naypyitaw        Normal  Female   
2  631-41-3108      A     Yangon        Normal    Male   
3  123-19-1176      A     Yangon        Member    Male   
4  373-73-7910      A     Yangon        Normal    Male   

             Product line  Unit price  Quantity   Tax 5%     Total       Date  \
0       Health and beauty       74.69         7  26.1415  548.9715   1/5/2019   
1  Electronic accessories       15.28         5   3.8200   80.2200   3/8/2019   
2      Home and lifestyle       46.33         7  16.2155  340.5255   3/3/2019   
3       Health and beauty       58.22         8  23.2880  489.0480  1/27/2019   
4       Sports and travel       86.31         7  30.2085  634.3785   2/8/2019   

    Time      Payment    cogs  gross margin percentage  gross income  Rating  
0  13:08      Ewallet  522.83                 4.761905  

In [16]:
def event_a(row):
  """Event A: the transaction was made by a 'Member' customer.

  Reads the 'Customer type' entry of `row` and returns the result of
  comparing it with the string 'Member'.
  """
  customer_type = row['Customer type']
  return customer_type == 'Member'

def event_b(row):
  """Event B: the transaction was paid by credit card.

  Reads the 'Payment' entry of `row` and returns the result of comparing it
  with the string 'Credit card' (the comparison is case-sensitive).
  """
  payment_method = row['Payment']
  return payment_method == 'Credit card'
try:
  # Column lookup by label and `==` are both defined on a whole DataFrame, so
  # each predicate is evaluated for all transactions in one vectorized call
  # instead of once per row through df.apply(..., axis=1). The result is the
  # same boolean column.
  df['Event A'] = event_a(df)
  df['Event B'] = event_b(df)
  print("Events defined and added as new columns to the DataFrame.")
  print(df.head())
except KeyError as e:
  # str(KeyError) is the repr of the missing key and already carries quotes;
  # use the raw key so the message does not show it double-quoted.
  print(f"Error: Column '{e.args[0]}' not found in the DataFrame. Please check the column names in your dataset.")
except Exception as e:
  print(f"An unexpected error occurred: {e}")


Events defined and added as new columns to the DataFrame.
    Invoice ID Branch       City Customer type  Gender  \
0  750-67-8428      A     Yangon        Member  Female   
1  226-31-3081      C  Naypyitaw        Normal  Female   
2  631-41-3108      A     Yangon        Normal    Male   
3  123-19-1176      A     Yangon        Member    Male   
4  373-73-7910      A     Yangon        Normal    Male   

             Product line  Unit price  Quantity   Tax 5%     Total       Date  \
0       Health and beauty       74.69         7  26.1415  548.9715   1/5/2019   
1  Electronic accessories       15.28         5   3.8200   80.2200   3/8/2019   
2      Home and lifestyle       46.33         7  16.2155  340.5255   3/3/2019   
3       Health and beauty       58.22         8  23.2880  489.0480  1/27/2019   
4       Sports and travel       86.31         7  30.2085  634.3785   2/8/2019   

    Time      Payment    cogs  gross margin percentage  gross income  Rating  \
0  13:08      Ewallet  522

In [17]:
# Transaction counts for each event. The flag columns are boolean, so summing
# a column counts its True rows.
total_transactions = len(df)
event_a_count = df['Event A'].sum()
event_b_count = df['Event B'].sum()
# Rows where both flags are set; `&` on the boolean columns replaces the
# redundant `== True` comparisons.
event_a_and_b_count = int((df['Event A'] & df['Event B']).sum())

p_a = event_a_count / total_transactions
p_b = event_b_count / total_transactions
p_a_intersect_b = event_a_and_b_count / total_transactions
# Inclusion-exclusion, P(A ∪ B) = P(A) + P(B) - P(A ∩ B), applied to the
# integer counts with a single division. Subtracting the already-rounded
# probabilities accumulates floating-point error (e.g. 0.6400000000000001).
p_a_union_b = (event_a_count + event_b_count - event_a_and_b_count) / total_transactions

# P(A | B) = |A ∩ B| / |B|. It is undefined when B never occurs; 0 is used as
# a placeholder in that case.
if event_b_count > 0:
  p_a_given_b = event_a_and_b_count / event_b_count
else:
  p_a_given_b = 0
print(f"P(A): {p_a}")
print(f"P(B): {p_b}")
print(f"P(A ∩ B): {p_a_intersect_b}")
print(f"P(A ∪ B): {p_a_union_b}")
print(f"P(A | B): {p_a_given_b}")


P(A): 0.501
P(B): 0.311
P(A ∩ B): 0.172
P(A ∪ B): 0.6400000000000001
P(A | B): 0.5530546623794211


In [18]:
try:
  # Boolean mask selecting the rows whose product line is 'Health and beauty'.
  is_health_beauty = df['Product line'] == 'Health and beauty'
  health_beauty_count = df.loc[is_health_beauty].shape[0]
  total_transactions = df.shape[0]
  # Relative frequency of the product line among all transactions.
  probability = health_beauty_count / total_transactions
  print(f"The probability that a randomly chosen transaction belongs to the Health & Beauty product line is: {probability}")
except KeyError:
  print("Error: 'Product line' column not found in the DataFrame. Please check the column names in your dataset.")
except Exception as e:
  print(f"An unexpected error occurred: {e}")


The probability that a randomly chosen transaction belongs to the Health & Beauty product line is: 0.152


In [19]:
def event_ewallet(row):
    """Return whether the 'Payment' entry of `row` equals 'Ewallet'."""
    payment_method = row['Payment']
    return payment_method == 'Ewallet'

def event_more_than_5_items(row):
    """Return whether the 'Quantity' entry of `row` is strictly greater than 5."""
    quantity = row['Quantity']
    return quantity > 5
# Column lookup and the comparison operators are defined on a whole DataFrame,
# so both predicates are evaluated for every transaction in one vectorized
# call instead of once per row through df.apply(..., axis=1).
df['Ewallet'] = event_ewallet(df)
df['MoreThan5Items'] = event_more_than_5_items(df)
# The flag columns are boolean, so they can be used directly as masks and
# summed to count True rows; no `== True` comparison is needed.
ewallet_customers = df[df['Ewallet']]
favorable_outcomes = int(ewallet_customers['MoreThan5Items'].sum())
total_ewallet_customers = len(ewallet_customers)

# Conditional relative frequency: share of Ewallet transactions with more
# than 5 items. It is skipped when there are no Ewallet transactions.
if total_ewallet_customers > 0:
    probability = favorable_outcomes / total_ewallet_customers
    print(f"The probability that a customer who used Ewallet purchased more than 5 items is: {probability}")
else:
    print("No customers used Ewallet in the dataset.")


The probability that a customer who used Ewallet purchased more than 5 items is: 0.48695652173913045


In [20]:
# Share of Yangon transactions that were paid in cash.
try:
    yangon_transactions = df[df['City'] == 'Yangon']
    cash_transactions_yangon = yangon_transactions[yangon_transactions['Payment'] == 'Cash']
    # Guard the denominator: with no Yangon rows the ratio is undefined and
    # the division would raise ZeroDivisionError, which is not handled below.
    if len(yangon_transactions) > 0:
        probability_cash_yangon = len(cash_transactions_yangon) / len(yangon_transactions)
        print(f"Probability of Cash payment in Yangon: {probability_cash_yangon}")
    else:
        print("No transactions from Yangon in the dataset.")
except KeyError as e:
    # str(KeyError) is the repr of the key (already quoted); print the raw key.
    print(f"Error: Column '{e.args[0]}' not found. Please check your dataset.")
# Share of all transactions made by a Member who gave a rating above 8.
try:
    member_high_rating = df[(df['Customer type'] == 'Member') & (df['Rating'] > 8)]
    probability_member_high_rating = len(member_high_rating) / len(df)
    print(f"Probability of Member with rating > 8: {probability_member_high_rating}")
except KeyError as e:
    print(f"Error: Column '{e.args[0]}' not found. Please check your dataset.")


Probability of Cash payment in Yangon: 0.3235294117647059
Probability of Member with rating > 8: 0.157


In [21]:
import math

def factorial(n):
  """Return n! for a non-negative integer n.

  Raises:
    ValueError: if n is negative.
  """
  if n < 0:
    raise ValueError("Factorial is not defined for negative numbers.")
  # Zero is answered directly; every other value is delegated to math.factorial.
  return 1 if n == 0 else math.factorial(n)

def permutations(n, r):
  """Return the number of ordered selections of r items out of n: n! / (n - r)!.

  Raises:
    ValueError: if n or r is negative, or if r exceeds n.
  """
  if n < 0 or r < 0 or r > n:
    raise ValueError("Invalid input for permutations.")
  arrangements_of_all = factorial(n)
  arrangements_of_unused = factorial(n - r)
  # Both operands are integers, so floor division keeps the result exact.
  return arrangements_of_all // arrangements_of_unused

def combinations(n, r):
  """Return the number of unordered selections of r items out of n: n! / (r! (n - r)!).

  Raises:
    ValueError: if n or r is negative, or if r exceeds n.
  """
  if n < 0 or r < 0 or r > n:
    raise ValueError("Invalid input for combinations.")
  numerator = factorial(n)
  denominator = factorial(r) * factorial(n - r)
  # Both operands are integers, so floor division keeps the result exact.
  return numerator // denominator


In [22]:
# Factorial of the number of distinct values in the 'Branch' column.
try:
    unique_branches = df['Branch'].nunique()
    branch_factorial = factorial(unique_branches)
    print(f"Factorial of the number of unique branches: {branch_factorial}")
except KeyError:
    print("Error: 'Branch' column not found in the DataFrame. Please check the column names in your dataset.")
except Exception as e:
    print(f"An unexpected error occurred: {e}")
# Ordered arrangements of r transactions chosen from n. The message
# interpolates n and r so it always matches the values actually used.
n = 20
r = 5
try:
    permutation_result = permutations(n, r)
    print(f"Number of ways to arrange {r} transactions from {n}: {permutation_result}")
except ValueError as e:
    print(f"Error in permutation calculation: {e}")
# Unordered selections of r product lines from n categories.
n = 6
r = 3
try:
    combination_result = combinations(n,r)
    print(f"Number of ways to select {r} product lines from {n} categories: {combination_result}")
except ValueError as e:
    print(f"Error in combination calculation: {e}")


Factorial of the number of unique branches: 6
Number of ways to arrange 5 transactions from 20: 1860480
Number of ways to select 3 product lines from 6 categories: 20


In [23]:
def combinations(n, r):
  """Return the number of unordered selections of r items out of n: n! / (r! (n - r)!).

  Raises:
    ValueError: if n or r is negative, or if r exceeds n.
  """
  # NOTE(review): a function named `combinations` is also defined in an
  # earlier cell; running this cell rebinds the name to this definition.
  if n < 0 or r < 0 or r > n:
    raise ValueError("Invalid input for combinations.")
  numerator = math.factorial(n)
  denominator = math.factorial(r) * math.factorial(n - r)
  # Both operands are integers, so floor division keeps the result exact.
  return numerator // denominator
# Scenario 1: choose `payment_methods` out of `available_payment_types`
# (unordered selection).
payment_methods, available_payment_types = 3, 3
ways_to_display = combinations(available_payment_types, payment_methods)

print(f"The number of ways to display {payment_methods} payment methods from {available_payment_types} types is: {ways_to_display}")

# Scenario 2: choose `employees_to_select` out of `total_employees`
# (unordered selection).
total_employees, employees_to_select = 15, 5
ways_to_select_employees = combinations(total_employees, employees_to_select)
print(f"The number of ways to select {employees_to_select} employees from {total_employees} is: {ways_to_select_employees}")

# Scenario 3: codes of `invoice_code_length` letters where each position can
# independently be any of `alphabet_size` letters: alphabet_size ** length.
alphabet_size, invoice_code_length = 26, 6
ways_to_create_codes = alphabet_size ** invoice_code_length
print(f"The number of ways to create unique invoice codes using {invoice_code_length} letters from the English alphabet is: {ways_to_create_codes}")


The number of ways to display 3 payment methods from 3 types is: 1
The number of ways to select 5 employees from 15 is: 3003
The number of ways to create unique invoice codes using 6 letters from the English alphabet is: 308915776
