In [56]:
import numpy as np
np.set_printoptions(suppress= True, precision= 4)

# Data generation for the project

In [57]:
rng = np.random.default_rng(seed = 50)

In [58]:
transaction_ids = np.arange(1000,1100)

In [59]:
transaction_ids.shape

(100,)

In [60]:
prices = np.round(rng.uniform(low = 20.0, high = 50.0, size = 100),2)

In [61]:
prices[:5]

array([43.62, 45.01, 36.44, 49.2 , 27.11])

In [62]:
np.random.seed(50)
quantities = rng.integers(low = 1, high = 6, size = 100)


In [63]:
quantities

array([4, 5, 1, 2, 4, 5, 1, 1, 3, 2, 5, 5, 3, 4, 5, 1, 1, 1, 4, 3, 4, 5,
       2, 3, 5, 4, 5, 1, 1, 2, 3, 4, 2, 1, 5, 1, 5, 1, 3, 4, 4, 5, 4, 4,
       1, 1, 2, 4, 1, 3, 1, 1, 5, 2, 1, 1, 2, 2, 1, 2, 4, 2, 5, 1, 5, 2,
       2, 2, 2, 1, 1, 1, 5, 4, 4, 3, 3, 3, 5, 1, 3, 2, 1, 1, 5, 1, 5, 2,
       4, 2, 5, 3, 2, 5, 1, 1, 2, 1, 1, 3], dtype=int64)

In [64]:
customer_segments =  rng.choice(['NEW', 'LOYAL', 'VIP'],size = 100, p = [0.4,0.4,0.2])

In [65]:
customer_segments[:5]

array(['NEW', 'LOYAL', 'LOYAL', 'NEW', 'LOYAL'], dtype='<U5')

In [66]:
is_discounted = rng.choice([True, False], size = 100, p = [0.3, 0.7])

In [67]:
is_discounted[:5]

array([ True, False,  True, False, False])

# Task 1: Calculate Transaction Revenue

Using a single vectorized operation, calculate the total revenue for each transaction. This should be the price multiplied by the quantity. Store the result in a new array called `revenues`.

In [68]:
#revenue = qunatities * prices
revenues = prices * quantities

In [69]:
revenues[:5]

array([174.48, 225.05,  36.44,  98.4 , 108.44])

In [70]:
revenues.shape

(100,)

# Task 2: Basic Statistical Analysis

Now that you have the `revenues` array, calculate and print the average (mean) transaction value and the total revenue across all transactions.

In [71]:
total_revenue = np.sum(revenues)

In [72]:
total_revenue

9239.83

In [73]:
average_revenue = np.mean(revenues)

In [74]:
average_revenue

92.3983

# Task 3: Conditional Logic with `np.where`


StyleSphere wants to apply a 5% processing fee to all transactions that were not discounted. Use `np.where` to create a new array `final_revenues` which is equal to the original `revenues` for discounted items, but is `revenues * 1.05` for non-discounted items.



In [75]:
final_revenues = np.where(is_discounted == True, (revenues * 1.05), revenues)

In [76]:
final_revenues[:5]

array([183.204, 225.05 ,  38.262,  98.4  , 108.44 ])

# Task 4: Methods for Boolean Arrays

The marketing team wants to know how many transactions were made by 'VIP' customers. Create a boolean array to identify these transactions and then use an array method to count them.

In [77]:
vip_mask = customer_segments == 'VIP'

In [78]:
vip_mask[:10]

array([False, False, False, False, False, False, False,  True,  True,
       False])

In [79]:
vip_transactions = transaction_ids[vip_mask]

In [80]:
vip_count = vip_mask.sum() #this counts the TRUE instances in the vip_mask array

In [81]:
vip_count

24

# Task 5: Sorting to Find Top Transactions

Identify the top 5 highest-value transactions from the `revenues` array. You'll need an indirect sort to get the indices of the highest values, and then use those indices to show the revenues

In [82]:
sorted_revenues = np.sort(revenues)

In [83]:
top_5_revenues = sorted_revenues[-5:][::-1] #the second pair of brackets and it's code reverse the order

In [84]:
top_5_revenues

array([248.  , 229.4 , 225.05, 202.55, 197.05])

# Task 6: Set Logic for Unique Segments

To understand our customer base, find all the unique customer segments that exist in the `customer_segments` array.

In [85]:
unique_customers = np.unique(customer_segments)

In [86]:
unique_customers

array(['LOYAL', 'NEW', 'VIP'], dtype='<U5')

# **High-Value Customer Analysis**

The leadership team wants a report on the **total revenue generated from high-value customers ('Loyal' or 'VIP') on full-price (non-discounted) items**. This requires combining conditional logic, boolean array operations, and statistical methods. Write the code to calculate and print this final value.

In [87]:
customer_mask = customer_segments != 'NEW' 
discount_mask =  is_discounted == True

In [88]:
revenue_mask = customer_mask & discount_mask

In [89]:
high_value_revenue = revenues[revenue_mask]

In [90]:
high_value_revenue

array([ 36.44,  36.67, 248.  ,  49.54,  65.52, 135.35,  41.2 , 187.32,
        80.44,  99.57,  55.28,  53.6 ,  92.24,  70.26,  46.07,  61.8 ,
       118.02, 114.35,  91.22, 109.55,  99.08])

In [91]:
total_high_value_revenue = np.sum(high_value_revenue)

In [93]:
print(total_high_value_revenue)

1891.5199999999998
