In [1]:
# Retail Data – Business Questions (for retail_data)

*Assumed columns:* [StoreID, Sales, Customers, Inventory, Returns]

## Basic Performance Analysis
1. What is the *total revenue* generated across all stores?
2. Which store recorded the *highest sales*?
3. Which store recorded the *lowest sales*?
4. What is the *average daily sales* per store?
5. How many stores achieved sales *above ₹35,000*?

## Customer Insights
6. Which store had the *highest customer footfall*?
7. What is the *average number of customers* per store?
8. How many stores have *more than 450 customers*?
9. Which store has the *lowest customer count*?
10. Is there any store where *high sales do not correspond to high customers*?

## Inventory & Returns Analysis
11. What is the *total inventory* across all stores?
12. Which store has the *highest inventory stock*?
13. How many stores have *inventory less than 1300 units*?
14. Which store has the *highest return rate* (returns ÷ inventory)?
15. Which stores have *returns greater than 15 units*?

## Sales & Customer Correlation
16. Is there a *positive correlation* between sales and customers?
17. Which store has *high customer visits but low sales*?
18. Which store has *low customers but high sales*?
19. How many stores have *sales per customer above ₹90*?
20. Which stores could *benefit from customer acquisition campaigns* (low customers, high inventory)?

## Profitability & Growth
21. If profit margin is *20% of sales, what is the **profit per store*?
22. Which store has the *highest profit*?
23. Which store has the *lowest profit*?
24. If sales increase by *10% next month, what are the **new sales figures*?
25. How will *total profit* change with the 10% sales increase?

## Operational Strategy
26. Which stores need *inventory restocking* based on sales and current stock?
27. Which stores show *high returns percentage* and might require *quality checks*?
28. Which store is the *best performer overall* (sales, customers, inventory turnover)?
29. Which store is the *worst performer overall*?
30. Which stores can be considered for *expansion* based on strong sales and customer metrics?


SyntaxError: invalid character '÷' (U+00F7) (3750015274.py, line 23)

In [2]:
import numpy as np

# Manually created array: [Store ID, Sales, Customers, Inventory, Returns]
retail_data = np.array([
    [1, 25000, 300, 1200, 10],
    [2, 32000, 450, 1500, 15],
    [3, 28000, 380, 1100, 8],
    [4, 41000, 500, 1600, 20],
    [5, 35000, 420, 1400, 12],
    [6, 27000, 360, 1300, 9],
    [7, 39000, 480, 1700, 18],
    [8, 30000, 400, 1250, 14],
    [9, 45000, 550, 1800, 25],
    [10, 33000, 410, 1450, 11]
])

print(retail_data)

[[    1 25000   300  1200    10]
 [    2 32000   450  1500    15]
 [    3 28000   380  1100     8]
 [    4 41000   500  1600    20]
 [    5 35000   420  1400    12]
 [    6 27000   360  1300     9]
 [    7 39000   480  1700    18]
 [    8 30000   400  1250    14]
 [    9 45000   550  1800    25]
 [   10 33000   410  1450    11]]


In [None]:
## Basic Performance Analysis
1. What is the *total revenue* generated across all stores?
2. Which store recorded the *highest sales*?
3. Which store recorded the *lowest sales*?
4. What is the *average daily sales* per store?
5. How many stores achieved sales *above ₹35,000*?

In [3]:
# 1. What is the *total revenue* generated across all stores?
retail_data[:,1].sum()

np.int64(335000)

In [5]:
# 2. Which store recorded the *highest sales*?
retail_data[:,1].argmax()+1

np.int64(9)

In [6]:
# 3. Which store recorded the *lowest sales*?
retail_data[:,1].argmin()+1

np.int64(1)

In [None]:
#4. What is the *average daily sales* per store?
retail_data[:,1].mean()

np.float64(33500.0)

In [8]:
# How many stores achieved sales *above ₹35,000*?
retail_data[:,1]>35000


array([False, False, False,  True, False, False,  True, False,  True,
       False])

In [None]:
## Customer Insights
6. Which store had the *highest customer footfall*?
7. What is the *average number of customers* per store?
8. How many stores have *more than 450 customers*?
9. Which store has the *lowest customer count*?
10. Is there any store where *high sales do not correspond to high customers*?

In [11]:
# 6 . Which store had the *highest customer footfall*?
retail_data[:,2].argmax()+1

np.int64(9)

In [12]:
# What is the *average number of customers* per store?
retail_data[:,2].mean()

np.float64(425.0)

In [13]:
#8. How many stores have *more than 450 customers*?
retail_data[:,2]>450

array([False, False, False,  True, False, False,  True, False,  True,
       False])

In [14]:
# 9. Which store has the *lowest customer count*?
retail_data[:,2].argmin()

np.int64(0)

In [22]:
#10. Is there any store where *high sales do not correspond to high customers*?
x=retail_data[:,1].mean()
y=retail_data[:,2].mean()
res=retail_data[(retail_data[:,1]>x) & (retail_data[:,2]<y)]
res



array([[    5, 35000,   420,  1400,    12]])

In [None]:
## Inventory & Returns Analysis
11. What is the *total inventory* across all stores?
12. Which store has the *highest inventory stock*?
13. How many stores have *inventory less than 1300 units*?
14. Which store has the *highest return rate* (returns ÷ inventory)?
15. Which stores have *returns greater than 15 units*?


In [26]:
import numpy as np

# Manually created array: [Store ID, Sales, Customers, Inventory, Returns]
retail_data = np.array([
    [1, 25000, 300, 1200, 10],
    [2, 32000, 450, 1500, 15],
    [3, 28000, 380, 1100, 8],
    [4, 41000, 500, 1600, 20],
    [5, 35000, 420, 1400, 12],
    [6, 27000, 360, 1300, 9],
    [7, 39000, 480, 1700, 18],
    [8, 30000, 400, 1250, 14],
    [9, 45000, 550, 1800, 25],
    [10, 33000, 410, 1450, 11]
])

print(retail_data)

[[    1 25000   300  1200    10]
 [    2 32000   450  1500    15]
 [    3 28000   380  1100     8]
 [    4 41000   500  1600    20]
 [    5 35000   420  1400    12]
 [    6 27000   360  1300     9]
 [    7 39000   480  1700    18]
 [    8 30000   400  1250    14]
 [    9 45000   550  1800    25]
 [   10 33000   410  1450    11]]


In [23]:
#11. What is the *total inventory* across all stores?
retail_data[:,3].sum()

np.int64(14300)

In [25]:
#12. Which store has the *highest inventory stock*?
retail_data[:,3].argmax()+1

np.int64(9)

In [29]:
#13. How many stores have *inventory less than 1300 units*?
retail_data[retail_data[:,3]<1300]

array([[    1, 25000,   300,  1200,    10],
       [    3, 28000,   380,  1100,     8],
       [    8, 30000,   400,  1250,    14]])

In [40]:
#14. Which store has the *highest return rate* (returns ÷ inventory)?
x=retail_data[:,4]/retail_data[:,3]
retail_data[x.argmax()]

array([    9, 45000,   550,  1800,    25])

In [42]:
#15. Which stores have *returns greater than 15 units*?
retail_data[retail_data[:,4]>15]

array([[    4, 41000,   500,  1600,    20],
       [    7, 39000,   480,  1700,    18],
       [    9, 45000,   550,  1800,    25]])

In [None]:
## Sales & Customer Correlation
16. Is there a *positive correlation* between sales and customers?
17. Which store has *high customer visits but low sales*?
18. Which store has *low customers but high sales*?
19. How many stores have *sales per customer above ₹90*?
20. Which stores could *benefit from customer acquisition campaigns* (low customers, high inventory)?

In [43]:
import numpy as np

# Manually created array: [Store ID, Sales, Customers, Inventory, Returns]
retail_data = np.array([
    [1, 25000, 300, 1200, 10],
    [2, 32000, 450, 1500, 15],
    [3, 28000, 380, 1100, 8],
    [4, 41000, 500, 1600, 20],
    [5, 35000, 420, 1400, 12],
    [6, 27000, 360, 1300, 9],
    [7, 39000, 480, 1700, 18],
    [8, 30000, 400, 1250, 14],
    [9, 45000, 550, 1800, 25],
    [10, 33000, 410, 1450, 11]
])

print(retail_data)

[[    1 25000   300  1200    10]
 [    2 32000   450  1500    15]
 [    3 28000   380  1100     8]
 [    4 41000   500  1600    20]
 [    5 35000   420  1400    12]
 [    6 27000   360  1300     9]
 [    7 39000   480  1700    18]
 [    8 30000   400  1250    14]
 [    9 45000   550  1800    25]
 [   10 33000   410  1450    11]]


In [44]:
#16. Is there a *positive correlation* between sales and customers?
s=retail_data[:,1]
c=retail_data[:,2]
np.corrcoef(s,c)[0,1]

np.float64(0.9593636912502479)

In [47]:
#17. Which store has *high customer visits but low sales*?
x=retail_data[:,1].mean()
y=retail_data[:,2].mean()
ans=retail_data[(retail_data[:,2]>y) & (retail_data[:,1]<x)]
ans


array([[    2, 32000,   450,  1500,    15]])

In [48]:
#18. Which store has *low customers but high sales*?
x=retail_data[:,1].mean()
y=retail_data[:,2].mean()
ans=retail_data[(retail_data[:,2]<y) & (retail_data[:,1]>x)]
ans

array([[    5, 35000,   420,  1400,    12]])

In [64]:
#19. How many stores have *sales per customer above ₹90*?
x=retail_data[:,1]/retail_data[:,2]
retail_data[x>90]


array([], shape=(0, 5), dtype=int64)

In [67]:
#20. Which stores could *benefit from customer acquisition campaigns* (low customers, high inventory)?
c=retail_data[:,2].mean()
i=retail_data[:,3].mean()
ans=retail_data[(retail_data[:,2]<c) & (retail_data[:,3]>i)]
ans

array([[   10, 33000,   410,  1450,    11]])

In [None]:
## Profitability & Growth
21. If profit margin is *20% of sales, what is the **profit per store*?
22. Which store has the *highest profit*?
23. Which store has the *lowest profit*?
24. If sales increase by *10% next month, what are the **new sales figures*?
25. How will *total profit* change with the 10% sales increase?

In [81]:
#21. If profit margin is *20% of sales, what is the **profit per store*?
retail_data[:,1]*0.2




array([5000., 6400., 5600., 8200., 7000., 5400., 7800., 6000., 9000.,
       6600.])

In [None]:
#22. Which store has the *highest profit*?
import numpy as np
x=(retail_data[:,1]*0.2)
np.argmax(x)+1   # profit is 9000

np.int64(9)

In [86]:
#23. Which store has the *lowest profit*?
x=(retail_data[:,1]*0.2)
np.argmin(x)+1

np.int64(1)

In [None]:
#24. If sales increase by *10% next month, what are the **new sales figures*?
old=retail_data[:,1]
increase=old*0.1
new=old+increase
new


array([27500., 35200., 30800., 45100., 38500., 29700., 42900., 33000.,
       49500., 36300.])

In [None]:
#25. How will *total profit* change with the 10% sales increase?
old=retail_data[:,1]*0.2     # previously mentioned question no 21 there was 20% profit for sales
increase=old*0.1   # 10% sales increased
new=old+increase
x=sum(old)
y=sum(new)
print(y-x)



6700.0


In [None]:
## Operational Strategy
26. Which stores need *inventory restocking* based on sales and current stock?
27. Which stores show *high returns percentage* and might require *quality checks*?
28. Which store is the *best performer overall* (sales, customers, inventory turnover)?
29. Which store is the *worst performer overall*?
30. Which stores can be considered for *expansion* based on strong sales and customer metrics?

In [101]:
import numpy as np

# Manually created array: [Store ID, Sales, Customers, Inventory, Returns]
retail_data = np.array([
    [1, 25000, 300, 1200, 10],
    [2, 32000, 450, 1500, 15],
    [3, 28000, 380, 1100, 8],
    [4, 41000, 500, 1600, 20],
    [5, 35000, 420, 1400, 12],
    [6, 27000, 360, 1300, 9],
    [7, 39000, 480, 1700, 18],
    [8, 30000, 400, 1250, 14],
    [9, 45000, 550, 1800, 25],
    [10, 33000, 410, 1450, 11]
])

print(retail_data)

[[    1 25000   300  1200    10]
 [    2 32000   450  1500    15]
 [    3 28000   380  1100     8]
 [    4 41000   500  1600    20]
 [    5 35000   420  1400    12]
 [    6 27000   360  1300     9]
 [    7 39000   480  1700    18]
 [    8 30000   400  1250    14]
 [    9 45000   550  1800    25]
 [   10 33000   410  1450    11]]


In [103]:
#26. Which stores need *inventory restocking* based on sales and current stock?
x=retail_data[:,1].mean()
y=retail_data[:,3].mean()
res=retail_data[(retail_data[:,1]>x) & (retail_data[:,3]<y)]
ans


array([[   10, 33000,   410,  1450,    11]])

In [None]:
#27. Which stores show *high returns percentage* and might require *quality checks*?
r=(retail_data[0:,4]/retail_data[0:,1])*100     #returns percentage = (Returns ÷ Sales) × 10 
avg=r.mean() #Find the average returns percentag
retail_data[r>avg]   # quality checks : returns percentage > average returns percentage

array([[    2, 32000,   450,  1500,    15],
       [    4, 41000,   500,  1600,    20],
       [    7, 39000,   480,  1700,    18],
       [    8, 30000,   400,  1250,    14],
       [    9, 45000,   550,  1800,    25]])

In [116]:
#28. Which store is the *best performer overall* (sales, customers, inventory turnover)?
import numpy as np
sales=retail_data[0:,1]/max(retail_data[0:,1])
cust= retail_data[0:,2]/max(retail_data[0:,2])
inventory_turnover=retail_data[:,1]/retail_data[:,3]
turnover=inventory_turnover/max(inventory_turnover)
best_perf=sales+cust+turnover
np.argmax(best_perf)+1





np.int64(9)

In [117]:
#29. Which store is the *worst performer overall*?
import numpy as np
sales=retail_data[0:,1]/max(retail_data[0:,1])
cust= retail_data[0:,2]/max(retail_data[0:,2])
inventory_turnover=retail_data[:,1]/retail_data[:,3]
turnover=inventory_turnover/max(inventory_turnover)
best_perf=sales+cust+turnover
np.argmin(best_perf)+1


np.int64(1)

In [119]:
#30 Which stores can be considered for *expansion* based on strong sales and customer metrics?
sales=retail_data[0:,1].mean()
cust=retail_data[0:,2].mean()
retail_data[(retail_data[0:,1]>sales) & (retail_data[0:,2]>cust)]

array([[    4, 41000,   500,  1600,    20],
       [    7, 39000,   480,  1700,    18],
       [    9, 45000,   550,  1800,    25]])