**Import Modules**

In [199]:
import csv
import numpy as np

**Open CSV File**

In [200]:
# newline="" leaves line-ending handling to the csv module, as its
# documentation requires, so quoted fields that contain line breaks are
# parsed correctly on every platform.
with open("amazon_sales_dataset.csv", newline="") as file:
    reader = csv.reader(file)
    # Read every row into a list of string lists while the file is still open.
    data = list(reader)

**Remove the header row**

In [201]:
# Rebind `data` to a new list holding every row except the one at index 0
# (the header line of the CSV file).
data = [row for position, row in enumerate(data) if position > 0]
data

[['OD1000', 'P500', 'Grocery', '122', '4', '20', '1.7'],
 ['OD1001', 'P501', 'Electronics', '45232', '5', '30', '2.8'],
 ['OD1002', 'P502', 'Grocery', '234', '3', '10', '4.5'],
 ['OD1003', 'P503', 'Fashion', '330', '5', '5', '3.9'],
 ['OD1004', 'P504', 'Electronics', '2933', '4', '20', '3.5'],
 ['OD1005', 'P505', 'Home', '19869', '5', '15', '1.1'],
 ['OD1006', 'P506', 'Electronics', '41934', '3', '30', '1.6'],
 ['OD1007', 'P507', 'Electronics', '3247', '3', '15', '3.1'],
 ['OD1008', 'P508', 'Fashion', '1728', '3', '20', '3.4'],
 ['OD1009', 'P509', 'Grocery', '40', '1', '30', '1.1'],
 ['OD1010', 'P510', 'Fashion', '800', '4', '25', '4.2'],
 ['OD1011', 'P511', 'Grocery', '796', '2', '20', '1.9'],
 ['OD1012', 'P512', 'Grocery', '111', '4', '30', '3.4'],
 ['OD1013', 'P513', 'Grocery', '795', '3', '25', '2.6'],
 ['OD1014', 'P514', 'Books', '660', '4', '5', '2.7'],
 ['OD1015', 'P515', 'Electronics', '44073', '2', '15', '1.7'],
 ['OD1016', 'P516', 'Home', '13090', '2', '5', '2.8'],
 ['OD1017'

# **Load into NumPy**

**STEP 1: Extract price column**

In [202]:
# Field 3 of every row is parsed as an integer and collected into an array.
prices = np.array(list(map(int, (row[3] for row in data))))

**STEP 2: Extract quantity**

In [203]:
# Field 4 of every row is parsed as an integer and collected into an array.
# NOTE: the name `quanity` is a misspelling of "quantity"; it is kept unchanged
# because the revenue cells further down refer to the array by this name.
quanity = np.array(list(map(int, (row[4] for row in data))))

**STEP 3: Extract discount**

In [204]:
# Field 5 of every row is parsed as an integer and collected into an array.
discount = np.array(list(map(int, (row[5] for row in data))))

**STEP 4: Extract rating**

In [205]:
# Field 6 of every row is parsed as a float and collected into an array.
rating = np.array(list(map(float, (row[6] for row in data))))

**STEP 5: Extract category**

In [206]:
# Field 2 of every row is kept as text, giving a string array of category labels.
categories = np.array([record[2] for record in data])

# **Revenue Calculations**

**STEP 1: Basic revenue**

In [207]:
# Element-wise product of units ordered and unit price, before any discount.
revenue = quanity * prices

**STEP 2: Apply discount**

In [208]:
# `discount` is treated as a percentage: 1 - discount/100 is the fraction of
# the price that remains after the discount.
final_prices = (1 - discount / 100) * prices
# Discounted unit price multiplied by the number of units, element-wise.
final_revenue = quanity * final_prices

# **Filtering**

**A. HIGH RATING ORDERS (>= 4.5)**

In [209]:
# Boolean mask that is True where the rating is 4.5 or higher (inclusive).
high_rating_mask = 4.5 <= rating
# Discounted revenue of only the orders selected by the mask.
high_rating_orders = final_revenue[high_rating_mask]

**B. Electronics category only**

In [210]:
# Boolean mask that is True where the category label is exactly "Electronics".
elec_mask = categories == "Electronics"
# Discounted revenue of only the orders selected by the mask.
electronics_orders = final_revenue[elec_mask]
# Bare last expression: the notebook displays the selected values.
electronics_orders

array([158312.  ,   9385.6 ,  88061.4 ,   8279.85,  74924.1 ,  48136.  ,
        34801.2 ,   6822.9 ,  30955.5 , 144648.  ,  41821.7 ,   5227.2 ,
        23283.2 ,  47644.  ,   8189.  ,  26830.25,  63745.5 , 189040.  ,
        73852.8 ,  37893.  , 164940.  , 171640.  ,  40556.  ,  69354.  ,
        11341.8 ,  25758.  ,  26415.7 ,  64762.5 ,  50681.6 ,  94345.75,
        55472.4 ,  37340.  ,  99024.  ,  69960.  ,  71668.  ,  33582.5 ,
        50311.5 ,  20440.8 ,   5526.  ,  46153.6 ,  86673.6 , 170986.5 ,
       107154.3 ,   9699.5 ,  45261.8 , 137310.  , 130368.  ,  34799.  ,
       184960.  ,  94765.5 ,  62877.9 ,  82701.  ,  58303.2 ,  32427.2 ,
       118756.  ,  52335.  ,  84758.4 ,  22449.  ,  38126.  ,  57115.8 ,
        13661.25,  22758.75,  33790.5 ,  55606.5 , 183660.  ,  87681.6 ,
        88353.75,  69484.8 , 228310.  ,  11814.6 , 125241.6 ,  64349.1 ,
       199108.  ,  81301.  ,  37283.7 , 104193.75, 152396.5 ,  17137.5 ,
        25565.4 ,  17744.4 ,   7224.  ,  25986.  , 

# **Category-Level Analytics**

**STEP 1: Find unique categories**

In [211]:
# Distinct category labels, sorted (np.unique returns the sorted unique elements).
unique_cats = np.unique(categories)

**STEP 2: Compute metrics for each category**

In [212]:
# For each distinct category, print four lines: the category label, the mean
# undiscounted price, the summed discounted revenue, and the number of orders.
for cat in unique_cats:
    # Rows belonging to the current category.
    mask = categories == cat
    avg_price = prices[mask].mean()
    total_rev = final_revenue[mask].sum()
    # Summing a boolean mask counts its True entries.
    count = mask.sum()

    # One value per line, in the order listed above.
    print(cat, avg_price, total_rev, count, sep="\n")

Beauty
1509.0357142857142
239815.05
56
Books
482.0744680851064
107603.55
94
Electronics
26159.764150943396
21415971.75
318
Fashion
1569.1593625498008
948838.6
251
Grocery
466.3
156957.85
140
Home
9482.687943262412
3935162.5
141


**Outlier Detection (Data Quality Step)**

In [213]:
# Mean and standard deviation of the discounted revenue across all orders.
mean_r = final_revenue.mean()
std_r = final_revenue.std()

# Keep the revenue values lying more than three standard deviations away from
# the mean, on either side.
outliers = final_revenue[
    (final_revenue < mean_r - 3 * std_r)
    | (final_revenue > mean_r + 3 * std_r)
]

**Save Clean Data (Export Final CSV)**

**Create final matrix:**

In [214]:
# Place the three 1-D arrays side by side as the columns of one 2-D array:
# discounted price, discounted revenue, rating.
clean_matrix = np.column_stack(
    [
        final_prices,
        final_revenue,
        rating,
    ]
)

**Save it:**

In [215]:
# Write the matrix to a CSV file with two decimals per value.
# The header is separated by a bare "," exactly like the data rows; putting a
# space after each comma would give the second and third column names a
# leading space when the file is read back.
np.savetxt(
    "amazon_cleaned.csv",
    clean_matrix,
    delimiter=",",
    fmt="%.2f",
    header="Final_Price,Final_Revenue,Ratings",
    # An empty `comments` prefix stops NumPy from putting "# " before the header.
    comments="",
)