In [None]:
# @title
import numpy as np

# Load the CSV as a plain float matrix (header row skipped).
# With genfromtxt's default float dtype, fields that cannot be parsed as
# numbers come back as NaN, so the NaN counts below include text columns as
# well as genuinely empty fields.
# invalid_raise=False: rows with an inconsistent number of columns are skipped
# with a warning instead of raising.
data = np.genfromtxt(
    "datasets/sales_data.csv",
    delimiter=",",
    skip_header=1,
    encoding="latin1",
    filling_values=np.nan,
    invalid_raise=False
)

# A notebook cell only displays its final expression, so the intermediate
# results are printed explicitly; as bare expressions they were computed and
# then silently dropped.
print("Shape:", data.shape)
print("Total NaN entries:", np.isnan(data).sum())

# NaN count per column; left as the last expression so the cell displays it.
np.isnan(data).sum(axis=0)


In [None]:
# NOTE(review): prints the literal "t" and nothing else; looks like a leftover
# scratch/debug cell -- confirm before removing.
print("t")

In [None]:
# @title
import numpy as np

# Load the CSV (from the filesystem root this time) as a plain float matrix.
# Fields that cannot be parsed as numbers come back as NaN under the default
# float dtype; invalid_raise=False skips rows with an inconsistent column
# count (with a warning) instead of raising.
data = np.genfromtxt(
    "/sales_data.csv",
    delimiter=",",
    skip_header=1,
    encoding="latin1",
    filling_values=np.nan,
    invalid_raise=False
)

# Only the final expression of a cell is displayed, so the shape is printed
# explicitly; as a bare expression it was evaluated and discarded.
print("Shape:", data.shape)

# Total number of NaN entries; left as the cell's displayed value.
np.isnan(data).sum()


In [None]:
# @title
import numpy as np

# Load the CSV as a structured array: field names are taken from the header
# row (names=True) and dtype=None lets genfromtxt infer a type per column.
# invalid_raise=False skips rows with an inconsistent column count (with a
# warning) instead of raising.
read_options = dict(
    delimiter=",",
    names=True,
    dtype=None,
    encoding="latin1",
    invalid_raise=False,
)
data = np.genfromtxt("/sales_data.csv", **read_options)

# Fields kept for the rest of the notebook.
columns_needed = [
    'ORDERNUMBER',
    'QUANTITYORDERED',
    'PRICEEACH',
    'ORDERLINENUMBER',
    'SALES',
    'ORDERDATE',
    'STATUS',
    'MONTH_ID',
    'YEAR_ID',
    'PRODUCTLINE',
    'MSRP',
    'PRODUCTCODE',
    'CUSTOMERNAME',
    'COUNTRY',
    'DEALSIZE',
]
filtered_data = data[columns_needed]

# Preview: tab-separated header line followed by the first ten records.
print(*columns_needed, sep="\t")
for row in filtered_data[:10]:
    print(*(row[col] for col in columns_needed), sep="\t")

# Grand total of the SALES field, with NaN entries left out of the sum.
sales_column = filtered_data['SALES']
sales = sales_column[np.logical_not(np.isnan(sales_column))]
total_sales = sales.sum()

print("\nTotal Sales:", total_sales)



In [None]:
# @title
# Total SALES per YEAR_ID, printed as a tab-separated table.
# Rows with NaN SALES are dropped first, as the other aggregation cells in
# this notebook do; otherwise a single NaN turns that year's total into NaN.
sales = filtered_data['SALES']
valid = ~np.isnan(sales)
years = filtered_data['YEAR_ID'][valid]
sales = sales[valid]

unique_years = np.unique(years)
print("YEAR_ID\tTOTAL_SALES")

for year in unique_years:
    year_sales = np.sum(sales[years == year])
    print(f"{year}\t{year_sales}")


In [None]:
# @title
# Total sales per year, then the years with the highest and lowest totals.
# Reads `sales`, `years` and `unique_years`, which must already be bound in
# the kernel (this cell does not define them).
# np.nansum skips NaN SALES values so every year keeps a numeric total; a NaN
# total would otherwise be what argmax/argmin land on.
yearly_sales = np.array([
    np.nansum(sales[years == year]) for year in unique_years
])

# Positions of the largest and smallest yearly totals (first one on ties).
max_index = np.argmax(yearly_sales)
min_index = np.argmin(yearly_sales)

# Map those positions back to the year labels and their totals.
highest_sales_year = unique_years[max_index]
lowest_sales_year = unique_years[min_index]

highest_sales_value = yearly_sales[max_index]
lowest_sales_value = yearly_sales[min_index]

print("\nYear with Highest Sales:", highest_sales_year, "->", highest_sales_value)
print("Year with Lowest Sales:", lowest_sales_year, "->", lowest_sales_value)


In [None]:
# @title
# Rank PRODUCTCODE values by their summed SALES amount (not by quantity).
# Rows whose SALES is NaN are excluded before aggregating.
sales = filtered_data['SALES']
valid = np.logical_not(np.isnan(sales))
products = filtered_data['PRODUCTCODE'][valid]
sales = sales[valid]

# One total per distinct product code, in np.unique's sorted order.
unique_products = np.unique(products)
product_sales = np.array([
    sales[products == code].sum() for code in unique_products
])

# Positions of the largest / smallest totals (first one wins on ties).
max_index = product_sales.argmax()
min_index = product_sales.argmin()

most_sold_product, most_sold_value = unique_products[max_index], product_sales[max_index]
least_sold_product, least_sold_value = unique_products[min_index], product_sales[min_index]

print("\nMost Sold Product:", most_sold_product, "->", most_sold_value)
print("Least Sold Product:", least_sold_product, "->", least_sold_value)


In [None]:
# @title
# Per year, report the product line with the highest and lowest summed SALES
# among 'Shipped' rows and, separately, among 'Cancelled' rows.
sales = filtered_data['SALES']
valid = ~np.isnan(sales)          # rows with NaN SALES are left out
years = filtered_data['YEAR_ID'][valid]
productlines = filtered_data['PRODUCTLINE'][valid]
status = filtered_data['STATUS'][valid]
sales = sales[valid]

unique_years = np.unique(years)

# (STATUS value, "highest" label, "lowest" label, message when no rows match)
status_reports = (
    ('Shipped',
     "  Highest SHIPPED ProductLine:",
     "  Lowest SHIPPED ProductLine:",
     "  No SHIPPED data"),
    ('Cancelled',
     "  Highest CANCELLED ProductLine:",
     "  Lowest CANCELLED ProductLine:",
     "  No CANCELLED data"),
)

for year in unique_years:
    print(f"\nYEAR: {year}")

    # Restrict every column to the current year once.
    in_year = years == year
    year_products = productlines[in_year]
    year_status = status[in_year]
    year_sales = sales[in_year]

    for wanted_status, highest_label, lowest_label, empty_message in status_reports:
        has_status = year_status == wanted_status
        status_products = year_products[has_status]
        status_sales = year_sales[has_status]

        if status_sales.size == 0:
            print(empty_message)
            continue

        # Summed SALES for each product line that occurs with this status.
        distinct_lines = np.unique(status_products)
        line_totals = np.array([
            np.sum(status_sales[status_products == line])
            for line in distinct_lines
        ])

        top = np.argmax(line_totals)
        bottom = np.argmin(line_totals)

        print(highest_label, distinct_lines[top], "->", line_totals[top])
        print(lowest_label, distinct_lines[bottom], "->", line_totals[bottom])



In [None]:
# @title
# Count 'In Process' rows per product line, grouped by year.
years = filtered_data['YEAR_ID']
productlines = filtered_data['PRODUCTLINE']
status = filtered_data['STATUS']

# Keep only the rows whose STATUS is exactly 'In Process'.
inprocess_mask = status == 'In Process'
inprocess_years, inprocess_productlines = years[inprocess_mask], productlines[inprocess_mask]

# Only years that have at least one 'In Process' row are listed.
unique_years = np.unique(inprocess_years)

for year in unique_years:
    print(f"\nYEAR: {year}")

    year_productlines = inprocess_productlines[inprocess_years == year]

    # Number of matching rows for each product line seen in this year.
    for pl in np.unique(year_productlines):
        count = np.count_nonzero(year_productlines == pl)
        print(f"  {pl}: {count}")


In [None]:
# @title
 # Extract required columns
# For each year, find the customer with the largest summed SALES.
sales = filtered_data['SALES']
valid = np.logical_not(np.isnan(sales))   # rows with NaN SALES are left out
years = filtered_data['YEAR_ID'][valid]
customers = filtered_data['CUSTOMERNAME'][valid]
sales = sales[valid]

unique_years = np.unique(years)

for year in unique_years:
    print(f"\nYEAR: {year}")

    # Customers and sales restricted to the current year.
    in_year = years == year
    year_customers, year_sales = customers[in_year], sales[in_year]

    # Summed SALES per distinct customer, in np.unique's sorted order.
    unique_customers = np.unique(year_customers)
    customer_totals = np.array([
        year_sales[year_customers == cust].sum()
        for cust in unique_customers
    ])

    # First position of the largest total.
    max_index = customer_totals.argmax()

    print("  Customer with Highest Sales:",
          unique_customers[max_index], "->",
          customer_totals[max_index])


In [None]:
# @title
# For each year, find the country with the largest summed SALES.
sales = filtered_data['SALES']
valid = np.logical_not(np.isnan(sales))   # rows with NaN SALES are left out
years, countries, sales = (
    filtered_data['YEAR_ID'][valid],
    filtered_data['COUNTRY'][valid],
    sales[valid],
)

unique_years = np.unique(years)

for year in unique_years:
    print(f"\nYEAR: {year}")

    # Countries and sales restricted to the current year.
    in_year = years == year
    year_countries = countries[in_year]
    year_sales = sales[in_year]

    # Summed SALES per distinct country, in np.unique's sorted order.
    unique_countries = np.unique(year_countries)
    country_totals = np.array([
        year_sales[year_countries == country].sum()
        for country in unique_countries
    ])

    # First position of the largest total.
    max_index = country_totals.argmax()

    print("  Country with Highest Sales:",
          unique_countries[max_index], "->",
          country_totals[max_index])


In [None]:
# @title
# Pull every column used by this cell and the cells that follow, keeping only
# the rows whose SALES is not NaN so all arrays stay aligned with each other.
column_names = (
    'YEAR_ID', 'MONTH_ID', 'SALES', 'PRODUCTCODE',
    'PRODUCTLINE', 'STATUS', 'CUSTOMERNAME', 'COUNTRY',
)
valid = ~np.isnan(filtered_data['SALES'])
years, months, sales, products, productlines, status, customers, countries = (
    filtered_data[name][valid] for name in column_names
)

unique_years = np.unique(years)

# Per year: total SALES for each month, then the best and worst month.
for year in unique_years:
    print(f"\nYEAR: {year}")

    year_mask = years == year
    year_months = months[year_mask]
    year_sales = sales[year_mask]

    unique_months = np.unique(year_months)
    monthly_totals = np.array([
        np.sum(year_sales[year_months == m]) for m in unique_months
    ])

    for m, total in zip(unique_months, monthly_totals):
        print(f"  Month {m} Total Sales: {total}")

    # argmax / argmin pick the first month on ties.
    best_position = np.argmax(monthly_totals)
    worst_position = np.argmin(monthly_totals)

    print("  Highest Sales Month:", unique_months[best_position])
    print("  Lowest Sales Month:", unique_months[worst_position])


In [None]:
# @title
# For every (year, month) pair, report the PRODUCTCODE with the largest and
# the smallest summed SALES.
# Reads unique_years, years, months, products and sales, which must already
# be bound in the kernel as equally long, row-aligned arrays.
for year in unique_years:
    print(f"\nYEAR: {year}")
    year_mask = years == year

    for month in np.unique(months[year_mask]):
        print(f"  Month: {month}")

        # Rows belonging to this month of this year.
        mask = year_mask & (months == month)
        m_products, m_sales = products[mask], sales[mask]

        # Summed SALES per product code seen in this month.
        unique_products = np.unique(m_products)
        totals = np.array([
            m_sales[m_products == p].sum() for p in unique_products
        ])

        print("    Max Product:", unique_products[totals.argmax()])
        print("    Min Product:", unique_products[totals.argmin()])


In [None]:
# @title
# For every (year, month) pair and for each of the 'Shipped' and 'Cancelled'
# statuses, report the product line with the highest and lowest summed SALES.
# Reads unique_years, years, months, status, productlines and sales from the
# kernel; they are expected to be row-aligned arrays.
for year in unique_years:
    print(f"\nYEAR: {year}")
    in_year = years == year

    for month in np.unique(months[in_year]):
        print(f"  Month: {month}")

        base_mask = in_year & (months == month)

        for st in ('Shipped', 'Cancelled'):
            st_mask = base_mask & (status == st)

            # Nothing with this status in this month.
            if not st_mask.any():
                print(f"    {st}: No data")
                continue

            pl, sl = productlines[st_mask], sales[st_mask]

            # Summed SALES per product line for this status.
            unique_pl = np.unique(pl)
            totals = np.array([sl[pl == p].sum() for p in unique_pl])

            print(f"    {st} Highest:",
                  unique_pl[totals.argmax()])
            print(f"    {st} Lowest:",
                  unique_pl[totals.argmin()])


In [None]:
# @title
# For every (year, month) pair, count the 'In Process' rows per product line.
# Reads unique_years, years, months, status and productlines from the kernel;
# they are expected to be row-aligned arrays.

# The status test does not depend on the year or month, so evaluate it once.
in_process_rows = status == 'In Process'

for year in unique_years:
    print(f"\nYEAR: {year}")
    in_year = years == year

    for month in np.unique(months[in_year]):
        print(f"  Month: {month}")

        mask = in_year & (months == month) & in_process_rows

        # No 'In Process' rows in this month.
        if not mask.any():
            print("    No In-Process products")
            continue

        pl = productlines[mask]
        for p in np.unique(pl):
            print(f"    {p}: {np.sum(pl == p)}")


In [None]:
# @title
# For every (year, month) pair, report the customer and the country with the
# largest summed SALES.
# Reads unique_years, years, months, sales, customers and countries from the
# kernel; they are expected to be row-aligned arrays.
for year in unique_years:
    print(f"\nYEAR: {year}")
    in_year = years == year

    for month in np.unique(months[in_year]):
        print(f"  Month: {month}")

        mask = in_year & (months == month)
        m_sales = sales[mask]

        # Same aggregation for both groupings: customer first, then country.
        for heading, column in (("    Top Customer:", customers),
                                ("    Top Country:", countries)):
            group_keys = column[mask]
            distinct_keys = np.unique(group_keys)
            group_totals = np.array([
                np.sum(m_sales[group_keys == c]) for c in distinct_keys
            ])

            print(heading,
                  distinct_keys[np.argmax(group_totals)])


In [None]:
# @title
# Find the product whose PRICEEACH reaches the highest value.
#
# `prices` was never bound anywhere in the notebook, so this cell raised
# NameError. Both columns are now read from filtered_data here, which also
# keeps them row-aligned instead of relying on whatever `products` an earlier
# cell left in the kernel.
price_column = filtered_data['PRICEEACH']

# Leave out rows without a price so NaN cannot win np.max / np.argmax.
has_price = ~np.isnan(price_column)
prices = price_column[has_price]
priced_products = filtered_data['PRODUCTCODE'][has_price]

unique_products = np.unique(priced_products)

# Highest PRICEEACH observed for each product code.
max_prices = np.array([
    np.max(prices[priced_products == p]) for p in unique_products
])

# First position of the overall maximum.
max_index = np.argmax(max_prices)

print("Highest Priced Product:", unique_products[max_index])
print("Highest Price:", max_prices[max_index])


In [None]:
# @title
# Find the PRODUCTCODE that appears on the most rows of filtered_data.
# Note: this counts rows, not distinct ORDERNUMBER values.
products = filtered_data['PRODUCTCODE']

# np.unique returns the per-value counts directly, replacing one full
# comparison pass over the column for every distinct product.
unique_products, order_counts = np.unique(products, return_counts=True)

# argmax returns the first maximum; unique_products is sorted, so ties go to
# the product code that sorts first.
max_index = np.argmax(order_counts)

most_ordered_product = unique_products[max_index]
max_orders = order_counts[max_index]

print("Product with Maximum Orders:", most_ordered_product)
print("Number of Orders:", max_orders)


In [None]:
# @title
# Revenue (summed SALES) per YEAR_ID, printed as a tab-separated table.
sales = filtered_data['SALES']

# Rows whose SALES is NaN are left out of the totals.
valid = np.logical_not(np.isnan(sales))
years = filtered_data['YEAR_ID'][valid]
sales = sales[valid]

unique_years = np.unique(years)

print("YEAR_ID\tTOTAL_REVENUE")

for year in unique_years:
    year_revenue = sales[years == year].sum()
    print(f"{year}\t{year_revenue}")


In [None]:
# @title
# Revenue (summed SALES) per month within each year.
# Reads `valid`, `years`, `sales` and `unique_years`, which must already be
# bound in the kernel; MONTH_ID is filtered with that same `valid` mask so it
# lines up with them.
months = filtered_data['MONTH_ID'][valid]

for year in unique_years:
    print(f"\nYEAR: {year}")

    # Months and sales restricted to the current year.
    year_mask = years == year
    year_months, year_sales = months[year_mask], sales[year_mask]

    unique_months = np.unique(year_months)

    for month in unique_months:
        month_revenue = year_sales[year_months == month].sum()
        print(f"  Month {month}: {month_revenue}")
