In [7]:
import numpy as np
import pandas as pd

# Load the dataset
data = pd.read_csv("100_Sales.csv")



In [18]:
# Revenue Efficiency with Advanced Scaling by Region
def revenue_efficiency_scaled(total_profit, total_revenue, region):
    if total_revenue == 0:
        return np.nan  # Handle division by zero
    region_scaling = {'Europe': 1.2, 'Asia': 1.0, 'Africa': 0.9, 'America': 1.1, 'Oceania': 0.8}
    scale = region_scaling.get(region, 1.0)
    
    # Base efficiency calculation
    efficiency = (total_profit / total_revenue) * 100 * scale

    # Apply logarithmic transformation for non-linear scaling
    return np.log1p(efficiency)  # log1p ensures no errors for small efficiency values

# Vectorize the function for application across the dataset
revenue_efficiency_scaled_ufunc = np.vectorize(revenue_efficiency_scaled)

# Apply the function to the dataset
data['Revenue_Efficiency_Scaled_Advanced'] = revenue_efficiency_scaled_ufunc(
    data['Total_Profit'], 
    data['Total_Revenue'], 
    data['Region']
)



Scaling Logic:

Europe has higher weights (1.2) due to a strong market presence.

Oceania has a lower weight (0.8) due to fewer sales.

Default weight is 1.0 for unknown regions.

The $[np.log1p()]$ function computes 
$ln
⁡
(
1
+
𝑥
)
$, which is more stable for small 
𝑥 values compared to np.log().

This is particularly useful for datasets where efficiency metrics might be small or vary significantly.

By using log1p, you showcase an understanding of how to handle non-linear relationships and normalize data ranges.


In [21]:
# Enhanced Profitability Index with Advanced Weight Logic
def profitability_index_extended(total_profit, unit_cost, priority, item_type):
    # Weights based on priority
    priority_weights = {'L': 1.0, 'M': 1.2, 'H': 1.5, 'C': 1.8}
    priority_weight = priority_weights.get(priority, 1.0)  # Default weight = 1.0 if priority not found
    
    # Dynamic weights for item type
    item_type_weights = {
        'Clothes': 0.9, 'Cosmetics': 1.3, 'Baby Food': 1.5,
        'Vegetables': 1.0, 'Personal Care': 1.1, 'Beverages': 1.2,
        'Household': 1.4, 'Office Supplies': 0.8
    }
    item_weight = item_type_weights.get(item_type, 1.0)  # Default weight = 1.0 if item_type not found
    
    # Incorporate additional scaling for string length of item type
    dynamic_item_weight = item_weight * (1 + len(item_type) / 10)
    
    # Basic profitability index calculation
    profitability = (total_profit / unit_cost) * priority_weight * dynamic_item_weight

    # Non-linear transformation to normalize the scale
    return np.sqrt(profitability)

# Vectorize the enhanced function
profitability_index_extended_ufunc = np.vectorize(profitability_index_extended)

# Apply the function to the dataset
data['Profitability_Index_Extended_Advanced'] = profitability_index_extended_ufunc(
    data['Total_Profit'], 
    data['Unit_Cost'], 
    data['Order_Priority'], 
    data['Item_Type']
)

Weighs profitability based on Order Priority and Item_Type, showcasing a multi-faceted approach.

Critical orders (C) have the highest weight (1.8).

Premium item types (e.g., Baby Food) also have higher weights.

This highlights how different business aspects interact to determine profitability.

Using np.sqrt() to normalize and reduce the impact of extreme values.

In [10]:
# Cost Efficiency Metric with NumPy Integration
def cost_efficiency(unit_cost, total_profit, priority):
    priority_weights = np.array([1.0, 1.3, 1.7, 2.0])  # Priority weight values
    priorities = np.array(['L', 'M', 'H', 'C'])        # Priority categories
    # Find the matching weight or default to 1.0
    weight = np.where(priorities == priority, priority_weights, 1.0).max()
    # Compute cost efficiency
    return (total_profit / unit_cost) * weight

# Vectorize for application on arrays
cost_efficiency_ufunc = np.vectorize(cost_efficiency)


 Measures how efficiently costs are converted into profit, factoring in Order Priority

Critical orders (C) show the highest priority weight for cost efficiency.

Designed to evaluate how cost-sensitive operations influence profitability.

The np.where function determines the appropriate weight based on the priority.

In [11]:
# Apply Ufuncs to the DataFrame
data['Revenue_Efficiency_Scaled'] = revenue_efficiency_scaled_ufunc(
    data['Total_Profit'], data['Total_Revenue'], data['Region']
)

data['Profitability_Index_Extended'] = profitability_index_extended_ufunc(
    data['Total_Profit'], data['Unit_Cost'], data['Order_Priority'], data['Item_Type']
)

data['Cost_Efficiency'] = cost_efficiency_ufunc(
    data['Unit_Cost'], data['Total_Profit'], data['Order_Priority']
)


In [25]:
# Formatting and Display of Results
formatted_data = data[['Region', 'Total_Profit', 'Total_Revenue', 'Revenue_Efficiency_Scaled',
                       'Profitability_Index_Extended', 'Cost_Efficiency']].copy()

# Round numerical values to 2 decimal places
formatted_data['Revenue_Efficiency_Scaled'] = formatted_data['Revenue_Efficiency_Scaled'].round(2)
formatted_data['Profitability_Index_Extended'] = formatted_data['Profitability_Index_Extended'].round(2)
formatted_data['Cost_Efficiency'] = formatted_data['Cost_Efficiency'].round(2)

# Renaming columns for better readability
formatted_data.rename(columns={
    'Total_Profit': 'Profit ($)',
    'Total_Revenue': 'Revenue ($)',
    'Revenue_Efficiency_Scaled': 'Revenue Efficiency (%)',
    'Profitability_Index_Extended': 'Profitability Index',
    'Cost_Efficiency': 'Cost Efficiency'
}, inplace=True)

# Display the formatted table (top 11 rows for clarity)
print("\nEnhanced Metrics Table (Top 11 Rows):")
print(formatted_data.head(11).to_string(index=False))



Enhanced Metrics Table (Top 11 Rows):
                           Region  Profit ($)  Revenue ($)  Revenue Efficiency (%)  Profitability Index  Cost Efficiency
            Australia and Oceania   951410.50   2533654.00                   37.55             13427.89         10145.51
Central America and the Caribbean   248406.36    576782.80                   43.07              3818.05          4242.27
                           Europe   224598.75   1158502.59                   23.26               342.27           427.84
               Sub_Saharan Africa    19525.82     75591.66                   25.83              5078.97          5643.30
               Sub_Saharan Africa   639077.50   3296425.02                   19.39               973.91          1217.38
            Australia and Oceania   285087.64    759202.72                   37.55              4828.36          3576.56
               Sub_Saharan Africa   693911.51   2798046.49                   24.80              2319.76          1

used multiple numpy universal functions like -
np.nan,
np.log1p,
np.vectorize,
np.sqrt,
np.where etc
