<a href="https://colab.research.google.com/github/Vikrampaswan07/Assignment-of-Business-Analyst-Intern-Jar/blob/main/Sales_and_Profitability_Analysis_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Step 1: Import Pandas

In [None]:
import pandas as pd

Step 2: Load the Datasets

In [None]:
# Workbook locations inside the Colab runtime.
ORDERS_XLSX = "/content/List_of_Orders_1.xlsx"
DETAILS_XLSX = "/content/Order_Details_1.xlsx"

# One DataFrame per workbook: order headers first, then the line items.
df_orders = pd.read_excel(ORDERS_XLSX)
df_details = pd.read_excel(DETAILS_XLSX)

In [None]:
# Show the first five rows of the orders table as a quick sanity check.
orders_preview = df_orders.head(5)
print(orders_preview)

  Order ID Order Date CustomerName           State       City
0  B-25601 2018-04-01       Bharat         Gujarat  Ahmedabad
1  B-25602 2018-04-01        Pearl     Maharashtra       Pune
2  B-25603 2018-04-03        Jahan  Madhya Pradesh     Bhopal
3  B-25604 2018-04-03       Divsha       Rajasthan     Jaipur
4  B-25605 2018-04-05      Kasheen     West Bengal    Kolkata


In [None]:
# Show the first five rows of the order-details table as a quick sanity check.
details_preview = df_details.head(5)
print(details_preview)

  Order ID  Amount  Profit  Quantity     Category      Sub-Category
0  B-25601    1275   -1148         7    Furniture         Bookcases
1  B-25601      66     -12         5     Clothing             Stole
2  B-25601       8      -2         3     Clothing       Hankerchief
3  B-25601      80     -56         4  Electronics  Electronic Games
4  B-25602     168    -111         2  Electronics            Phones


Step 3: Merge the Datasets on 'Order ID'

In [None]:
# Attach the order-level columns to every detail row that shares an 'Order ID'.
# An inner join is the pandas default; it is spelled out here for clarity.
df_merged = df_orders.merge(df_details, how='inner', on='Order ID')

In [None]:
# Show the first five rows of the merged table as a quick sanity check.
merged_preview = df_merged.head(5)
print(merged_preview)

  Order ID Order Date CustomerName        State       City  Amount  Profit  \
0  B-25601 2018-04-01       Bharat      Gujarat  Ahmedabad    1275   -1148   
1  B-25601 2018-04-01       Bharat      Gujarat  Ahmedabad      66     -12   
2  B-25601 2018-04-01       Bharat      Gujarat  Ahmedabad       8      -2   
3  B-25601 2018-04-01       Bharat      Gujarat  Ahmedabad      80     -56   
4  B-25602 2018-04-01        Pearl  Maharashtra       Pune     168    -111   

   Quantity     Category      Sub-Category  
0         7    Furniture         Bookcases  
1         5     Clothing             Stole  
2         3     Clothing       Hankerchief  
3         4  Electronics  Electronic Games  
4         2  Electronics            Phones  


Step 4: Aggregate Sales, Profit, and Order Count

In [None]:
# Per-category metrics, expressed as pandas named aggregations:
#   output column -> (source column, aggregation function)
category_metrics = {
    'Total_Sales': ('Amount', 'sum'),
    'Total_Profit': ('Profit', 'sum'),
    # Distinct order IDs, so an order with several lines in one category counts once.
    'Total_Orders': ('Order ID', 'nunique'),
}

grouped_by_category = df_merged.groupby('Category')
# reset_index() moves 'Category' from the index back into an ordinary column.
category_analysis = grouped_by_category.agg(**category_metrics).reset_index()

print("--- Aggregated Metrics (Sales, Profit, Order Count) ---")
print(category_analysis)

--- Aggregated Metrics (Sales, Profit, Order Count) ---
      Category  Total_Sales  Total_Profit  Total_Orders
0     Clothing       139054         11163           393
1  Electronics       165267         10494           204
2    Furniture       127181          2298           186


Step 5: Calculate Average Profit per Order

In [None]:
# Average profit per order = category profit divided by its distinct order count.
profit_totals = category_analysis['Total_Profit']
order_counts = category_analysis['Total_Orders']
category_analysis['Average_Profit_per_Order'] = profit_totals.div(order_counts)

print("\n--- Added Average Profit per Order ---")
avg_profit_view = category_analysis[['Category', 'Average_Profit_per_Order']]
print(avg_profit_view)



--- Added Average Profit per Order ---
      Category  Average_Profit_per_Order
0     Clothing                 28.404580
1  Electronics                 51.441176
2    Furniture                 12.354839


Step 6: Calculate Profit Margin (%)

In [None]:
# Profit margin (%) = profit as a share of sales, scaled to a percentage.
profit_share = category_analysis['Total_Profit'].div(category_analysis['Total_Sales'])
category_analysis['Profit_Margin_%'] = profit_share.mul(100)

print("\n--- Added Profit Margin (%) ---")
margin_view = category_analysis[['Category', 'Profit_Margin_%']]
print(margin_view)


--- Added Profit Margin (%) ---
      Category  Profit_Margin_%
0     Clothing         8.027817
1  Electronics         6.349725
2    Furniture         1.806874


Step 7: Sort the Results

In [None]:
# Rank categories from highest to lowest profit margin so the first row is the
# top performer and the last row is the bottom performer.
margin_column = 'Profit_Margin_%'
category_analysis_sorted = category_analysis.sort_values(margin_column, ascending=False)

print("\n--- Final Data, Sorted by Profit Margin ---")
# index=False hides the row labels in the printed table.
sorted_table_text = category_analysis_sorted.to_string(index=False)
print(sorted_table_text)


--- Final Data, Sorted by Profit Margin ---
   Category  Total_Sales  Total_Profit  Total_Orders  Average_Profit_per_Order  Profit_Margin_%
   Clothing       139054         11163           393                 28.404580         8.027817
Electronics       165267         10494           204                 51.441176         6.349725
  Furniture       127181          2298           186                 12.354839         1.806874


Step 8: Print the Final Formatted Report

In [None]:
print("\n--- Sales & Profitability Analysis by Category ---")

# Column titles, each padded to the width of the matching data cell
# (the currency and percent cells include their '$' / '%' in that width).
column_titles = (
    f"{'Category':<12}",
    f"{'Total Sales':>13}",
    f"{'Total Profit':>13}",
    f"{'Total Orders':>12}",
    f"{'Avg Profit/Order':>17}",
    f"{'Profit Margin':>15}",
)
header = " | ".join(column_titles)
divider = "-" * len(header)

print(header)
print(divider)

# Emit one formatted line per category, in the already-sorted order.
for record in category_analysis_sorted.to_dict(orient="records"):
    cells = (
        f"{record['Category']:<12}",
        f"${record['Total_Sales']:>12,.2f}",
        f"${record['Total_Profit']:>12,.2f}",
        f"{record['Total_Orders']:>12}",
        f"${record['Average_Profit_per_Order']:>16,.2f}",
        f"{record['Profit_Margin_%']:>14.2f}%",
    )
    print(" | ".join(cells))

print(divider)



--- Sales & Profitability Analysis by Category ---
Category     |   Total Sales |  Total Profit | Total Orders |  Avg Profit/Order |   Profit Margin
-------------------------------------------------------------------------------------------------
Clothing     | $  139,054.00 | $   11,163.00 |          393 | $           28.40 |           8.03%
Electronics  | $  165,267.00 | $   10,494.00 |          204 | $           51.44 |           6.35%
Furniture    | $  127,181.00 | $    2,298.00 |          186 | $           12.35 |           1.81%
-------------------------------------------------------------------------------------------------


Step 9: Print Performance Summary

In [None]:
print("\n--- Performance Summary ---")

# category_analysis_sorted is ordered by Profit_Margin_% descending, so the
# first row is the best-margin category and the last row is the worst.
top_performer = category_analysis_sorted.iloc[0]
bottom_performer = category_analysis_sorted.iloc[-1]

# Verify the "also the lowest average profit" statement against the data
# instead of assuming it: the lowest-margin category is not guaranteed to
# have the lowest average profit per order.
lowest_avg_profit = category_analysis_sorted['Average_Profit_per_Order'].min()
bottom_has_lowest_avg_profit = (
    bottom_performer['Average_Profit_per_Order'] == lowest_avg_profit
)

# Print Top Performer details
print(f"\n[Top Performing Category: {top_performer['Category']}]")
print(f"  - Key Metric: Highest Profit Margin at {top_performer['Profit_Margin_%']:.2f}%.")
# A margin of X% is the same as $X of profit per $100 of sales.
print(f"  - This category generates ${top_performer['Profit_Margin_%']:.2f} in profit for every $100 in sales.")
print(f"  - Total Profit: ${top_performer['Total_Profit']:,.2f}")

# Print Underperformer details
print(f"\n[Underperforming Category: {bottom_performer['Category']}]")
print(f"  - Key Metric: Lowest Profit Margin at {bottom_performer['Profit_Margin_%']:.2f}%.")
if bottom_has_lowest_avg_profit:
    print(f"  - Average Profit per Order is also the lowest at ${bottom_performer['Average_Profit_per_Order']:,.2f}.")
else:
    # Keep the figure but drop the "lowest" claim when the data does not support it.
    print(f"  - Average Profit per Order: ${bottom_performer['Average_Profit_per_Order']:,.2f}.")
print(f"  - Total Profit: ${bottom_performer['Total_Profit']:,.2f}")


--- Performance Summary ---

[Top Performing Category: Clothing]
  - Key Metric: Highest Profit Margin at 8.03%.
  - This category generates $8.03 in profit for every $100 in sales.
  - Total Profit: $11,163.00

[Underperforming Category: Furniture]
  - Key Metric: Lowest Profit Margin at 1.81%.
  - Average Profit per Order is also the lowest at $12.35.
  - Total Profit: $2,298.00


Step 10: Print Suggested Reasons

In [None]:
# Section header uses the same '--- title ---' framing as the other sections.
print("\n--- Suggested Reasons for Performance Differences ---")

# Get the 'Electronics' row for the special callout
electronics_row = category_analysis_sorted.loc[category_analysis_sorted['Category'] == 'Electronics'].iloc[0]

# All margin figures below are read from the computed rows rather than typed
# in by hand, so the narrative stays in step with the data.

# Print reasons for the Top Performer
print(f"\n1. {top_performer['Category']} (Top Performer):")
print(f"  - High Margin ({top_performer['Profit_Margin_%']:.2f}%): '{top_performer['Category']}' likely has lower operational costs (e.g., storage, shipping) and/or higher pricing power (brand appeal, fashion trends).")
print(f"  - High Order Volume ({top_performer['Total_Orders']} orders): This category sees frequent purchases, possibly due to lower price points or being a common necessity, which drives consistent overall profit.")

# Print notes on Electronics
# NOTE(review): the "highest Total Sales" / "highest Average Profit per Order"
# wording is narrative and is not verified against the data here.
print("\n2. Electronics (Strong Sales):")
print(f"  - While not the top in margin ({electronics_row['Profit_Margin_%']:.2f}%), Electronics has the highest Total Sales (${electronics_row['Total_Sales']:,.2f}) and the highest Average Profit per Order (${electronics_row['Average_Profit_per_Order']:,.2f}).")
print("  - This suggests high-value items, but the lower margin could be due to higher cost-of-goods or strong price competition.")

# Print reasons for the Underperformer
print(f"\n3. {bottom_performer['Category']} (Underperformer):")
print(f"  - Critically Low Margin ({bottom_performer['Profit_Margin_%']:.2f}%): '{bottom_performer['Category']}' is the least profitable. This is very likely due to high operational and logistical costs (e.g., shipping bulky items, storage, damage/returns).")
print(f"  - Low Profit Per Order (${bottom_performer['Average_Profit_per_Order']:,.2f}): The profit on each order is minimal, suggesting that high overhead costs are consuming nearly all the sales revenue.")
print("  - Recommendation: This category's pricing, supplier costs, and shipping logistics should be reviewed urgently.")



--- Suggested Reasons for Performance Differences ---

1. Clothing (Top Performer):
  - High Margin (8.03%): 'Clothing' likely has lower operational costs (e.g., storage, shipping) and/or higher pricing power (brand appeal, fashion trends).
  - High Order Volume (393 orders): This category sees frequent purchases, possibly due to lower price points or being a common necessity, which drives consistent overall profit.

2. Electronics (Strong Sales):
  - While not the top in margin (6.35%), Electronics has the highest Total Sales ($165,267.00) and the highest Average Profit per Order ($51.44).
  - This suggests high-value items, but the lower margin could be due to higher cost-of-goods or strong price competition.

3. Furniture (Underperformer):
  - Critically Low Margin (1.81%): 'Furniture' is the least profitable. This is very likely due to high operational and logistical costs (e.g., shipping bulky items, storage, damage/returns).
  - Low Profit Per Order ($12.35): The profit on each 