In [1]:
import pandas as pd

# Creating a sample DataFrame with some categorical data
data = {
    'Product': ['Phone', 'Laptop', 'Headphones', 'Tablet', 'Phone', 'Laptop', 'Headphones', 'Tablet', 'Phone', 'Tablet'],
    'Category': ['Electronics', 'Electronics', 'Accessories', 'Electronics', 'Electronics', 'Electronics', 'Accessories', 'Electronics', 'Electronics', 'Electronics'],
    'Sales': [300, 700, 100, 400, 350, 750, 120, 420, 360, 430],
    'Cost': [200, 500, 70, 250, 220, 520, 90, 260, 230, 270],
    'Profit': [100, 200, 30, 150, 130, 230, 30, 160, 130, 160]
}

# Creating DataFrame
sales = pd.DataFrame(data)


In [13]:
sales['Product'] = sales['Product'].astype('category')
print("Step 1: Convert to Categorical")
print(sales.dtypes)

Step 1: Convert to Categorical
Product     category
Category    category
Sales          int64
Cost           int64
Profit         int64
dtype: object


In [14]:
# Step 2: View the unique categories in the 'Category' column
print("\nStep 2: View Categories")
print(sales['Product'].cat.categories)


Step 2: View Categories
Index(['Headphones', 'Laptop', 'Phone', 'Tablet'], dtype='object')


In [16]:
# Step 3: Get the category codes (numerical representation of categories)
sales['Category_Code'] = sales['Product'].cat.codes
print("\nStep 3: Product Codes")
print(sales[['Product', 'Category_Code']])


Step 3: Product Codes
      Product  Category_Code
0       Phone              2
1      Laptop              1
2  Headphones              0
3      Tablet              3
4       Phone              2
5      Laptop              1
6  Headphones              0
7      Tablet              3
8       Phone              2
9      Tablet              3


In [17]:
# Step 4: Count occurrences of each category
print("\nStep 4: Count Category Occurrences")
print(sales['Product'].value_counts())


Step 4: Count Category Occurrences
Product
Tablet        3
Phone         3
Laptop        2
Headphones    2
Name: count, dtype: int64


In [20]:
# Step 5: Rename categories (if necessary)
sales['Product'] = sales['Product'].cat.rename_categories({
    'Headphones': 'Airpods'
})
print("\nStep 5: Rename Categories")
print(sales['Product'].head())


Step 5: Rename Categories
0      Phone
1     Laptop
2    Airpods
3     Tablet
4      Phone
Name: Product, dtype: category
Categories (4, object): ['Airpods', 'Laptop', 'Phone', 'Tablet']


In [29]:
# Step 6: Add new categories to the existing ones
sales['Product'] = sales['Product'].cat.add_categories(['Gift Cards'])
print("\nStep 6: Add New Categories")
print(sales['Product'].cat.categories)


Step 6: Add New Categories
Index(['Airpods', 'Laptop', 'Phone', 'Tablet', 'Gift Cards'], dtype='object')


In [30]:
# Step 7: Remove unused categories
sales['Product'] = sales['Product'].cat.remove_categories(['Gift Cards'])
print("\nStep 7: Remove Unused Categories")
print(sales['Product'].cat.categories)


Step 7: Remove Unused Categories
Index(['Airpods', 'Laptop', 'Phone', 'Tablet'], dtype='object')


In [41]:
''' Step 8: Set custom order for categories
custom_order = ['Airpods', 'Laptop']
sales['Product'] = sales['Product'].cat.set_categories(custom_order, ordered=True)
print("\nStep 8: Set Custom Order for Categories")
print(sales['Product'].head())'''

' Step 8: Set custom order for categories\ncustom_order = [\'Airpods\', \'Laptop\']\nsales[\'Product\'] = sales[\'Product\'].cat.set_categories(custom_order, ordered=True)\nprint("\nStep 8: Set Custom Order for Categories")\nprint(sales[\'Product\'].head())'

In [42]:
# Grouping by the 'Category' column and calculating the total sales for each category
grouped_data = sales.groupby('Product')['Profit'].sum()

print(grouped_data)

Product
Airpods     60
Laptop     430
Name: Profit, dtype: int64


  grouped_data = sales.groupby('Product')['Profit'].sum()


In [43]:
# Grouping by 'Category' and calculating multiple aggregations for 'Sales' and 'Cost'
grouped_data = sales.groupby('Category').agg({
    'Sales': ['sum', 'mean', 'max'],
    'Cost': 'sum'
})

print(grouped_data)


                   Sales               Cost
                     sum    mean  max   sum
Category                                   
Accessory Items      220  110.00  120   160
Electronic Devices  3710  463.75  750  2450


  grouped_data = sales.groupby('Category').agg({


In [44]:
# Grouping by both 'Category' and 'Product' and calculating the total 'Sales'
grouped_data = sales.groupby(['Category', 'Product'])['Sales'].sum()

print(grouped_data)


Category            Product
Accessory Items     Airpods     220
                    Laptop        0
Electronic Devices  Airpods       0
                    Laptop     1450
Name: Sales, dtype: int64


  grouped_data = sales.groupby(['Category', 'Product'])['Sales'].sum()


In [45]:
grouped_data = sales.groupby('Category').agg({'Sales': 'sum'}).reset_index()
print(grouped_data)


             Category  Sales
0     Accessory Items    220
1  Electronic Devices   3710


  grouped_data = sales.groupby('Category').agg({'Sales': 'sum'}).reset_index()
