In [2]:
import pandas as pd

# Toy sales dataset, written one sale per row so each record reads left to right.
sales_columns = ['Product', 'Category', 'Region', 'Price', 'Units Sold']
sales_rows = [
    ('A', 'Electronics', 'North', 100, 10),
    ('B', 'Clothing', 'South', 50, 25),
    ('A', 'Electronics', 'East', 120, 15),
    ('C', 'Home Goods', 'West', 30, 30),
    ('B', 'Clothing', 'North', 60, 20),
    ('C', 'Home Goods', 'East', 40, 35),
    ('A', 'Electronics', 'South', 110, 12),
    ('B', 'Clothing', 'West', 55, 22),
]

# Transpose the rows into the column-oriented mapping: column name -> list of values.
data = {
    column: list(values)
    for column, values in zip(sales_columns, zip(*sales_rows))
}

df = pd.DataFrame(data)
print("DataFrame (now 'df'):\n", df)

DataFrame (now 'df'):
   Product     Category Region  Price  Units Sold
0       A  Electronics  North    100          10
1       B     Clothing  South     50          25
2       A  Electronics   East    120          15
3       C   Home Goods   West     30          30
4       B     Clothing  North     60          20
5       C   Home Goods   East     40          35
6       A  Electronics  South    110          12
7       B     Clothing   West     55          22


In [3]:
# Bare expression as the cell's last line: the notebook renders the whole
# DataFrame as this cell's output.
df

Unnamed: 0,Product,Category,Region,Price,Units Sold
0,A,Electronics,North,100,10
1,B,Clothing,South,50,25
2,A,Electronics,East,120,15
3,C,Home Goods,West,30,30
4,B,Clothing,North,60,20
5,C,Home Goods,East,40,35
6,A,Electronics,South,110,12
7,B,Clothing,West,55,22


In [4]:
# Build the groupby object once so later cells can reuse it.
# Nothing is aggregated here; this only records how rows are split by 'Region'.
grouped_by_region = df.groupby(by='Region')


In [5]:
# Select the 'Price' column within each region group, then average it.
price_by_region = grouped_by_region['Price']
price_by_region.mean()

Region
East     80.0
North    80.0
South    80.0
West     42.5
Name: Price, dtype: float64

In [6]:
# Price statistics per category: one row per category, one column per statistic.
category_prices = df.groupby('Category')['Price']
price_summary_per_category = category_prices.agg(['mean', 'median'])
print("\nMean and Median Price per Category:\n", price_summary_per_category)

# Units sold per region: total units plus the number of rows in each region.
region_units = df.groupby('Region')['Units Sold']
units_summary_per_region = region_units.agg(['sum', 'count'])
print("\nSum and Count of Units Sold per Region:\n", units_summary_per_region)


Mean and Median Price per Category:
               mean  median
Category                  
Clothing      55.0    55.0
Electronics  110.0   110.0
Home Goods    35.0    35.0

Sum and Count of Units Sold per Region:
         sum  count
Region            
East     50      2
North    30      2
South    37      2
West     52      2


In [7]:
# Show the source DataFrame again for reference ahead of the
# per-product groupby cells that follow.
df

Unnamed: 0,Product,Category,Region,Price,Units Sold
0,A,Electronics,North,100,10
1,B,Clothing,South,50,25
2,A,Electronics,East,120,15
3,C,Home Goods,West,30,30
4,B,Clothing,North,60,20
5,C,Home Goods,East,40,35
6,A,Electronics,South,110,12
7,B,Clothing,West,55,22


In [8]:
# Grouping alone computes nothing: the cell output is just the
# DataFrameGroupBy object's repr, not a table.
df.groupby(by='Product')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0A2B3CA0>

In [9]:
# Total units sold per product: pick the 'Units Sold' column of each
# product group, then sum it (result is a Series indexed by 'Product').
units_by_product = df.groupby('Product')['Units Sold']
units_by_product.sum()

Product
A    37
B    67
C    65
Name: Units Sold, dtype: int64

In [10]:
# Sum the numeric columns ('Price', 'Units Sold') per product.
# numeric_only=True is explicit on purpose: since pandas 2.0 the default is
# numeric_only=False, so without it the string columns ('Category', 'Region')
# would be concatenated per group instead of being left out of the result.
df.groupby('Product').sum(numeric_only=True)

Unnamed: 0_level_0,Price,Units Sold
Product,Unnamed: 1_level_1,Unnamed: 2_level_1
A,330,37
B,165,67
C,70,65


In [18]:
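# Old method: apply the same list of functions to several selected columns at once.
# Note: selecting columns with a bare tuple, ['Price','Units Sold'], was removed in pandas 2.0 -
# pass a list instead, [['Price','Units Sold']].
#df.groupby('Product')[['Price','Units Sold']].agg(['mean', 'median'])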

In [12]:
# Named aggregation: each keyword is an output column name, and its
# (source column, function) tuple says what to compute for it.
product_groups = df.groupby('Product')
product_groups.agg(
    Price_sum=('Price', 'sum'),
    Price_max=('Price', 'max'),
    Units_Sold=('Units Sold', 'sum'),
)

Unnamed: 0_level_0,Price_sum,Price_max,Units_Sold
Product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,330,120,37
B,165,60,67
C,70,40,65


In [13]:
# Mean price per category via named aggregation; the grouping key
# ('Category') becomes the index of the result.
category_groups = df.groupby('Category')
average_price_per_category = category_groups.agg(price=('Price', 'mean'))

print("\nAverage Price per Category (Index is 'Category'):\n", average_price_per_category)

# Inspect what kind of index the single-key groupby produced.
category_index = average_price_per_category.index
print("\nType of the index:", type(category_index))


Average Price per Category (Index is 'Category'):
              price
Category          
Clothing      55.0
Electronics  110.0
Home Goods    35.0

Type of the index: <class 'pandas.core.indexes.base.Index'>


In [14]:
# The same two named aggregations are applied to both groupings below:
# output column name -> (source column, aggregation function).
named_aggs = {
    'Average_Price': ('Price', 'mean'),
    'Total_Units_Sold': ('Units Sold', 'sum'),
}

# Single grouping key -> the result is indexed by 'Category'.
category_analysis = df.groupby('Category').agg(**named_aggs)
print("\nCategory Analysis (Index is 'Category'):\n", category_analysis)
print("\nType of the index:", type(category_analysis.index))

# Two grouping keys -> the result carries a two-level (Region, Category) index.
region_category_analysis = df.groupby(['Region', 'Category']).agg(**named_aggs)
print("\nRegion and Category Analysis (MultiIndex):\n", region_category_analysis)
print("\nType of the index:", type(region_category_analysis.index))


Category Analysis (Index is 'Category'):
              Average_Price  Total_Units_Sold
Category                                    
Clothing              55.0                67
Electronics          110.0                37
Home Goods            35.0                65

Type of the index: <class 'pandas.core.indexes.base.Index'>

Region and Category Analysis (MultiIndex):
                     Average_Price  Total_Units_Sold
Region Category                                    
East   Electronics          120.0                15
       Home Goods            40.0                35
North  Clothing              60.0                20
       Electronics          100.0                10
South  Clothing              50.0                25
       Electronics          110.0                12
West   Clothing              55.0                22
       Home Goods            30.0                30

Type of the index: <class 'pandas.core.indexes.multi.MultiIndex'>


In [15]:
# as_index=False keeps the grouping keys ('Region', 'Category') as ordinary
# columns, so the result has a plain positional index instead of a MultiIndex.
region_category_groups = df.groupby(['Region', 'Category'], as_index=False)
category_analysis_no_index = region_category_groups.agg(
    Average_Price=('Price', 'mean'),
    Total_Units_Sold=('Units Sold', 'sum'),
)
print("\nCategory Analysis (No Index):\n", category_analysis_no_index)
print("\nType of the index:", type(category_analysis_no_index.index))


Category Analysis (No Index):
   Region     Category  Average_Price  Total_Units_Sold
0   East  Electronics          120.0                15
1   East   Home Goods           40.0                35
2  North     Clothing           60.0                20
3  North  Electronics          100.0                10
4  South     Clothing           50.0                25
5  South  Electronics          110.0                12
6   West     Clothing           55.0                22
7   West   Home Goods           30.0                30

Type of the index: <class 'pandas.core.indexes.range.RangeIndex'>


In [16]:
# Display the as_index=False result as a rendered table: 'Region' and
# 'Category' appear as regular columns next to the aggregated values.
category_analysis_no_index

Unnamed: 0,Region,Category,Average_Price,Total_Units_Sold
0,East,Electronics,120.0,15
1,East,Home Goods,40.0,35
2,North,Clothing,60.0,20
3,North,Electronics,100.0,10
4,South,Clothing,50.0,25
5,South,Electronics,110.0,12
6,West,Clothing,55.0,22
7,West,Home Goods,30.0,30
