In [2]:
import pandas as pd
import numpy as np

# Pandas 3: Data Aggregation and Merging 
## Part 3.1:  Data Aggregation

### Aggregation: 
  - Summary statistics
  - GroupBy

### 1- Aggregating Statistics
<img src="images/img1.png" width="300"> 


In [3]:
# Six rows of sample values for columns A, B and C. Column A (row 3) and
# column B (row 4) each hold one missing value so the cells below can show
# how the aggregations treat NaN.
# np.nan is used instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
a = [
    [76, 52, 57],
    [81, 73, 97],
    [90, 92, 80],
    [np.nan, 73, 92],
    [81, np.nan, 52],
    [65, 95, 61],
]
df = pd.DataFrame(a, columns=['A', 'B', 'C'])
df

Unnamed: 0,A,B,C
0,76.0,52.0,57
1,81.0,73.0,97
2,90.0,92.0,80
3,,73.0,92
4,81.0,,52
5,65.0,95.0,61


### Count

In [4]:
## Single Column
# count() returns the number of non-missing values, so the NaN in row 3 of A
# is not counted (5 of the 6 rows).
df['A'].count()

5

In [5]:
## Multiple Column
df[['A','C']].count()

A    5
C    6
dtype: int64

In [6]:
## All columns - Column-wise
df.count()

A    5
B    5
C    6
dtype: int64

### Min

In [7]:
## Single Column
df['A'].min()

65.0

In [8]:
## Multiple Column
df[['A','C']].min()

A    65.0
C    52.0
dtype: float64

In [9]:
## All columns - Column-wise
df.min()

A    65.0
B    52.0
C    52.0
dtype: float64

### Max 

In [10]:
## Single Column
df['A'].max()

90.0

In [11]:
## Multiple Column
df[['A','C']].max()

A    90.0
C    97.0
dtype: float64

In [12]:
## All columns - Column-wise
df.max()

A    90.0
B    95.0
C    97.0
dtype: float64

### Sum

In [13]:
## Single Column
df['A'].sum()

393.0

In [14]:
## Multiple Column
df[['A','C']].sum()

A    393.0
C    439.0
dtype: float64

In [15]:
## All columns - Column-wise
df.sum()

A    393.0
B    385.0
C    439.0
dtype: float64

### Variance

In [16]:
## Single Column
# var() skips NaN and returns the sample variance (sum of squared deviations
# divided by n-1): for A that is 333.2 / 4 = 83.3.
df['A'].var()

83.3

In [17]:
## Multiple Column
df[['A','C']].var()

A     83.300000
C    365.366667
dtype: float64

In [18]:
## All columns - Column-wise
df.var()

A     83.300000
B    301.500000
C    365.366667
dtype: float64

### Standard Deviation

In [19]:
## Single Column
df['A'].std()

9.126883367283709

In [20]:
## Multiple Column
df[['A','C']].std()

A     9.126883
C    19.114567
dtype: float64

In [21]:
## All columns - Column-wise
df.std()

A     9.126883
B    17.363755
C    19.114567
dtype: float64

### Mean

In [22]:
## Single Column
df['A'].mean()

78.6

In [23]:
## Multiple Column
df[['A','C']].mean()

A    78.600000
C    73.166667
dtype: float64

In [24]:
## All columns - Column-wise
df.mean()

A    78.600000
B    77.000000
C    73.166667
dtype: float64

### Median

In [25]:
## Single Column
df['A'].median()

81.0

In [26]:
## Multiple Column
df[['A','C']].median()

A    81.0
C    70.5
dtype: float64

In [27]:
## All columns - Column-wise
df.median()

A    81.0
B    73.0
C    70.5
dtype: float64

### Mode

In [28]:
## Single Column
# mode() returns a Series rather than a scalar because several values can tie
# for most frequent; for A only 81 qualifies.
df['A'].mode()

0    81.0
Name: A, dtype: float64

In [29]:
# Take the first (here the only) mode as a plain scalar.
df['A'].mode().iloc[0]

81.0

In [30]:
## Multiple Column
df[['A','B']].mode()

Unnamed: 0,A,B
0,81.0,73.0


In [31]:
## All columns - Column-wise
# Every value of C occurs exactly once, so all six values are modes of C;
# A and B have a single mode each and are padded with NaN in the other rows.
df.mode()

Unnamed: 0,A,B,C
0,81.0,73.0,52
1,,,57
2,,,61
3,,,80
4,,,92
5,,,97


### Statistical summary

In [32]:
## Single Column
df['A'].describe()

count     5.000000
mean     78.600000
std       9.126883
min      65.000000
25%      76.000000
50%      81.000000
75%      81.000000
max      90.000000
Name: A, dtype: float64

In [33]:
## Multiple Column
df[['A','C']].describe()

Unnamed: 0,A,C
count,5.0,6.0
mean,78.6,73.166667
std,9.126883,19.114567
min,65.0,52.0
25%,76.0,58.0
50%,81.0,70.5
75%,81.0,89.0
max,90.0,97.0


In [34]:
# transpose() flips the summary: one row per column of df, one column per statistic.
df[['A','C']].describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A,5.0,78.6,9.126883,65.0,76.0,81.0,81.0,90.0
C,6.0,73.166667,19.114567,52.0,58.0,70.5,89.0,97.0


In [35]:
## All columns - Column-wise
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A,5.0,78.6,9.126883,65.0,76.0,81.0,81.0,90.0
B,5.0,77.0,17.363755,52.0,73.0,73.0,92.0,95.0
C,6.0,73.166667,19.114567,52.0,58.0,70.5,89.0,97.0


### agg() function 

In [36]:
df.agg(['sum','min'])

Unnamed: 0,A,B,C
sum,393.0,385.0,439
min,65.0,52.0,52


In [38]:
# axis=0 applies each function down every column; the result is the same as
# df.agg(['sum','min']) without an axis argument.
df.agg(['sum','min'], axis=0)

Unnamed: 0,A,B,C
sum,393.0,385.0,439
min,65.0,52.0,52


In [37]:
# axis=1 applies each function across the columns of every row; NaN entries
# are skipped (row 3: 73 + 92 = 165).
df.agg(['sum','min'], axis=1)

Unnamed: 0,sum,min
0,185.0,52.0
1,251.0,73.0
2,262.0,80.0
3,165.0,73.0
4,133.0,52.0
5,221.0,61.0


In [39]:
# A dict selects different functions per column. The result has one row per
# requested function, with NaN where a function was not requested for that column.
df.agg({'A':['sum','min'], 'B':['min', 'max']})

Unnamed: 0,A,B
sum,393.0,
min,65.0,52.0
max,,95.0


### All columns - Row-wise (axis=1)

In [40]:
df.sum(axis=1)

0    185.0
1    251.0
2    262.0
3    165.0
4    133.0
5    221.0
dtype: float64

In [41]:
df.mean(axis=1)

0    61.666667
1    83.666667
2    87.333333
3    82.500000
4    66.500000
5    73.666667
dtype: float64

In [42]:
# .loc slices by label and includes the end label, so rows 2 and 3 are selected.
df.loc[2:3].mean(axis=1)

2    87.333333
3    82.500000
dtype: float64

In [None]:
# .iloc slices by position and excludes the stop, so this selects the rows at
# positions 3 and 4 (not 5).
df.iloc[3:5].mean(axis=1)

### 2- Groupby
- Aggregating statistics grouped by category
- The groupby() method is applied on one or more columns to make a group per category.

<img src="images/img2.png" width="500"> 

In [43]:
# Store inventory: one record per item -> (item id, type, color, quantity).
columns = ['item', 'type', 'color', 'quantity']
data = [
    ('S-56', 'A', 'Red', 234),
    ('S-57', 'A', 'Blue', 432),
    ('S-58', 'A', 'Orange', 902),
    ('S-59', 'A', 'Red', 340),
    ('S-60', 'B', 'Yellow', 253),
    ('S-61', 'B', 'Red', 232),
    ('S-62', 'C', 'Green', 1042),
    ('S-63', 'C', 'Green', 1204),
    ('S-64', 'B', 'Yellow', 432),
    ('S-65', 'C', 'Green', 985),
]
items = pd.DataFrame(data=data, columns=columns)
items

Unnamed: 0,item,type,color,quantity
0,S-56,A,Red,234
1,S-57,A,Blue,432
2,S-58,A,Orange,902
3,S-59,A,Red,340
4,S-60,B,Yellow,253
5,S-61,B,Red,232
6,S-62,C,Green,1042
7,S-63,C,Green,1204
8,S-64,B,Yellow,432
9,S-65,C,Green,985


In [46]:
# Number of items per type. The 'item' column is selected before counting so
# only that column is aggregated; the result equals count()['item'].
items.groupby('type')['item'].count()

type
A    4
B    3
C    3
Name: item, dtype: int64

- In a store, there are many items.
- Each item has a Type and a Color.
- We have a dataset of items, with the quantity of each item.


### Question 1: Find the sum of quantities for each Type
**Approach:**
- Split the dataset into groups (per Type)
- Apply sum() function to each group independently
- Combine the results into a data structure

In [47]:
# Total quantity per type. The numeric column is selected explicitly: on
# pandas >= 2.0 a bare .sum() no longer drops the string columns 'item' and
# 'color' silently but tries to aggregate them too (concatenating the strings).
items.groupby('type')[['quantity']].sum()

Unnamed: 0_level_0,quantity
type,Unnamed: 1_level_1
A,1908
B,917
C,3231


### Question 2: Find the sum of quantities for each Color

In [48]:
# Total quantity per color. 'quantity' is selected explicitly so the string
# columns 'item' and 'type' are not aggregated as well (pandas >= 2.0 no
# longer drops non-numeric columns from .sum() automatically).
items.groupby('color')[['quantity']].sum()

Unnamed: 0_level_0,quantity
color,Unnamed: 1_level_1
Blue,432
Green,3231
Orange,902
Red,806
Yellow,685


### Question 3: How many items are there of each Type?

In [49]:
# count() reports, for each type, the number of non-null values in every
# remaining column; the three counts agree because no value is missing.
items.groupby('type').count()

Unnamed: 0_level_0,item,color,quantity
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,4,4,4
B,3,3,3
C,3,3,3


In [50]:
# One count is enough: select 'item' before counting instead of counting
# every column and discarding the rest. The result equals count()['item'].
items.groupby('type')['item'].count()

type
A    4
B    3
C    3
Name: item, dtype: int64

In [51]:
# reset_index() turns the 'type' index back into a regular column, giving a
# flat two-column frame (type, item). Only 'item' is counted.
items.groupby('type')['item'].count().reset_index()

Unnamed: 0,type,item
0,A,4
1,B,3
2,C,3


In [53]:
df_test=items.groupby('type').count()
df_test

Unnamed: 0_level_0,item,color,quantity
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,4,4,4
B,3,3,3
C,3,3,3


In [54]:
df_test.columns

Index(['item', 'color', 'quantity'], dtype='object')

In [55]:
df_test.index

Index(['A', 'B', 'C'], dtype='object', name='type')

In [56]:
# Per-type item count as a flat frame: 'type' becomes an ordinary column and
# the index is a default RangeIndex. Only the 'item' column is aggregated.
df_test1 = items.groupby('type')['item'].count().reset_index()
df_test1

Unnamed: 0,type,item
0,A,4
1,B,3
2,C,3


In [57]:
df_test1.columns

Index(['type', 'item'], dtype='object')

In [58]:
df_test1.index

RangeIndex(start=0, stop=3, step=1)

### Question 4: How many items are there of each Type? (using `value_counts`)
`value_counts()` is a convenient shortcut to count the number of entries in each category.

In [59]:
# value_counts() counts the rows for each distinct value of 'type',
# listing the most frequent value first.
items['type'].value_counts()

A    4
B    3
C    3
Name: type, dtype: int64

In [60]:
# groupby equivalent of value_counts(): count the 'item' column per type.
# Selecting the column first avoids counting every other column as well.
items.groupby('type')['item'].count()

type
A    4
B    3
C    3
Name: item, dtype: int64

### Question 5: How many items are there for each (Type, Color) combination?

In [61]:
# Grouping by two columns gives one row per (type, color) pair that occurs in
# the data; the pair forms a two-level MultiIndex on the result.
items.groupby(['type','color']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,item,quantity
type,color,Unnamed: 2_level_1,Unnamed: 3_level_1
A,Blue,1,1
A,Orange,1,1
A,Red,2,2
B,Red,1,1
B,Yellow,2,2
C,Green,3,3


In [62]:
df1=items.groupby(['type','color']).count()

In [63]:
df1.columns

Index(['item', 'quantity'], dtype='object')

In [64]:
df1.index

MultiIndex([('A',   'Blue'),
            ('A', 'Orange'),
            ('A',    'Red'),
            ('B',    'Red'),
            ('B', 'Yellow'),
            ('C',  'Green')],
           names=['type', 'color'])

In [65]:
# Item count per (type, color) pair as a flat frame. 'item' is selected
# before counting so only that column is aggregated.
items.groupby(['type', 'color'])['item'].count().reset_index()

Unnamed: 0,type,color,item
0,A,Blue,1
1,A,Orange,1
2,A,Red,2
3,B,Red,1
4,B,Yellow,2
5,C,Green,3


### Question 6: Find the sum of quantities for each (Type, Color) combination

In [66]:
# Total quantity per (type, color) pair. 'quantity' is selected explicitly so
# the string column 'item' is not aggregated too (pandas >= 2.0 no longer
# drops non-numeric columns from .sum() automatically).
items.groupby(['type', 'color'])[['quantity']].sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,quantity
type,color,Unnamed: 2_level_1
A,Blue,432
A,Orange,902
A,Red,574
B,Red,232
B,Yellow,685
C,Green,3231


In [67]:
# Same totals as a flat frame: reset_index() moves 'type' and 'color' out of
# the MultiIndex into ordinary columns. Only 'quantity' is summed, so the
# string column 'item' does not leak into the result on pandas >= 2.0.
items.groupby(['type', 'color'])[['quantity']].sum().reset_index()

Unnamed: 0,type,color,quantity
0,A,Blue,432
1,A,Orange,902
2,A,Red,574
3,B,Red,232
4,B,Yellow,685
5,C,Green,3231


We can also choose whether rows with a missing (NA) group key are kept, using the `dropna` parameter.
The default is `dropna=True`, which drops those rows; `dropna=False` keeps them as a separate NaN group.


In [68]:
# Small frame whose grouping column "b" has one missing value (row 1).
l = [
    [1, 2, 3],
    [1, None, 4],
    [2, 1, 3],
    [1, 2, 2],
]
df = pd.DataFrame(data=l, columns=["a", "b", "c"])
df

Unnamed: 0,a,b,c
0,1,2.0,3
1,1,,4
2,2,1.0,3
3,1,2.0,2


In [69]:
# Default behaviour: the row whose key "b" is NaN (row 1) is left out, so only
# the groups 1.0 and 2.0 appear.
df.groupby(by=["b"]).sum()

Unnamed: 0_level_0,a,c
b,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,2,3
2.0,2,5


In [70]:
# dropna=False keeps the NaN key as its own group, so row 1 (a=1, c=4) shows
# up as an extra row in the result.
df.groupby(by=["b"], dropna=False).sum()

Unnamed: 0_level_0,a,c
b,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,2,3
2.0,2,5
,1,4


## Part 3.2:  Data Merging
- **Concat**
   - Vertically
   - Horizontally
   
   
<img src="images/img3.png" width="400"> 
   
- **Merge**
   - Inner
   - Left
   - Right
   - Outer
   
<img src="images/img4.png" width="400"> 



### 1- Concat 
Concatenate pandas DataFrames along a particular axis 
<img src="images/img5.png" width="500"> 


#### 1.1- Concat - vertical (axis=0)

In [73]:
# 2019 budget per department.
columns = ['Department', 'Year', 'Budget']
d1 = [
    ('Operations', 2019, 35000),
    ('Research', 2019, 45000),
    ('Development', 2019, 45000),
    ('Human Resources', 2019, 25800),
]
df1 = pd.DataFrame(data=d1, columns=columns)
df1

Unnamed: 0,Department,Year,Budget
0,Operations,2019,35000
1,Research,2019,45000
2,Development,2019,45000
3,Human Resources,2019,25800


In [74]:
# 2020 budget per department; reuses the `columns` list from the 2019 frame.
d2 = [
    ('Operations', 2020, 36500),
    ('Research', 2020, 44000),
    ('Development', 2020, 55000),
    ('Human Resources', 2020, 37500),
]
df2 = pd.DataFrame(data=d2, columns=columns)
df2

Unnamed: 0,Department,Year,Budget
0,Operations,2020,36500
1,Research,2020,44000
2,Development,2020,55000
3,Human Resources,2020,37500


In [75]:
## check index
# concat keeps each frame's original row labels, so the labels 0-3 appear
# twice in the result.
df=pd.concat([df1, df2])
df

Unnamed: 0,Department,Year,Budget
0,Operations,2019,35000
1,Research,2019,45000
2,Development,2019,45000
3,Human Resources,2019,25800
0,Operations,2020,36500
1,Research,2020,44000
2,Development,2020,55000
3,Human Resources,2020,37500


In [76]:
# ignore_index=True discards the original row labels and renumbers the
# result 0..7.
df=pd.concat([df1, df2], ignore_index=True)
df

Unnamed: 0,Department,Year,Budget
0,Operations,2019,35000
1,Research,2019,45000
2,Development,2019,45000
3,Human Resources,2019,25800
4,Operations,2020,36500
5,Research,2020,44000
6,Development,2020,55000
7,Human Resources,2020,37500


In [77]:
# 2021 budget per department, with an additional 'Manager' column.
columns = ['Department', 'Year', 'Budget', 'Manager']
d3 = [
    ('Operations', 2021, 44000, 'Dirk'),
    ('Research', 2021, 52000, 'Elisa'),
    ('Development', 2021, 37000, 'Jan'),
    ('Human Resources', 2021, 40500, 'Mary'),
]
df3 = pd.DataFrame(data=d3, columns=columns)
df3

Unnamed: 0,Department,Year,Budget,Manager
0,Operations,2021,44000,Dirk
1,Research,2021,52000,Elisa
2,Development,2021,37000,Jan
3,Human Resources,2021,40500,Mary


In [78]:
# df3 has an extra 'Manager' column: the result contains it, and the rows
# coming from df1 and df2 get NaN there.
df=pd.concat([df1, df2, df3], ignore_index=True)
df

Unnamed: 0,Department,Year,Budget,Manager
0,Operations,2019,35000,
1,Research,2019,45000,
2,Development,2019,45000,
3,Human Resources,2019,25800,
4,Operations,2020,36500,
5,Research,2020,44000,
6,Development,2020,55000,
7,Human Resources,2020,37500,
8,Operations,2021,44000,Dirk
9,Research,2021,52000,Elisa


#### 1.2- Concat - horizontal (axis=1)
<img src="images/img6.png" width="400"> 

In [79]:
# Values for columns S1 and S2, one row per subject.
# The raw dict gets its own name so `df1` is only ever bound to a DataFrame
# (previously the same name held the dict first and the frame afterwards).
index = ['Math', 'Physics', 'English']
scores_s1_s2 = {'S1': [65, 80, 85], 'S2': [70, 90, 76]}
df1 = pd.DataFrame(scores_s1_s2, index=index)
df1

Unnamed: 0,S1,S2
Math,65,70
Physics,80,90
English,85,76


In [80]:
# Values for columns S3 and S4, using the `index` list defined with df1.
# The raw dict gets its own name so `df2` is only ever bound to a DataFrame.
scores_s3_s4 = {'S3': [53, 84, 95], 'S4': [72, 92, 63]}
df2 = pd.DataFrame(scores_s3_s4, index=index)
df2

Unnamed: 0,S3,S4
Math,53,72
Physics,84,92
English,95,63


In [81]:
# axis=1 places the frames side by side, aligning their rows on the index labels.
df=pd.concat([df1, df2], axis=1)
df

Unnamed: 0,S1,S2,S3,S4
Math,65,70,53,72
Physics,80,90,84,92
English,85,76,95,63


In [82]:
# Values for columns S5-S7; this frame has an additional 'History' row.
# The raw dict gets its own name so `df3` is only ever bound to a DataFrame.
index = ['Math', 'Physics', 'History', 'English']
scores_s5_s7 = {
    'S5': [69, 84, 77, 90],
    'S6': [85, 72, 52, 69],
    'S7': [33, 81, 65, 73],
}
df3 = pd.DataFrame(scores_s5_s7, index=index)
df3

Unnamed: 0,S5,S6,S7
Math,69,85,33
Physics,84,72,81
History,77,52,65
English,90,69,73


In [83]:
# df3 has a 'History' row that df1 and df2 lack: the result uses the union of
# the index labels, and S1-S4 become float because they are NaN for 'History'.
df=pd.concat([df1, df2, df3], axis=1)
df

Unnamed: 0,S1,S2,S3,S4,S5,S6,S7
Math,65.0,70.0,53.0,72.0,69,85,33
Physics,80.0,90.0,84.0,92.0,84,72,81
English,85.0,76.0,95.0,63.0,90,69,73
History,,,,,77,52,65


### 2- Merge
- An operation that combines two dataframes into a new dataframe by matching one or more columns (a.k.a. keys) from both input dataframes.
- Each row in the new dataframe is a combination of two rows: one from each input dataframe.

<img src="images/img7.png" width="600"> 

<img src="images/img8.png" width="600"> 

In [84]:
# City -> country lookup table (left side of the merge examples).
# The raw rows get their own name so `df_1` is only ever bound to a DataFrame
# (previously the same name held the list first and the frame afterwards).
columns = ['City', 'Country']
city_rows = [
    ('Munich', 'Germany'),
    ('Liverpool', 'UK'),
    ('Lyon', 'France'),
    ('Frankfurt', 'Germany'),
    ('Napoli', 'Italy'),
    ('London', 'UK'),
]
df_1 = pd.DataFrame(city_rows, columns=columns)
df_1

Unnamed: 0,City,Country
0,Munich,Germany
1,Liverpool,UK
2,Lyon,France
3,Frankfurt,Germany
4,Napoli,Italy
5,London,UK


In [85]:
# Country -> (population, area) table (right side of the merge examples).
# The raw rows get their own name so `df_2` is only ever bound to a DataFrame.
columns = ['Country', 'Population', 'Area']
country_rows = [
    ('Germany', 83783942, 357588),
    ('UK', 67886011, 242495),
    ('France', 65273511, 543940),
    ('Italy', 60317116, 301340),
]
df_2 = pd.DataFrame(country_rows, columns=columns)
df_2

Unnamed: 0,Country,Population,Area
0,Germany,83783942,357588
1,UK,67886011,242495
2,France,65273511,543940
3,Italy,60317116,301340


In [86]:
# Match rows on the shared 'Country' column: every city row is combined with
# the Population and Area of its country.
df_result = df_1.merge(df_2, on="Country")
df_result

Unnamed: 0,City,Country,Population,Area
0,Munich,Germany,83783942,357588
1,Frankfurt,Germany,83783942,357588
2,Liverpool,UK,67886011,242495
3,London,UK,67886011,242495
4,Lyon,France,65273511,543940
5,Napoli,Italy,60317116,301340


In [None]:
# pd.merge(left, right, ...) is the function form of left.merge(right, ...).
df_result = pd.merge(df_1, df_2, on="Country")
df_result

<img src="images/img9.png" width="700"> 

<img src="images/img10.png" width="700"> 

<img src="images/img11.png" width="700"> 

<img src="images/img12.png" width="700"> 

- In the previous example, we had a complete match between the two dataframes:
- Both dataframes have the same set of countries.
- But what happens when there is no complete match?

<img src="images/img13.png" width="400"> 


- We want to merge these dataframes to know the fathers and mothers of the children.
- Some children appear in both tables, but there are also non-matching children.
- Which rows should the result contain? That depends on the type of merge.


#### There are four types of merge:
  - **inner:** take only matching rows from both tables
  - **left:** take matching rows from both plus non-matching from left table
  - **right:** take matching rows from both plus non-matching from right table
  - **outer:** take matching rows plus non-matching rows from both tables

<img src="images/img14.png" width="600"> 


In [87]:
# (father, child) pairs.
data1 = [
    ('Steve', 'Frank'),
    ('Greg', 'Kim'),
    ('Greg', 'Phil'),
    ('Frank', 'Andy'),
    ('Frank', 'Rob'),
]
dff = pd.DataFrame(data=data1, columns=['Father', 'Child'])
dff

Unnamed: 0,Father,Child
0,Steve,Frank
1,Greg,Kim
2,Greg,Phil
3,Frank,Andy
4,Frank,Rob


In [88]:
# (mother, child) pairs.
data2 = [
    ('Lisa', 'Mary'),
    ('Lisa', 'Greg'),
    ('Anne', 'Kim'),
    ('Anne', 'Phil'),
    ('Mary', 'Andy'),
    ('Mary', 'Rob'),
]
dfm = pd.DataFrame(data=data2, columns=['Mother', 'Child'])
dfm

Unnamed: 0,Mother,Child
0,Lisa,Mary
1,Lisa,Greg
2,Anne,Kim
3,Anne,Phil
4,Mary,Andy
5,Mary,Rob


#### a. Merge - how='inner'
- Only matching rows from both tables are included
- Non-matching rows are excluded

<img src="images/img15.png" width="300"> 


In [89]:
# Inner join on 'Child': only children present in both tables are kept.
# how='inner' is the default; it is spelled out here to match the
# left/right/outer examples that follow.
dff.merge(dfm, on='Child', how='inner')

Unnamed: 0,Father,Child,Mother
0,Greg,Kim,Anne
1,Greg,Phil,Anne
2,Frank,Andy,Mary
3,Frank,Rob,Mary


<img src="images/img16.png" width="300"> 


#### b. Merge - how='left'
<img src="images/img17.png" width="300"> 

- Matching rows from both tables are included
- Non-matching rows from **left** table are **included**
- Non-matching rows from **right** table are **excluded**

In [90]:
dff.merge(dfm, on='Child', how='left')

Unnamed: 0,Father,Child,Mother
0,Steve,Frank,
1,Greg,Kim,Anne
2,Greg,Phil,Anne
3,Frank,Andy,Mary
4,Frank,Rob,Mary


<img src="images/img18.png" width="300"> 

#### c. Merge - how='right'
- Matching rows from both tables are included
- Non-matching rows from left table are excluded
- Non-matching rows from right table are included

<img src="images/img19.png" width="300"> 

In [91]:
dff.merge(dfm, on='Child', how='right')

Unnamed: 0,Father,Child,Mother
0,,Mary,Lisa
1,,Greg,Lisa
2,Greg,Kim,Anne
3,Greg,Phil,Anne
4,Frank,Andy,Mary
5,Frank,Rob,Mary


<img src="images/img20.png" width="300"> 

#### d. Merge - how='outer'
- Matching rows from both tables are included
- Non-matching rows from **left** table are **included**
- Non-matching rows from **right** table are **included**
<img src="images/img21.png" width="300"> 

In [92]:
dff.merge(dfm, on='Child', how='outer')

Unnamed: 0,Father,Child,Mother
0,Steve,Frank,
1,Greg,Kim,Anne
2,Greg,Phil,Anne
3,Frank,Andy,Mary
4,Frank,Rob,Mary
5,,Mary,Lisa
6,,Greg,Lisa


<img src="images/img22.png" width="300"> 

####  e. Merge summary 
- **inner join**
  - only matching rows will be retained, 
  - rows in the left DataFrame without a match in the key column of the right DataFrame will be discarded.
  - rows in the right DataFrame without a match in the key column of the left DataFrame will be discarded.
- **left join** 
  - all rows from the left DataFrame will be retained, 
  - rows in the right DataFrame without a match in the key column of the left DataFrame will be discarded.
- **right join**
  - all rows from the right DataFrame will be retained, 
  - rows in the left DataFrame without a match in the key column of the right DataFrame will be discarded.
- **outer  join**
  - all rows, matching and non-matching, from both DataFrames will be retained.

<img src="images/img23.png" width="600"> 

#### f. Merge - columns with different names
- In the previous examples, we merged tables using columns with the same name (Country, Child).
- We can also merge tables using key columns that have different names in each table (`left_on` / `right_on`).


In [93]:
# Person table: (name, age, income), one record per person.
columns = ['Name', 'Age', 'Income']
data3 = [
    ('Andy', 27, 21),
    ('Rob', 25, 15),
    ('Mary', 55, 42),
    ('Anne', 50, 35),
    ('Phil', 26, 30),
    ('Greg', 50, 40),
    ('Frank', 57, 20),
    ('Kim', 30, 41),
    ('Mike', 85, 35),
    ('Lisa', 75, 87),
    ('Steve', 80, 23),
]
dfp = pd.DataFrame(data=data3, columns=columns)
dfp

Unnamed: 0,Name,Age,Income
0,Andy,27,21
1,Rob,25,15
2,Mary,55,42
3,Anne,50,35
4,Phil,26,30
5,Greg,50,40
6,Frank,57,20
7,Kim,30,41
8,Mike,85,35
9,Lisa,75,87


In [94]:
dff

Unnamed: 0,Father,Child
0,Steve,Frank
1,Greg,Kim
2,Greg,Phil
3,Frank,Andy
4,Frank,Rob


In [95]:
# The key is called 'Name' in dfp and 'Child' in dff, so it is given per side.
# Both key columns are kept in the result, and only people who appear as a
# 'Child' in dff are matched.
df=dfp.merge(dff, left_on='Name', right_on='Child')
df

Unnamed: 0,Name,Age,Income,Father,Child
0,Andy,27,21,Frank,Andy
1,Rob,25,15,Frank,Rob
2,Phil,26,30,Greg,Phil
3,Frank,57,20,Steve,Frank
4,Kim,30,41,Greg,Kim


- In previous examples, we merged tables using one column (one key)
- We can also merge tables using multiple columns (keys)

In [96]:
# Item inventory used for the multi-key merge: (item id, type, color, quantity).
columns = ['item', 'type', 'color', 'quantity']
d1 = [
    ('S-56', 'A', 'Red', 234),
    ('S-57', 'A', 'Blue', 432),
    ('S-58', 'A', 'Orange', 902),
    ('S-59', 'A', 'Red', 340),
    ('S-60', 'B', 'Yellow', 253),
    ('S-61', 'B', 'Red', 232),
    ('S-62', 'C', 'Green', 1042),
    ('S-63', 'C', 'Green', 1204),
    ('S-64', 'B', 'Yellow', 432),
    ('S-65', 'C', 'Green', 985),
]
items = pd.DataFrame(data=d1, columns=columns)
items

Unnamed: 0,item,type,color,quantity
0,S-56,A,Red,234
1,S-57,A,Blue,432
2,S-58,A,Orange,902
3,S-59,A,Red,340
4,S-60,B,Yellow,253
5,S-61,B,Red,232
6,S-62,C,Green,1042
7,S-63,C,Green,1204
8,S-64,B,Yellow,432
9,S-65,C,Green,985


In [97]:
# Price list: one price per (type, color) combination.
d2 = [
    ('A', 'Red', 34.50),
    ('A', 'Blue', 53.00),
    ('A', 'Orange', 62.25),
    ('B', 'Yellow', 35.25),
    ('B', 'Red', 23.45),
    ('C', 'Red', 61.50),
    ('C', 'Green', 72.20),
]

prices = pd.DataFrame(data=d2, columns=['type', 'color', 'price'])
prices

Unnamed: 0,type,color,price
0,A,Red,34.5
1,A,Blue,53.0
2,A,Orange,62.25
3,B,Yellow,35.25
4,B,Red,23.45
5,C,Red,61.5
6,C,Green,72.2


In [98]:
# Rows are matched on the (type, color) pair. The ('C', 'Red') price has no
# matching item, so it does not appear in the result.
df=items.merge(prices, on=['type','color'])
df

Unnamed: 0,item,type,color,quantity,price
0,S-56,A,Red,234,34.5
1,S-59,A,Red,340,34.5
2,S-57,A,Blue,432,53.0
3,S-58,A,Orange,902,62.25
4,S-60,B,Yellow,253,35.25
5,S-64,B,Yellow,432,35.25
6,S-61,B,Red,232,23.45
7,S-62,C,Green,1042,72.2
8,S-63,C,Green,1204,72.2
9,S-65,C,Green,985,72.2


<img src="images/img24.png" width="400"> 

## Exercise:
- Find the total price X of all items:
    `X = sum(quantity * price)`
- Compute the sum, median, min, max and mean of the price column.

In [None]:
# Hint: add a per-item total column to the merged frame first, e.g.
#   df['quantity_price'] = df['price'] * df['quantity']
# and then aggregate that column.
