### pivot() - is used to reshape a dataset from long to wide

In [1]:
import pandas as pd

In [2]:
# Load the long-format sample data; the file uses ';' as its field separator.
people = pd.read_csv(
    'people.csv',
    sep=';',
)
people

Unnamed: 0,Day,Salary,Name
0,1,"10,00 €",Mario
1,2,"11,00 €",Mario
2,3,"8,00 €",Mario
3,4,"7,00 €",Mario
4,5,"10,00 €",Mario
5,6,"12,00 €",Mario
6,7,"20,00 €",Mario
7,1,"5,00 €",Luigi
8,2,"5,00 €",Luigi
9,3,"6,00 €",Luigi


In [3]:

# Reshape long -> wide: one row per Day, one column per Name, Salary in the cells.
# NOTE: this rebinds `people` to the pivoted frame, so re-running this cell
# requires re-running the cell that loads the data first.
people = people.pivot(
    index='Day',
    columns='Name',
    values='Salary',
)
people

Name,James,Luigi,Mario
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,"10,00 €","5,00 €","10,00 €"
2,"20,00 €","5,00 €","11,00 €"
3,"20,00 €","6,00 €","8,00 €"
4,"10,00 €","3,00 €","7,00 €"
5,"5,00 €","2,00 €","10,00 €"
6,"10,00 €","5,00 €","12,00 €"
7,"40,00 €","4,00 €","20,00 €"


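#### melt() does the opposite of pivot(): it reshapes a dataset from wide to long
- by default the index (here `Day`) is dropped; pass `ignore_index=False` to keep it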

In [4]:
# Unpivot the wide frame: the column labels end up in `Name`
# and the cell contents in `value`.
pd.melt(people)

Unnamed: 0,Name,value
0,James,"10,00 €"
1,James,"20,00 €"
2,James,"20,00 €"
3,James,"10,00 €"
4,James,"5,00 €"
5,James,"10,00 €"
6,James,"40,00 €"
7,Luigi,"5,00 €"
8,Luigi,"5,00 €"
9,Luigi,"6,00 €"


### pivot_table() is very similar to pivot(), but it also lets us aggregate the values of our DataFrame.
- pivot_table() can also handle duplicates

In [5]:
# Load the second sample file (';'-separated); it replaces the frame held in `people`.
people = pd.read_csv(
    'people2.csv',
    sep=';',
)
people

Unnamed: 0,Day,Salary,Name,Gender
0,1,"10,00 €",Mario,Male
1,2,"11,00 €",Mario,Male
2,3,"8,00 €",Mario,Male
3,4,"7,00 €",Mario,Male
4,5,"10,00 €",Mario,Male
5,6,"12,00 €",Mario,Male
6,7,"20,00 €",Mario,Male
7,1,"5,00 €",Luigi,Female
8,2,"5,00 €",Luigi,Female
9,3,"6,00 €",Luigi,Female


#### Convert the Salary column into a numeric value

In [6]:
# Parse salaries such as "10,00 €" into floats.
# - The decimal comma is turned into a decimal point and parsed directly,
#   instead of dropping the comma and dividing by 100 (which silently
#   assumes exactly two decimal digits).
# - Going through astype(str) keeps the cell idempotent: when `Salary` is
#   already numeric, re-running the cell leaves the values unchanged instead
#   of failing on the `.str` accessor.
# NOTE(review): values with a '.' thousands separator (e.g. "1.000,00 €")
# are not handled here; confirm the data never contains them.
formatted_salary = pd.to_numeric(
    people['Salary']
    .astype(str)
    .str.replace('€', '', regex=False)
    .str.replace(',', '.', regex=False)
    .str.strip()
)
people['Salary'] = formatted_salary
people

Unnamed: 0,Day,Salary,Name,Gender
0,1,10.0,Mario,Male
1,2,11.0,Mario,Male
2,3,8.0,Mario,Male
3,4,7.0,Mario,Male
4,5,10.0,Mario,Male
5,6,12.0,Mario,Male
6,7,20.0,Mario,Male
7,1,5.0,Luigi,Female
8,2,5.0,Luigi,Female
9,3,6.0,Luigi,Female


#### Get the sum of the salary of each person

In [7]:
# Total salary per person: one output column per distinct Name.
people.pivot_table(
    values='Salary',    # the column whose entries get aggregated
    columns='Name',     # each distinct Name becomes a column of the result
    aggfunc='sum',      # aggregation to apply (pandas defaults to the mean)
)

Name,James,Luigi,Mario
Salary,115.0,30.0,78.0


#### Get the avg salary across all people on each day

In [8]:
# Average salary per Day. No aggfunc is passed, so the default (mean) applies;
# the result is rounded to two decimals for display.
people.pivot_table(
    columns='Day',
    values='Salary',
).round(2)

Day,1,2,3,4,5,6,7
Salary,8.33,12.0,11.33,6.67,5.67,9.0,21.33


#### Find the avg salary of each gender per day

In [9]:
# Mean salary broken down by Day (rows) and Gender (columns).
people.pivot_table(
    values='Salary',     # the column whose entries get aggregated
    index='Day',         # one row per distinct Day
    columns='Gender',    # one column per distinct Gender
    aggfunc='mean',      # spelled out for clarity; mean is also the default
)

Gender,Female,Male
Day,Unnamed: 1_level_1,Unnamed: 2_level_1
1,5.0,10.0
2,5.0,15.5
3,6.0,14.0
4,3.0,8.5
5,2.0,7.5
6,5.0,11.0
7,4.0,30.0
