# Notes on Pandas

In [24]:
import pandas as pd

# Sample data: one list per column, five rows in total.
years = [2020, 2021, 2021, 2022, 2023]
months = [1, 2, 9, 3, 4]
days = [5, 6, 5, 7, 8]
expenses = [1000, 1050.5, 1069.2, 980.8, 1115.6]

# Assemble the columns into a DataFrame; the day column is deliberately
# labelled 'Tag' here and is renamed in a later cell.
df = pd.DataFrame(
    {
        'year': years,
        'month': months,
        'Tag': days,
        'expenses': expenses,
    }
)
df

Unnamed: 0,year,month,Tag,expenses
0,2020,1,5,1000.0
1,2021,2,6,1050.5
2,2021,9,5,1069.2
3,2022,3,7,980.8
4,2023,4,8,1115.6


How to select **certain columns** from **certain rows**

In [25]:
# Row filter (year == 2021) and column selection done in a single .loc call.
df.loc[df['year'] == 2021, ['Tag', 'month']]

Unnamed: 0,Tag,month
1,6,2
2,5,9


**Sort**

In [26]:
# Order rows from the highest to the lowest expense; `df` itself is untouched.
df_sorted = df.sort_values(
    by='expenses',
    ascending=False,
)
df_sorted

Unnamed: 0,year,month,Tag,expenses
4,2023,4,8,1115.6
2,2021,9,5,1069.2
1,2021,2,6,1050.5
0,2020,1,5,1000.0
3,2022,3,7,980.8


Drop duplicates

In [27]:
# Keep only the first row seen for each year; later rows with a repeated
# year are removed from `df` itself (the call modifies `df` and returns None).
df.drop_duplicates(
    subset=['year'],
    keep='first',
    inplace=True,
)
df

Unnamed: 0,year,month,Tag,expenses
0,2020,1,5,1000.0
1,2021,2,6,1050.5
3,2022,3,7,980.8
4,2023,4,8,1115.6


Rename columns

In [28]:
# Rename the 'Tag' column to 'day'. The renamed frame is assigned back to
# `df` rather than using inplace=True, so the call stays chainable and
# later cells see the same `df` contents as before.
df = df.rename(columns={'Tag': 'day'})
df

Unnamed: 0,year,month,day,expenses
0,2020,1,5,1000.0
1,2021,2,6,1050.5
3,2022,3,7,980.8
4,2023,4,8,1115.6


Convert column types

In [29]:
# Round to whole numbers first, then cast: casting alone would truncate
# the fractional part instead of rounding it.
df['expenses'] = df['expenses'].round().astype(int)
df

Unnamed: 0,year,month,day,expenses
0,2020,1,5,1000
1,2021,2,6,1050
3,2022,3,7,981
4,2023,4,8,1116


In [30]:
# A second batch of sample rows, using the same four column names that
# `df` has after the rename and type conversion above.
years = [2021, 2020, 2023, 2022, 2021, 2023]
months = [1, 2, 1, 2, 3, 8]
days = [5, 6, 12, 32, 15, 24]
expenses = [1000, 1050, 2000, 2100, 900, 850]

df2 = pd.DataFrame(dict(year=years, month=months, day=days, expenses=expenses))

Concatenating two DataFrames (**append the new DataFrame below the existing one**)

In [31]:
# Stack the rows of df2 underneath the rows of df. Each frame keeps its own
# index labels, so labels repeat in the result.
df_new = pd.concat([df, df2], axis=0)
df_new

Unnamed: 0,year,month,day,expenses
0,2020,1,5,1000
1,2021,2,6,1050
3,2022,3,7,981
4,2023,4,8,1116
0,2021,1,5,1000
1,2020,2,6,1050
2,2023,1,12,2000
3,2022,2,32,2100
4,2021,3,15,900
5,2023,8,24,850


Pivot table

In [32]:
# Reshape long -> wide: one row per month, one column per year, with the
# expenses as cell values. Month/year combinations without data become NaN.
df_new.pivot(
    index='month',
    columns='year',
    values='expenses',
)

year,2020,2021,2022,2023
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1000.0,1000.0,,2000.0
2,1050.0,1050.0,2100.0,
3,,900.0,981.0,
4,,,,1116.0
8,,,,850.0


**Melt** a table (the reverse of pivot: wide to long)

In [33]:
# Wide-format sales table: one row per product, one column per quarter.
data = [
    ['Umbrella', 417, 224, 279, 611],
    ['SleepingBag', 800, 936, 93, 875],
]

df_sales = pd.DataFrame(
    data,
    columns=[
        'product',
        'quarter_1',
        'quarter_2',
        'quarter_3',
        'quarter_4',
    ],
)
df_sales

Unnamed: 0,product,quarter_1,quarter_2,quarter_3,quarter_4
0,Umbrella,417,224,279,611
1,SleepingBag,800,936,93,875


In [34]:
# Reshape wide -> long: 'product' stays as the identifier, each quarter
# column becomes a row with its name in 'quarter' and its value in 'sales'.
pd.melt(
    df_sales,
    id_vars=['product'],
    value_vars=['quarter_1', 'quarter_2', 'quarter_3', 'quarter_4'],
    var_name='quarter',
    value_name='sales',
)

Unnamed: 0,product,quarter,sales
0,Umbrella,quarter_1,417
1,SleepingBag,quarter_1,800
2,Umbrella,quarter_2,224
3,SleepingBag,quarter_2,936
4,Umbrella,quarter_3,279
5,SleepingBag,quarter_3,93
6,Umbrella,quarter_4,611
7,SleepingBag,quarter_4,875
