In [52]:
import pandas as pd
import numpy as np

In [53]:
# Toy employee table. One missing value per column (None / np.nan) is
# included on purpose so the later cells show how missing data behaves.
employee_names = ["Manish", "Ram", "Sita", "Gita", None]
employee_salaries = [98000, 99000, np.nan, 50000, 60000]
employee_ages = [20, 22, 23, 21, np.nan]

test_data = {
    "Name": employee_names,
    "Salary": employee_salaries,
    "Age": employee_ages,
}

df = pd.DataFrame(data=test_data)
df

Unnamed: 0,Name,Salary,Age
0,Manish,98000.0,20.0
1,Ram,99000.0,22.0
2,Sita,,23.0
3,Gita,50000.0,21.0
4,,60000.0,


In [54]:

# Name -> department lookup table. "hari" appears only here and the
# employee table's missing-name row has no entry here, so the different
# join types below give visibly different results.
name_department_pairs = [
    ("Manish", "science"),
    ("Ram", "commerce"),
    ("Sita", "management"),
    ("Gita", "humanities"),
    ("hari", "information technology"),
]
dept_data = {
    "Name": [person for person, _ in name_department_pairs],
    "Department": [department for _, department in name_department_pairs],
}
df_dept = pd.DataFrame(data=dept_data)
df_dept


Unnamed: 0,Name,Department
0,Manish,science
1,Ram,commerce
2,Sita,management
3,Gita,humanities
4,hari,information technology


## Types of Merges
You can specify the type of merge with the `how` parameter:

'inner' (default): Returns only the rows whose keys are present in both DataFrames.

'left': Returns all rows from the left DataFrame plus the matching rows from the right DataFrame; left rows with no match get NaN in the right-hand columns.

'right': Returns all rows from the right DataFrame plus the matching rows from the left DataFrame; right rows with no match get NaN in the left-hand columns.

'outer': Returns all rows from both DataFrames, with NaN wherever one side has no match.

In [55]:
# Merge the two DataFrames on the shared "Name" column.
# No `how` is given, so pd.merge falls back to its default, an inner join.
merged_df = pd.merge(left=df, right=df_dept, on="Name")
merged_df

Unnamed: 0,Name,Salary,Age,Department
0,Manish,98000.0,20.0,science
1,Ram,99000.0,22.0,commerce
2,Sita,,23.0,management
3,Gita,50000.0,21.0,humanities


In [56]:
# Outer join: keep every row from both frames; columns coming from the
# side that has no matching "Name" are filled with NaN.
merged_df = pd.merge(left=df, right=df_dept, how="outer", on="Name")
merged_df

Unnamed: 0,Name,Salary,Age,Department
0,Gita,50000.0,21.0,humanities
1,Manish,98000.0,20.0,science
2,Ram,99000.0,22.0,commerce
3,Sita,,23.0,management
4,hari,,,information technology
5,,60000.0,,


In [57]:
# Left join: keep every row of `df` (the left frame). The row whose Name
# is missing has no department match, so its Department is NaN.
merged_df = pd.merge(left=df, right=df_dept, how="left", on="Name")
merged_df

Unnamed: 0,Name,Salary,Age,Department
0,Manish,98000.0,20.0,science
1,Ram,99000.0,22.0,commerce
2,Sita,,23.0,management
3,Gita,50000.0,21.0,humanities
4,,60000.0,,


In [58]:
# Inner join, requested explicitly: only names present in both frames survive.
merged_df = pd.merge(left=df, right=df_dept, how="inner", on="Name")
merged_df

Unnamed: 0,Name,Salary,Age,Department
0,Manish,98000.0,20.0,science
1,Ram,99000.0,22.0,commerce
2,Sita,,23.0,management
3,Gita,50000.0,21.0,humanities


In [59]:
# Right join: keep every row of `df_dept` (the right frame). "hari" has no
# row in `df`, so Salary and Age are NaN for that row.
merged_df = pd.merge(left=df, right=df_dept, how="right", on="Name")
merged_df

Unnamed: 0,Name,Salary,Age,Department
0,Manish,98000.0,20.0,science
1,Ram,99000.0,22.0,commerce
2,Sita,,23.0,management
3,Gita,50000.0,21.0,humanities
4,hari,,,information technology


In [60]:
## grouping and aggregation
# `merged_df` is whatever the most recently run merge cell assigned; when the
# notebook is run top to bottom that is the right-join result.
# Every department has a single row here, so the sample standard deviation
# ("std") comes out as NaN for each group.
salary_stats = ["mean", "max", "min", "count", "std"]
age_stats = ["median", "count", "sum", "std"]

per_department = merged_df.groupby(by="Department")
agg_df = per_department.agg({"Salary": salary_stats, "Age": age_stats})
agg_df

Unnamed: 0_level_0,Salary,Salary,Salary,Salary,Salary,Age,Age,Age,Age
Unnamed: 0_level_1,mean,max,min,count,std,median,count,sum,std
Department,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
commerce,99000.0,99000.0,99000.0,1,,22.0,1,22.0,
humanities,50000.0,50000.0,50000.0,1,,21.0,1,21.0,
information technology,,,,0,,,0,0.0,
management,,,,0,,23.0,1,23.0,
science,98000.0,98000.0,98000.0,1,,20.0,1,20.0,


In [61]:
# Sorting and renaming.
# Build the result with a method chain instead of `inplace=True`: the frame
# that earlier cells displayed is not mutated behind the reader's back, and
# the name `df` is simply rebound to the new frame so later cells still see
# the sorted, renamed data.
# Rows are ordered by ascending Salary; the row with a missing Salary is
# placed last (pandas' default NaN position). The original row labels are kept.
df = (
    df
    .sort_values(by="Salary", ascending=True)
    .rename(columns={"Name": "Person Name"})
)
df

Unnamed: 0,Person Name,Salary,Age
3,Gita,50000.0,21.0
4,,60000.0,
0,Manish,98000.0,20.0
1,Ram,99000.0,22.0
2,Sita,,23.0


In [62]:
# Save the DataFrame as a CSV file in the current working directory.
# index=False keeps the row labels out of the file.
csv_path = "test_dataframe.csv"
df.to_csv(path_or_buf=csv_path, index=False)

In [63]:
# Load the DataFrame back from the saved CSV.
# The file was written without its index, so the loaded frame gets a fresh
# 0..n-1 index rather than the labels it had before saving.
csv_path = "test_dataframe.csv"
df = pd.read_csv(filepath_or_buffer=csv_path)
df

Unnamed: 0,Person Name,Salary,Age
0,Gita,50000.0,21.0
1,,60000.0,
2,Manish,98000.0,20.0
3,Ram,99000.0,22.0
4,Sita,,23.0


In [None]:
# List the names of the example datasets that seaborn can load.
# NOTE: get_dataset_names() looks the list up online, so this cell needs
# an internet connection.
import seaborn as sns

available_datasets = sns.get_dataset_names()
available_datasets

['anagrams',
 'anscombe',
 'attention',
 'brain_networks',
 'car_crashes',
 'diamonds',
 'dots',
 'dowjones',
 'exercise',
 'flights',
 'fmri',
 'geyser',
 'glue',
 'healthexp',
 'iris',
 'mpg',
 'penguins',
 'planets',
 'seaice',
 'taxis',
 'tips',
 'titanic']