In [1]:
import pandas as pd
import json
import ast

# Sample DataFrame: each non-id column stores a small record in a different
# string encoding (JSON text, Python dict literal, comma-separated values).
data = {
    'id': [1, 2, 3],
    'json_data': ['{"name": "Alice", "age": 25}', '{"name": "Bob", "age": 30}', '{"name": "Charlie", "age": 35}'],
    'dict_data': ["{'city': 'New York', 'salary': 70000}", "{'city': 'London', 'salary': 65000}", "{'city': 'Berlin', 'salary': 60000}"],
    'csv_like': ['Alice,Developer,5000', 'Bob,Manager,7000', 'Charlie,Analyst,6000']
}

df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)

# **1. Convert JSON String Column into Dictionary**
df['json_data'] = df['json_data'].apply(json.loads)  # JSON text -> dict
# Normalize from a plain list of dicts, then pin the result to df's index:
# DataFrame.join aligns on index labels, so the expanded columns must carry
# the same labels as the rows they were parsed from.
df_json_normalized = pd.json_normalize(df['json_data'].tolist())
df_json_normalized.index = df.index
df = df.drop(columns=['json_data']).join(df_json_normalized)

# **2. Convert Dictionary String to Actual Dictionary**
# These strings use single quotes, so they are Python literals rather than
# valid JSON; ast.literal_eval parses literals without executing code.
df['dict_data'] = df['dict_data'].apply(ast.literal_eval)
df_dict_normalized = pd.json_normalize(df['dict_data'].tolist())
df_dict_normalized.index = df.index  # same index alignment as in step 1
df = df.drop(columns=['dict_data']).join(df_dict_normalized)

# **3. Process CSV-Like Column**
df[['name_csv', 'role', 'salary_csv']] = df['csv_like'].str.split(',', expand=True)
# str.split yields strings only; convert the salary field so it is numeric
# like the 'salary' column parsed in step 2.
df['salary_csv'] = pd.to_numeric(df['salary_csv'])
df = df.drop(columns=['csv_like'])

print("\nProcessed DataFrame:")
print(df)


Original DataFrame:
   id                       json_data                              dict_data  \
0   1    {"name": "Alice", "age": 25}  {'city': 'New York', 'salary': 70000}   
1   2      {"name": "Bob", "age": 30}    {'city': 'London', 'salary': 65000}   
2   3  {"name": "Charlie", "age": 35}    {'city': 'Berlin', 'salary': 60000}   

               csv_like  
0  Alice,Developer,5000  
1      Bob,Manager,7000  
2  Charlie,Analyst,6000  

Processed DataFrame:
   id     name  age      city  salary name_csv       role salary_csv
0   1    Alice   25  New York   70000    Alice  Developer       5000
1   2      Bob   30    London   65000      Bob    Manager       7000
2   3  Charlie   35    Berlin   60000  Charlie    Analyst       6000


In [2]:
# Merge DataFrames on a Key Column

In [3]:
# Different Types of Merges in Pandas
import pandas as pd

# Build the two example frames from row records. They share the 'ID' column:
# IDs 2 and 3 appear in both, ID 1 only in df1 and ID 4 only in df2.
name_rows = [(1, 'Alice'), (2, 'Bob'), (3, 'Charlie')]
salary_rows = [(2, 70000), (3, 80000), (4, 90000)]
df1 = pd.DataFrame(name_rows, columns=['ID', 'Name'])
df2 = pd.DataFrame(salary_rows, columns=['ID', 'Salary'])
display(df1, df2)

Unnamed: 0,ID,Name
0,1,Alice
1,2,Bob
2,3,Charlie


Unnamed: 0,ID,Salary
0,2,70000
1,3,80000
2,4,90000


In [4]:
# Inner join on 'ID': only IDs present in both df1 and df2 are kept.
inner_join = df1.merge(df2, how='inner', on='ID')
display(inner_join)

Unnamed: 0,ID,Name,Salary
0,2,Bob,70000
1,3,Charlie,80000


In [5]:
# Left join on 'ID': every df1 row is kept; Salary is missing (NaN) where
# df2 has no matching ID.
left_join = df1.merge(df2, how='left', on='ID')
display(left_join)

Unnamed: 0,ID,Name,Salary
0,1,Alice,
1,2,Bob,70000.0
2,3,Charlie,80000.0


In [6]:
# Right join on 'ID': every df2 row is kept; Name is missing where df1 has
# no matching ID.
right_join = df1.merge(df2, how='right', on='ID')
display(right_join)

Unnamed: 0,ID,Name,Salary
0,2,Bob,70000
1,3,Charlie,80000
2,4,,90000


In [7]:
# Outer join on 'ID': the union of IDs from both frames, with missing values
# on whichever side lacks a match.
outer_join = df1.merge(df2, how='outer', on='ID')
display(outer_join)

Unnamed: 0,ID,Name,Salary
0,1,Alice,
1,2,Bob,70000.0
2,3,Charlie,80000.0
3,4,,90000.0


In [8]:
# Cross join: pairs every df1 row with every df2 row (no key is used), so the
# shared 'ID' column is emitted twice with the _x/_y suffixes.
cross_join = pd.merge(df1, df2, how='cross')
display(cross_join)

Unnamed: 0,ID_x,Name,ID_y,Salary
0,1,Alice,2,70000
1,1,Alice,3,80000
2,1,Alice,4,90000
3,2,Bob,2,70000
4,2,Bob,3,80000
5,2,Bob,4,90000
6,3,Charlie,2,70000
7,3,Charlie,3,80000
8,3,Charlie,4,90000
