In [None]:
# Importing required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Input data: a small in-memory sample, one list per column (six records each).
data = dict(
    ID=[1, 2, 3, 4, 5, 6],
    Name=['Amit', 'Sneha', 'Rohan', 'Priya', 'Kunal', 'Neha'],
    Age=[25, 30, 35, 40, 22, 29],
    Country=['India', 'UK', 'USA', 'Australia', 'Canada', 'Russia'],
    Sales=[200, 450, 300, 800, 150, 400],
)

In [None]:
# Build the working DataFrame from the column-oriented `data` mapping
# (each key becomes a column), then print the whole frame under a heading.
df = pd.DataFrame(data=data)
print("Original Dataset:", df, sep="\n")

Original Dataset:
   ID   Name  Age    Country  Sales
0   1   Amit   25      India    200
1   2  Sneha   30         UK    450
2   3  Rohan   35        USA    300
3   4  Priya   40  Australia    800
4   5  Kunal   22     Canada    150
5   6   Neha   29     Russia    400


In [None]:
#Data transformations

#1. Character map
#Description: transform text data by changing the case of its characters.
#A new Name_Upper column receives the upper-cased Name values; the Name
#column itself is left as it was.
df['Name_Upper'] = df['Name'].str.upper()
print(
    "\nCharacter Map (Uppercase Names):",
    df.loc[:, ['ID', 'Name', 'Name_Upper']],
    sep="\n",
)


Character Map (Uppercase Names):
   ID   Name Name_Upper
0   1   Amit       AMIT
1   2  Sneha      SNEHA
2   3  Rohan      ROHAN
3   4  Priya      PRIYA
4   5  Kunal      KUNAL
5   6   Neha       NEHA


In [None]:
#Multicast: derive two separate frames from the same source so that each
#branch gets its own transformation.
#assign() returns a new DataFrame with the named column replaced; `df`
#itself is not modified by either statement.
df_copy1 = df.assign(Sales=df['Sales'] * 1.1)  #sales increased by 10%
df_copy2 = df.assign(Age=df['Age'] + 5)  #every age raised by 5

In [None]:
# Show both multicast branches under one heading. Each argument is printed
# on its own line; the leading "\n" in a label produces the blank line
# that separates sections.
print(
    "\nMulticast (Modified copies):",
    "Copy 1 (Sales Increased):",
    df_copy1,
    "\nCopy 2 (Age Increased):",
    df_copy2,
    sep="\n",
)


Multicast (Modified copies):
Copy 1 (Sales Increased):
   ID   Name  Age    Country  Sales Name_Upper
0   1   Amit   25      India  220.0       AMIT
1   2  Sneha   30         UK  495.0      SNEHA
2   3  Rohan   35        USA  330.0      ROHAN
3   4  Priya   40  Australia  880.0      PRIYA
4   5  Kunal   22     Canada  165.0      KUNAL
5   6   Neha   29     Russia  440.0       NEHA

Copy 2 (Age Increased):
   ID   Name  Age    Country  Sales Name_Upper
0   1   Amit   30      India    200       AMIT
1   2  Sneha   35         UK    450      SNEHA
2   3  Rohan   40        USA    300      ROHAN
3   4  Priya   45  Australia    800      PRIYA
4   5  Kunal   27     Canada    150      KUNAL
5   6   Neha   34     Russia    400       NEHA


In [None]:
 #3. Conditional split
 #Description: route rows into separate outputs based on a condition.
 #Rows with Sales strictly above 300 go to high_sales; rows with Sales of
 #300 or less go to low_sales.
 high_sales = df.loc[df['Sales'].gt(300)]
 low_sales = df.loc[df['Sales'].le(300)]

 print("\nHigh sales :", high_sales, sep="\n")
 print("\nLow sales :", low_sales, sep="\n")


High sales :
   ID   Name  Age    Country  Sales Name_Upper
1   2  Sneha   30         UK    450      SNEHA
3   4  Priya   40  Australia    800      PRIYA
5   6   Neha   29     Russia    400       NEHA

Low sales :
   ID   Name  Age Country  Sales Name_Upper
0   1   Amit   25   India    200       AMIT
2   3  Rohan   35     USA    300      ROHAN
4   5  Kunal   22  Canada    150      KUNAL


In [None]:
#4. Aggregation
#Description: aggregate data, e.g. calculate total Sales by Country.
#as_index=False keeps Country as an ordinary column, so the result has a
#default integer index with Country and Sales columns.
agg_df = df.groupby('Country', as_index=False)['Sales'].sum()
print("\nAggregation (Total Sales by Country):", agg_df, sep="\n")


Aggregation (Total Sales by Country):
     Country  Sales
0  Australia    800
1     Canada    150
2      India    200
3     Russia    400
4         UK    450
5        USA    300


In [None]:
 #5. Sort
 #Description: order the dataset by Sales, largest value first.
 #sort_values returns a new frame; df keeps its original row order.
 sorted_df = df.sort_values('Sales', ascending=False)
 print("\nSort (Descending Sales):", sorted_df, sep="\n")


Sort (Descending Sales):
   ID   Name  Age    Country  Sales Name_Upper
3   4  Priya   40  Australia    800      PRIYA
1   2  Sneha   30         UK    450      SNEHA
5   6   Neha   29     Russia    400       NEHA
2   3  Rohan   35        USA    300      ROHAN
0   1   Amit   25      India    200       AMIT
4   5  Kunal   22     Canada    150      KUNAL


In [None]:
 #6. Derived column
 #Description: create a new column by deriving information from existing data.
 #Sales_Category labels a row 'High' when Sales is above 300 and 'Low'
 #otherwise, so a Sales value of exactly 300 is labelled 'Low'.
 #np.where evaluates the comparison for the whole column at once instead of
 #calling a Python lambda once per row through Series.apply.
 df['Sales_Category'] = np.where(df['Sales'] > 300, 'High', 'Low')
 print("\nDerived Column (Sales Category):")
 print(df[['ID', 'Name', 'Sales', 'Sales_Category']])


Derived Column (Sales Category):
   ID   Name  Sales Sales_Category
0   1   Amit    200            Low
1   2  Sneha    450           High
2   3  Rohan    300            Low
3   4  Priya    800           High
4   5  Kunal    150            Low
5   6   Neha    400           High
