In [1]:
# Importing Pandas for data manipulation and analysis
# Importing NumPy for numerical operations and array handling
import pandas as pd
import numpy as np

In [2]:
# 1. Initial product names and zero prices as Series

# Creating a Pandas Series to store product names
product_names = pd.Series(['soap', 'toothpaste', 'shampoo', 'body cream', 'face wash'])
print(f"\nProduct Names:\n{product_names}")

# Creating a corresponding Series for prices, initialized to zero for all products
product_prices = pd.Series([0] * len(product_names), dtype=float)
print(f"\nProduct Prices (Initialized to 0):\n{product_prices}")


Product Names:
0          soap
1    toothpaste
2       shampoo
3    body cream
4     face wash
dtype: object

Product Prices (Initialized to 0):
0    0.0
1    0.0
2    0.0
3    0.0
4    0.0
dtype: float64


In [3]:
# 2. Assign random prices between 100 and 1000

# Generating 5 random float values between 100 and 1000, then rounding them to 2 decimal places
product_prices = pd.Series(np.round(np.random.uniform(100, 1000, size=5), 2))

# Displaying the generated prices
print("\nRandomly Assigned Product Prices:\n", product_prices)


Randomly Assigned Product Prices:
 0    197.08
1    844.55
2    797.80
3    361.59
4    789.26
dtype: float64


In [4]:
# 3. Add new product_names

# Creating a list of extra products to add
extra_products = ['bread', 'butter']

# Initializing an empty list to store user-provided prices for the new products
extra_prices = []

# Looping through each product in extra_products and collecting its price from the user
for item in extra_products:
    p = float(input(f"Enter price for {item}: "))  # Ensuring input is stored as a float for decimal values
    extra_prices.append(p)  # Adding the entered price to the list

# This list will now contain the user-defined prices for the extra products

Enter price for bread:  25
Enter price for butter:  36


In [5]:
# Append new products and their prices

# The parameter ignore_index=True tells Pandas to:
# - Reassign new sequential integer indices to the resulting Series.
# - Ignore the original indices from both product_names and extra_products.
# - Assign new indices starting from 0 up to the total length - 1.

product_names = pd.concat([product_names, pd.Series(extra_products)], ignore_index=True)
product_prices = pd.concat([product_prices, pd.Series(extra_prices)], ignore_index=True)

# Displaying updated Series after appending
print("\nUpdated Product Names:\n", product_names)
print("\nUpdated Product Prices:\n", product_prices)


Updated Product Names:
 0          soap
1    toothpaste
2       shampoo
3    body cream
4     face wash
5         bread
6        butter
dtype: object

Updated Product Prices:
 0    197.08
1    844.55
2    797.80
3    361.59
4    789.26
5     25.00
6     36.00
dtype: float64


In [6]:
# 4. Display all product_names

print("\nAll Products:")

# Looping through the product names and corresponding prices for a structured display
for name, price in zip(product_names, product_prices):
    print(f"Name: {name:12} | Price: {price:7.2f}")



All Products:
Name: soap         | Price:  197.08
Name: toothpaste   | Price:  844.55
Name: shampoo      | Price:  797.80
Name: body cream   | Price:  361.59
Name: face wash    | Price:  789.26
Name: bread        | Price:   25.00
Name: butter       | Price:   36.00


In [7]:
# 5. Delete 'shampoo' with step-by-step breakdown

# Finding the index of 'shampoo' in the product_names Series
drop_index = product_names[product_names == 'shampoo'].index
print("\nIndex of 'shampoo' to drop:", drop_index)

# Checking if the index exists before proceeding with deletion
if not drop_index.empty:
    # Dropping 'shampoo' from the product_names Series
    product_names = product_names.drop(drop_index)
    print("\nProduct Names after drop:\n", product_names)

    # Resetting the index of product_names to maintain sequential numbering
    product_names = product_names.reset_index(drop=True)
    print("\nProduct Names after index reset:\n", product_names)

    # Dropping the corresponding price of 'shampoo' in the product_prices Series
    product_prices = product_prices.drop(drop_index)
    print("\nProduct Prices after drop:\n", product_prices)

    # Resetting the index of product_prices to maintain sequential numbering
    product_prices = product_prices.reset_index(drop=True)
    print("\nProduct Prices after index reset:\n", product_prices)


Index of 'shampoo' to drop: Index([2], dtype='int64')

Product Names after drop:
 0          soap
1    toothpaste
3    body cream
4     face wash
5         bread
6        butter
dtype: object

Product Names after index reset:
 0          soap
1    toothpaste
2    body cream
3     face wash
4         bread
5        butter
dtype: object

Product Prices after drop:
 0    197.08
1    844.55
3    361.59
4    789.26
5     25.00
6     36.00
dtype: float64

Product Prices after index reset:
 0    197.08
1    844.55
2    361.59
3    789.26
4     25.00
5     36.00
dtype: float64


In [8]:
# 6. Add Rs. 5 to the price of 'body cream'

# Identifying the index of 'body cream' in the product_names Series
body_cream_index = product_names[product_names == 'body cream'].index

# Displaying intermediate steps to understand how filtering works
print(f"\nBoolean Array (product_names == 'body cream'):\n{product_names == 'body cream'}")
print(f"\nFiltered Series (product_names[product_names == 'body cream']):\n{product_names[product_names == 'body cream']}")
print(f"\nIndex of 'body cream': {body_cream_index}")

# Price update

product_prices[body_cream_index[0]] += 5  # Increasing the price by Rs. 5

# Displaying the updated prices
print("\nUpdated Product Prices:\n", product_prices)


Boolean Array (product_names == 'body cream'):
0    False
1    False
2     True
3    False
4    False
5    False
dtype: bool

Filtered Series (product_names[product_names == 'body cream']):
2    body cream
dtype: object

Index of 'body cream': Index([2], dtype='int64')

Updated Product Prices:
 0    197.08
1    844.55
2    366.59
3    789.26
4     25.00
5     36.00
dtype: float64


In [9]:
# 7. Insert 'toothbrush' at position 2, keeping rows 0 and 1 unchanged

# Accepting user input for the price of 'toothbrush'
p = float(input("\nEnter price for toothbrush: "))

# Splitting the product_names Series and inserting 'toothbrush' at index 2
product_names = pd.concat([
    product_names.iloc[:2],        # Selecting the first two rows of product_names
    pd.Series(['toothbrush']),      # Creating a new Series with 'toothbrush'
    product_names.iloc[2:]          # Selecting all rows from index 2 onward
]).reset_index(drop=True)           # Resetting the index for sequential ordering

# Printing the updated product names Series
print("\nUpdated Product Names:\n", product_names)


Enter price for toothbrush:  52



Updated Product Names:
 0          soap
1    toothpaste
2    toothbrush
3    body cream
4     face wash
5         bread
6        butter
dtype: object


In [10]:
# 8. Search for 'face wash'

# Finding the index of 'face wash' in the product_names Series
fw_index = product_names[product_names == 'face wash'].index

# Displaying the search results
print(f"\nBoolean Array (product_names == 'face wash'):\n{product_names == 'face wash'}")
print(f"\nIndex of 'face wash': {fw_index}")

# Checking if 'face wash' exists before accessing its price
if not fw_index.empty:
    print(f"\nFound 'Face Wash' -> Price: {product_prices[fw_index[0]]:.2f}")
else:
    print("\n'Face Wash' not found")


Boolean Array (product_names == 'face wash'):
0    False
1    False
2    False
3    False
4     True
5    False
6    False
dtype: bool

Index of 'face wash': Index([4], dtype='int64')

Found 'Face Wash' -> Price: 25.00


In [11]:
# 9. Multiply all prices by 1.10

# Applying a 10% price increase across all products
product_prices *= 1.10  # Element-wise multiplication

# Displaying updated prices after adjustment
print("\nUpdated Product Prices after 10% increase:\n", product_prices)


Updated Product Prices after 10% increase:
 0    216.788
1    929.005
2    403.249
3    868.186
4     27.500
5     39.600
dtype: float64


In [12]:
# 10. Calculate Average Price and Standard Deviation

# Calculating the average price using .mean() method
avg_price = product_prices.mean()
print(f"\nRaw Average Price: {avg_price}")

# Calculating the standard deviation using .std() method
std_dev_price = product_prices.std()
print(f"Raw Standard Deviation: {std_dev_price}")

# Displaying results with formatted output
print(f"\nAverage Price: {avg_price:.2f}")  # Rounded to 2 decimal places
print(f"Standard Deviation of Prices: {std_dev_price:.2f}")  # Rounded for clarity


Raw Average Price: 414.05466666666666
Raw Standard Deviation: 400.02020489853595

Average Price: 414.05
Standard Deviation of Prices: 400.02


In [13]:
# 11. Set 'toothbrush' name to None and count nulls

# Identifying where 'toothbrush' appears in product_names and setting it to None
product_names[product_names == 'toothbrush'] = None

# Counting the number of null values in the product_names Series
null_count = product_names.isnull().sum()

# Displaying the total count of null product names
print("\nUpdated Product Names:\n", product_names)
print(f"\nNumber of null product names: {null_count}")


Updated Product Names:
 0          soap
1    toothpaste
2          None
3    body cream
4     face wash
5         bread
6        butter
dtype: object

Number of null product names: 1


In [14]:
# Final display of non-null products

print("\nNon-null Products Only")

# Looping through product names and prices, ensuring only valid entries are displayed
for name, price in zip(product_names, product_prices):
    if name is not None:  # Filtering out None values
        print(f"{name:12} | Price: {price:7.2f}")


Non-null Products Only
soap         | Price:  216.79
toothpaste   | Price:  929.00
body cream   | Price:  868.19
face wash    | Price:   27.50
bread        | Price:   39.60


In [15]:
### Additional Pandas Methods and Properties ###

# 1. Creating an initial product DataFrame
product_data = {
    'Name': ['soap', 'toothpaste', 'shampoo', 'body cream', 'face wash'],  # Product names
    'Price': [0] * 5  # Initial prices set to zero
}

df = pd.DataFrame(product_data)  # Creating a Pandas DataFrame
print("Initial Products:")
print(df)  # Displaying the complete DataFrame

# Exploring basic Pandas methods
print(df.head())  # Displays first few rows for quick overview
print(df.info())  # Provides dataset summary, including data types and missing values
print(df.describe())  # Gives statistical summary of numerical columns
print(df.dtypes)  # Shows data types of each column
print(df.shape)  # Returns tuple (rows, columns) to understand structure

Initial Products:
         Name  Price
0        soap      0
1  toothpaste      0
2     shampoo      0
3  body cream      0
4   face wash      0
         Name  Price
0        soap      0
1  toothpaste      0
2     shampoo      0
3  body cream      0
4   face wash      0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    5 non-null      object
 1   Price   5 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 212.0+ bytes
None
       Price
count    5.0
mean     0.0
std      0.0
min      0.0
25%      0.0
50%      0.0
75%      0.0
max      0.0
Name     object
Price     int64
dtype: object
(5, 2)


In [16]:
# Sorting the DataFrame by the 'Price' column in ascending order
print(df.sort_values(by='Price'))

# Applying a function to modify a column's values
def apply_discount(price):
    return price * 0.9  # Applying a 10% discount to each price value

# Creating a new column with discounted prices
df['Discounted Price'] = df['Price'].apply(apply_discount)

# Display the updated DataFrame
print(df)

         Name  Price
0        soap      0
1  toothpaste      0
2     shampoo      0
3  body cream      0
4   face wash      0
         Name  Price  Discounted Price
0        soap      0               0.0
1  toothpaste      0               0.0
2     shampoo      0               0.0
3  body cream      0               0.0
4   face wash      0               0.0


In [17]:
# Indexing and selection in Pandas

# Select the first two rows using integer-location based indexing (.iloc)
print(df.iloc[:2])  # Displays the first two rows in the DataFrame

# Select rows where the 'Price' column is greater than 100
print(df.loc[df['Price'] > 100])
# Uses .loc[] to filter rows based on column conditions

# Creating a more detailed product dataset with additional attributes
product_data = {
    'Name': ['soap', 'toothpaste', 'shampoo', 'body cream', 'face wash', 'bread', 'butter'],
    'Price': [150.00, 250.50, 300.00, 450.75, 200.00, 50.25, 75.00],
    'Category': ['Cosmetic', 'Cosmetic', 'Cosmetic', 'Cosmetic', 'Cosmetic', 'Food', 'Food'],
    'In_Stock': [True, True, False, True, False, True, True]  # Stock availability status
}

# Convert dictionary into a Pandas DataFrame
df = pd.DataFrame(product_data)
print(df)  # Display the complete DataFrame

         Name  Price  Discounted Price
0        soap      0               0.0
1  toothpaste      0               0.0
Empty DataFrame
Columns: [Name, Price, Discounted Price]
Index: []
         Name   Price  Category  In_Stock
0        soap  150.00  Cosmetic      True
1  toothpaste  250.50  Cosmetic      True
2     shampoo  300.00  Cosmetic     False
3  body cream  450.75  Cosmetic      True
4   face wash  200.00  Cosmetic     False
5       bread   50.25      Food      True
6      butter   75.00      Food      True


In [18]:
# Filtering 'Cosmetic' category items that are in stock
cosmetic_in_stock = df[(df['Category'] == 'Cosmetic') & (df['In_Stock'] == True)]
print(cosmetic_in_stock)

# Filtering items that are either expensive (price > 200) OR out of stock
expensive_or_out_of_stock = df[(df['Price'] > 200) | (df['In_Stock'] == False)]
print(expensive_or_out_of_stock)

# Using Pandas 'query' function for cleaner syntax
food_or_cheap_query = df.query("Category == 'Food' or Price < 100")
print(food_or_cheap_query)

         Name   Price  Category  In_Stock
0        soap  150.00  Cosmetic      True
1  toothpaste  250.50  Cosmetic      True
3  body cream  450.75  Cosmetic      True
         Name   Price  Category  In_Stock
1  toothpaste  250.50  Cosmetic      True
2     shampoo  300.00  Cosmetic     False
3  body cream  450.75  Cosmetic      True
4   face wash  200.00  Cosmetic     False
     Name  Price Category  In_Stock
5   bread  50.25     Food      True
6  butter  75.00     Food      True


In [19]:
# String functions in Pandas

# Convert all names to uppercase
df['Name_Upper'] = df['Name'].str.upper()
print(df[['Name', 'Name_Upper']])

# Calculate length of each product name
df['Name_Length'] = df['Name'].str.len()
print(df[['Name', 'Name_Length']])

# Check if each product name starts with 's'
df['Starts_With_S'] = df['Name'].str.startswith('s')
print(df[['Name', 'Starts_With_S']])

# Identify product names containing 'tooth'
df['Contains_Tooth'] = df['Name'].str.contains('tooth')
print(df[['Name', 'Contains_Tooth']])

# Replace occurrences of 'cream' with 'lotion' in product names
df['Name_Replaced'] = df['Name'].str.replace('cream', 'lotion', regex=False)
print(df[['Name', 'Name_Replaced']])

         Name  Name_Upper
0        soap        SOAP
1  toothpaste  TOOTHPASTE
2     shampoo     SHAMPOO
3  body cream  BODY CREAM
4   face wash   FACE WASH
5       bread       BREAD
6      butter      BUTTER
         Name  Name_Length
0        soap            4
1  toothpaste           10
2     shampoo            7
3  body cream           10
4   face wash            9
5       bread            5
6      butter            6
         Name  Starts_With_S
0        soap           True
1  toothpaste          False
2     shampoo           True
3  body cream          False
4   face wash          False
5       bread          False
6      butter          False
         Name  Contains_Tooth
0        soap           False
1  toothpaste            True
2     shampoo           False
3  body cream           False
4   face wash           False
5       bread           False
6      butter           False
         Name Name_Replaced
0        soap          soap
1  toothpaste    toothpaste
2     shampoo       

In [20]:
# Creating a sample DataFrame with product sales data grouped by category and region
data = {
    'Category': ['Cosmetic', 'Cosmetic', 'Food', 'Food', 'Cosmetic', 'Food'],
    'Product': ['Soap', 'Lotion', 'Bread', 'Butter', 'Shampoo', 'Milk'],
    'Region': ['North', 'South', 'North', 'South', 'North', 'East'],
    'Sales': [150, 200, 120, 80, 250, 90],
    'Units': [10, 8, 15, 10, 12, 5]
}

df_new = pd.DataFrame(data)  # Convert dictionary into Pandas DataFrame
print(df_new.head())  # Display first few rows

# Grouping by 'Category' and calculating total sales per category
grouped_category_sales = df_new.groupby('Category')['Sales'].sum()
print(grouped_category_sales)  # Shows total sales per category

# Using `.agg()` function for multiple aggregations
grouped_category_multi_agg = df_new.groupby('Category').agg(
    Total_Sales=('Sales', 'sum'),  # Summing total sales per category
    Average_Units=('Units', 'mean'),  # Computing average units sold per category
    Number_of_Products=('Product', 'count')  # Counting the number of products in each category
)
print(grouped_category_multi_agg)

   Category  Product Region  Sales  Units
0  Cosmetic     Soap  North    150     10
1  Cosmetic   Lotion  South    200      8
2      Food    Bread  North    120     15
3      Food   Butter  South     80     10
4  Cosmetic  Shampoo  North    250     12
Category
Cosmetic    600
Food        290
Name: Sales, dtype: int64
          Total_Sales  Average_Units  Number_of_Products
Category                                                
Cosmetic          600           10.0                   3
Food              290           10.0                   3
