<a href="https://colab.research.google.com/github/jmcconne100/Pandas_Notebook_Project/blob/main/practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

# Sample roster; the last age is deliberately missing (NaN).
df = pd.DataFrame({
    "name": ["Alice", "Bob", "Charlie", "Diana", "Eve"],
    "age": [25, 42, 17, 33, np.nan]
})

# One boolean mask per rule. A comparison against NaN is False, so the
# missing age matches neither "<" mask and needs its own isna() mask.
ages = df["age"]
is_under_18 = ages < 18
is_under_40 = ages < 40
is_missing = ages.isna()

# Add a new column using np.where: settle the adult/senior choice first,
# then let the "minor" mask take precedence over it.
adult_or_senior = np.where(is_under_40, "adult", "senior")
df["age_group"] = np.where(is_under_18, "minor", adult_or_senior)

# Handle missing values with np.where: without this second pass the NaN
# row would keep the fall-through label "senior".
df["age_group"] = np.where(is_missing, "unknown", df["age_group"])

print(df)


      name   age age_group
0    Alice  25.0     adult
1      Bob  42.0    senior
2  Charlie  17.0     minor
3    Diana  33.0     adult
4      Eve   NaN   unknown


In [None]:
# Sample data: the same two departments written with inconsistent case
# and abbreviations.
df = pd.DataFrame({
    "employee": ["Alice", "Bob", "Charlie", "Diana", "Eve"],
    "department": ["HR", "hr", "Finance", "FIN", "fin"]
})

# Define a mapping dictionary keyed by the *normalized* spelling
# (trimmed and lower-cased). Listing every case variant by hand is brittle:
# any spelling that is not listed (e.g. "hR" or " HR ") would silently map
# to NaN.
dept_map = {
    "hr": "Human Resources",
    "finance": "Finance",
    "fin": "Finance",
}

# Normalize first, then apply .map() to standardize department names.
# Values with no entry in dept_map still come out as NaN, which makes
# genuinely unknown departments easy to spot.
department_key = df["department"].str.strip().str.lower()
df["department_clean"] = department_key.map(dept_map)

print(df)


In [3]:
import pandas as pd

# Strings with stray leading/trailing whitespace and mixed case.
raw_text = ["  Hello World  ", "  PYTHON   ", "Data Science  "]
df = pd.DataFrame({"text": raw_text})

# Normalize in two steps: trim the surrounding whitespace, then lower-case.
# The original "text" column is left untouched for comparison.
trimmed = df["text"].str.strip()
df["normalized"] = trimmed.str.lower()

print(df)


              text    normalized
0    Hello World     hello world
1        PYTHON           python
2   Data Science    data science


In [4]:
import pandas as pd

# Strings sprinkled with punctuation and symbols.
samples = ["Hello, world!!!", "Data@Science#2025", "clean-text_is#fun"]
df = pd.DataFrame({"text": samples})

# Remove punctuation and special characters: delete every character that is
# neither a word character (\w) nor whitespace (\s).
# Note: \w also matches "_", so underscores are kept.
not_word_or_space = r"[^\w\s]"
df["normalized"] = df["text"].str.replace(not_word_or_space, "", regex=True)

print(df)


                text       normalized
0    Hello, world!!!      Hello world
1  Data@Science#2025  DataScience2025
2  clean-text_is#fun  cleantext_isfun


In [5]:
import pandas as pd

# Strings containing digits and irregular spacing.
samples = ["   Version 2.0   ", "  100 days of code! ", "Text123 cleaning  "]
df = pd.DataFrame({"text": samples})

# Remove digits and collapse multiple spaces, one named step at a time.
# Only digits are removed, so "2.0" leaves its "." behind.
without_digits = df["text"].str.replace(r"\d+", "", regex=True)   # remove digits
single_spaced = without_digits.str.replace(r"\s+", " ", regex=True)  # collapse extra spaces
df["normalized"] = single_spaced.str.strip()  # trim what is left at the edges

print(df)


                   text     normalized
0        Version 2.0         Version .
1    100 days of code!   days of code!
2    Text123 cleaning    Text cleaning


In [None]:
# Create a New Column by Arithmetic

import pandas as pd

# Build the frame and derive the new column in a single expression.
# "discount" is subtracted as an absolute amount, not applied as a percentage.
df = pd.DataFrame({
    "price": [100, 250, 400],
    "discount": [10, 25, 50]
}).assign(final_price=lambda frame: frame["price"] - frame["discount"])

print(df)


In [None]:
# Create Columns Conditionally with np.where()

import numpy as np
import pandas as pd

df = pd.DataFrame({"age": [15, 22, 35, 65]})

# Name each threshold test so the nested np.where reads top-down.
is_child = df["age"] < 18
is_below_60 = df["age"] < 60

# Inner choice first (adult vs. senior), then the child mask overrides it.
adult_or_senior = np.where(is_below_60, "adult", "senior")
df["group"] = np.where(is_child, "child", adult_or_senior)

print(df)


In [None]:
# Create Columns Using .map() for Category Mapping

import pandas as pd

gender_codes = ["M", "F", "F", "M", "M"]
df = pd.DataFrame({"gender": gender_codes})

# Lookup table from shorthand code to full label; .map() replaces each code
# with its label (a code missing from the table would become NaN).
gender_map = dict(M="Male", F="Female")
df = df.assign(gender_full=df["gender"].map(gender_map))

print(df)


In [None]:
# Create Columns Based on Text Patterns

import pandas as pd

df = pd.DataFrame({
    "review": ["Excellent product!", "Terrible experience", "Good value", "Bad packaging"]
})

# Create sentiment flag based on keywords, using vectorized string matching
# instead of a row-by-row Python lambda. na=False treats a missing review as
# "no match", so it falls through to "neutral" instead of raising a
# TypeError.
review_lower = df["review"].str.lower()
has_positive = review_lower.str.contains("good|excellent", na=False)
has_negative = review_lower.str.contains("bad|terrible", na=False)

# Start from the default label and overwrite in increasing priority:
# positive keywords win when a review contains both kinds of keyword.
df["sentiment"] = "neutral"
df.loc[has_negative, "sentiment"] = "negative"
df.loc[has_positive, "sentiment"] = "positive"

print(df)


In [None]:
# Create Columns Using .apply() Across Multiple Columns

import pandas as pd


def _join_names(row):
    """Return the row's first and last name separated by a single space."""
    return f"{row['first_name']} {row['last_name']}"


df = pd.DataFrame({
    "first_name": ["Alice", "Bob", "Charlie"],
    "last_name": ["Smith", "Brown", "Adams"]
})

# axis=1 hands each row to the helper, so it can read several columns at once.
df["full_name"] = df.apply(_join_names, axis=1)

print(df)


In [None]:
# Filter Rows by Single Condition

import pandas as pd

df = pd.DataFrame({
    "name": ["Alice", "Bob", "Charlie", "Diana"],
    "age": [25, 42, 17, 33]
})

# Build the boolean mask first, then use it to select rows.
# Matching rows keep their original index labels.
is_30_or_older = df["age"] >= 30
adults = df.loc[is_30_or_older]
print(adults)


In [None]:
# Filter with Multiple Conditions (& / |)

# This cell does not build its own frame: it expects `df` with an "age"
# column to exist already from an earlier cell.
at_least_18 = df["age"] >= 18
under_40 = df["age"] < 40

# People who are at least 18 but under 40: combine the masks element-wise
# with & (use | for "or").
subset = df[at_least_18 & under_40]
print(subset)


In [None]:
# Filter Using .isin() for Category Lists

df = pd.DataFrame({"department": ["HR", "Finance", "IT", "HR", "Sales"]})

# Keep only rows whose department appears in the wanted list.
wanted_departments = ["HR", "Finance"]
in_wanted = df["department"].isin(wanted_departments)
selected = df.loc[in_wanted]
print(selected)


In [None]:
# Filter Using String Matching

reviews = ["Excellent product", "Bad quality", "Good value", "Average performance"]
df = pd.DataFrame({"review": reviews})

# Find rows containing "good" or "excellent". case=False makes the match
# case-insensitive; na=False counts a missing review as "no match".
keyword_pattern = "good|excellent"
is_positive = df["review"].str.contains(keyword_pattern, case=False, na=False)
positive = df.loc[is_positive]
print(positive)


In [None]:
# Filter Using Query Expressions

df = pd.DataFrame({
    "name": ["Alice", "Bob", "Charlie", "Diana"],
    "age": [25, 42, 17, 33],
    "department": ["HR", "IT", "HR", "Finance"]
})

# Query syntax for readable filters: column names are used directly inside
# the expression string.
# With this sample data nobody in HR is 30 or older, so the result is an
# empty DataFrame.
condition = "age >= 30 and department == 'HR'"
filtered = df.query(condition)
print(filtered)


In [None]:
# Sort by a Single Column

import pandas as pd

people = {
    "name": ["Alice", "Bob", "Charlie", "Diana"],
    "age": [25, 42, 17, 33]
}
df = pd.DataFrame(people)

# Youngest to oldest. Ascending order is the default; it is spelled out here
# for clarity. sort_values returns a new frame and leaves `df` as it was.
sorted_df = df.sort_values(by="age", ascending=True)
print(sorted_df)


In [None]:
# Sort by Multiple Columns

df = pd.DataFrame({
    "department": ["HR", "Finance", "HR", "Finance"],
    "salary": [60000, 70000, 50000, 90000]
})

# Each sort key is paired with its own direction: department alphabetically
# (ascending), then salary from highest to lowest within each department.
sort_keys = ["department", "salary"]
sort_ascending = [True, False]
sorted_df = df.sort_values(by=sort_keys, ascending=sort_ascending)
print(sorted_df)


In [None]:
# Sort by Index

# Index labels are deliberately out of order so that the sort is visible.
row_labels = [3, 1, 2]
df = pd.DataFrame(
    {"name": ["Alice", "Bob", "Charlie"], "score": [85, 92, 88]},
    index=row_labels,
)

# Reorder the rows by index label rather than by any column's values.
sorted_df = df.sort_index()
print(sorted_df)


In [None]:
# Sort by String Column Alphabetically

def _lowercase_key(values):
    """Return the lower-cased column, used only for comparison while sorting."""
    return values.str.lower()


df = pd.DataFrame({"fruit": ["Banana", "apple", "Orange", "grape"]})

# Sort alphabetically, ignoring case: the key transforms the values that are
# compared, while the output keeps the original spelling.
sorted_df = df.sort_values(by="fruit", key=_lowercase_key)
print(sorted_df)


In [None]:
# Sort by a Computed or Derived Column

df = pd.DataFrame({
    "product": ["A", "B", "C"],
    "sales": [500, 1500, 1000],
    "returns": [50, 100, 20]
})

# Derive "margin" (sales - returns) on a copy via assign, so `df` itself
# never gains the extra column, then order from largest margin to smallest.
with_margin = df.assign(margin=lambda frame: frame["sales"] - frame["returns"])
sorted_df = with_margin.sort_values(by="margin", ascending=False)
print(sorted_df)


In [None]:
# Converting Strings to Datetime

import pandas as pd

# The same kind of value written in four different date formats.
df = pd.DataFrame({
    "date_str": ["2025-10-20", "2025/10/21", "Oct 22, 2025", "2025.10.23"]
})

# Convert strings to datetime objects.
# format="mixed" (pandas >= 2.0) parses every element on its own. Without it,
# pandas 2.x infers one format from the first value ("2025-10-20") and, with
# errors="coerce", turns every differently formatted string into NaT.
# errors="coerce" is kept so that genuinely unparseable text becomes NaT
# instead of raising.
df["date"] = pd.to_datetime(df["date_str"], format="mixed", errors="coerce")

print(df)


In [None]:
# Extracting Date Parts

# This cell does not build its own frame: it expects `df` with a "date"
# column to exist already from an earlier cell, and reads it through the
# .dt accessor.
date_parts = df["date"].dt

df["year"] = date_parts.year
df["month"] = date_parts.month
df["day"] = date_parts.day
df["weekday"] = date_parts.day_name()  # weekday as a name rather than a number

print(df)


In [None]:
# Filtering by Date Range

# This cell does not build its own frame: it expects `df` with a "date"
# column to exist already from an earlier cell.
range_start = "2025-10-21"
range_end = "2025-10-22"

# Keep rows between two dates; both bounds are inclusive.
on_or_after_start = df["date"] >= range_start
on_or_before_end = df["date"] <= range_end
mask = on_or_after_start & on_or_before_end
filtered = df.loc[mask]

print(filtered)
