In [None]:
import pandas as pd
import numpy as np

In [None]:
# Build a synthetic DataFrame with N_ROWS rows.
# The row count lives in one constant so every column stays the same length,
# and SEED pins NumPy's legacy global RNG so that a fresh
# Restart Kernel -> Run All reproduces the same random columns (any later
# np.random.* call in the notebook draws from the same seeded stream).
N_ROWS = 1_000_000
SEED = 42
np.random.seed(SEED)

df = pd.DataFrame({
    'A': np.random.randint(1, 100, size=N_ROWS),  # integers in [1, 100)
    'B': np.random.randint(1, 100, size=N_ROWS),  # integers in [1, 100)
    'C': np.random.randn(N_ROWS),                 # standard-normal floats
    'D': np.random.choice(['foo', 'bar', 'baz', 'qux'], size=N_ROWS),
    # One timestamp per minute, starting at 2022-01-01 00:00.
    'E': pd.date_range('2022-01-01', periods=N_ROWS, freq='min')
})
df.head()

In [None]:
# Derive column F as the element-wise total of columns A and B.
df["F"] = df["A"].add(df["B"])
df.head()

In [None]:
# Overwrite every value of A that is below 50 with 0; other values are kept.
df["A"] = df["A"].mask(df["A"] < 50, 0)
df.head()

In [None]:
# Square the strictly positive values of C; leave everything else
# (zero, negatives, NaN) unchanged.
# Done as a single vectorised mask instead of Series.apply with a Python
# lambda, which would call the lambda once per row.
df["C"] = df["C"].mask(df["C"] > 0, df["C"] ** 2)
df.head()

In [None]:
# Rename column A to A_mod and column B to B_mod.
# The result is rebound to `df` rather than using inplace=True, so the step
# reads as an explicit "new frame from old frame" transformation and can be
# chained with other methods.
df = df.rename(columns={"A": "A_mod", "B": "B_mod"})
df.head()

In [None]:
# Remove column D.
# `columns=` states the axis by name (instead of axis=1), and the result is
# rebound to `df` rather than mutating the frame with inplace=True.
df = df.drop(columns="D")
df.head()

In [None]:
# Reverse the row order: reindex the frame against its own index labels
# taken back-to-front, so the last row becomes the first.
df = df.reindex(df.index[::-1], axis="index")
df.head()

In [None]:
# Blank out C for the rows labelled 0..100 (inclusive), then fill the
# resulting gaps with 0.
# The rows are chosen with a boolean mask on the index labels rather than the
# label slice `0:100`: a label slice runs from the position of its first label
# to the position of its second, so on an index stored in descending order
# (e.g. after the rows have been reversed) `0:100` selects nothing and no NaN
# would ever be written. The mask picks the same labels in any index order.
in_first_labels = (df.index >= 0) & (df.index <= 100)
df.loc[in_first_labels, "C"] = np.nan
# Fill only column C, and assign the result back instead of using inplace=True.
df["C"] = df["C"].fillna(0)
df.head()

In [None]:
# For every row, store in G the mean of C over all rows that share the same
# B_mod value (transform keeps the original row alignment).
df["G"] = df["C"].groupby(df["B_mod"]).transform("mean")
df.head()

In [None]:
# Order the rows by ascending E.
# The sorted frame is rebound to `df` rather than sorting with inplace=True,
# keeping the step an explicit, chainable transformation.
df = df.sort_values("E")
df.head()

In [None]:
# Left-merge a small lookup table onto df by E and default the unmatched rows.
# The lookup holds 10 daily timestamps starting 2022-01-01, each paired with a
# random integer H in [1000, 5000).
daily_h = pd.DataFrame({
    "E": pd.date_range("2022-01-01", periods=10, freq="D"),
    "H": np.random.randint(1000, 5000, size=10),
})
df = (
    df.merge(daily_h, on="E", how="left")
    # how="left" keeps every row of df; rows whose E has no match in the
    # lookup get NaN in H, which is replaced by 0 here (no inplace=True).
    .fillna({"H": 0})
)
df.head()