## pandas.cut

In [None]:
import pandas as pd

# Sample values and the bin edges used to discretise them.
data = [1, 5, 10, 15, 20]
bins = [0, 10, 20]

# With the default right=True each bin is closed on the right: (0, 10] and (10, 20].
# The value 10 therefore lands in the first bin and 20 in the second.
result = pd.cut(data, bins=bins)
print(result)


In [None]:
# right=False closes the bins on the left instead: [0, 10) and [10, 20).
# 10 now moves to the second bin, and 20 falls outside every bin (NaN).
result = pd.cut(data, bins=bins, right=False)
print(result)


In [None]:
# labels replaces the interval notation with names; one label per bin is
# required, i.e. len(labels) == len(bins) - 1.
bins = [0, 10, 20]
labels = ["Low", "High"]
pd.cut(data, bins=bins, labels=labels)


In [None]:
# include_lowest=True makes the first interval include its left edge, so a value
# equal to bins[0] (here 0) would be binned rather than returned as NaN.
pd.cut(data, bins=bins, include_lowest=True)

In [None]:
# An integer `bins` asks for that many equal-width bins spanning the data's range;
# precision sets the number of decimals used for the bin edges in the labels.
pd.cut(data, bins=3, precision=1)

In [None]:
# With a list input, pd.cut returns a Categorical: .categories holds the bin
# labels and .codes the integer position of each value's label (-1 for NaN).
result = pd.cut(data, bins=[0, 10, 20], labels=["Low", "High"])
print(result.categories) 
print(result.codes)


## sns.FacetGrid

`FacetGrid` allows you to:

- Create multiple subplots based on categorical variables.
- Visualize different subsets of the data.
- Share axes across all subplots for comparison.
- Add layers like scatterplots, lineplots, and histograms to the subplots.

Basic usage:

```python
g = sns.FacetGrid(data, col="col_name", row="row_name", hue="hue_name")
g.map(function, "x", "y")
```

- `data`: The DataFrame containing your data.
- `col`: Creates columns of subplots for each unique value in the variable.
- `row`: Creates rows of subplots for each unique value in the variable.
- `hue`: Differentiates within each subplot by color.
- `map()` or `map_dataframe()`: Specifies which plotting function (e.g., `sns.scatterplot`) to use and maps the variables to the plot.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Load seaborn's "tips" example dataset as a DataFrame.
# NOTE: load_dataset fetches the file from seaborn's online data repository
# (and caches it locally), so the first call needs network access.
tips = sns.load_dataset("tips")

In [None]:
# Peek at three randomly chosen rows.
tips.sample(3)

In [None]:
# Summarise the columns: names, dtypes and non-null counts.
tips.info()

In [None]:
# Column overview: name, number of distinct values, and the first five distinct values.
for col in tips.columns:
    uniques = tips[col].unique()
    n_unique = len(uniques)
    preview = uniques[:5]
    print(f"{col:<60}{n_unique:<10}{preview}")

In [None]:
# Create a FacetGrid with one column of subplots per unique value of 'time'
g = sns.FacetGrid(tips, col="time")
# Draw a 10-bin histogram of 'total_bill' on each facet, using that facet's rows only
g.map_dataframe(sns.histplot, "total_bill", bins=10, color="blue")

# Add axis labels (uncomment set_titles to override the default facet titles)
g.set_axis_labels("Total Bill", "Count")
# g.set_titles("{col_name} Time")

plt.show()

In [None]:
# You can add both rows and columns to represent two categorical variables.
# margin_titles=True draws the row-variable titles in the right-hand margin of the grid.

g = sns.FacetGrid(tips, col="time", row="sex", margin_titles=True)
g.map_dataframe(sns.scatterplot, "total_bill", "tip", alpha=0.7)

# Add axis labels (uncomment set_titles to override the default facet titles)
g.set_axis_labels("Total Bill", "Tip")
# g.set_titles("{row_name}, {col_name}")

plt.show()


In [None]:
# You can differentiate data within each subplot by using the hue parameter.
# Each 'sex' level gets its own colour; col_wrap=2 allows at most two facets per row.
g = sns.FacetGrid(tips, col="time", hue="sex", col_wrap=2)
g.map_dataframe(sns.scatterplot, "total_bill", "tip", alpha=0.7)

# Add a legend that maps each hue colour to its 'sex' level
g.add_legend()
plt.show()


In [None]:
# One facet per 'day', each showing box plots of total_bill split by 'time'.
# NOTE(review): recent seaborn releases (0.13+) warn when `palette` is passed
# without `hue` — confirm against the installed version.
g = sns.FacetGrid(tips, col="day", height=4)
g.map_dataframe(sns.boxplot, "time", "total_bill", palette="muted")
plt.show()


In [None]:
# If the number of unique categories is too large, you can "wrap" the columns into multiple rows.
# col_wrap=2 starts a new row of facets after every two columns.
g = sns.FacetGrid(tips, col="day", col_wrap=2, height=4)
g.map_dataframe(sns.boxplot, "time", "total_bill", palette="muted")
plt.show()


In [None]:
# You can customize the axes, titles, and other elements of the grid.

g = sns.FacetGrid(tips, col="time", row="sex")
g.map_dataframe(sns.scatterplot, "total_bill", "tip")

# Set custom titles and axis labels
# {col_name} / {row_name} are template fields filled in with each facet's category value
g.set_titles("Time: {col_name}, Sex: {row_name}")
g.set_axis_labels("Total Bill", "Tip Amount")
# g.set applies the keyword arguments to every Axes in the grid, fixing the axis limits
g.set(xlim=(0, 50), ylim=(0, 10))
plt.show()


In [None]:
# You can add multiple layers to each subplot by calling map_dataframe more than once
# (here a line plot is drawn on top of the scatter plot).

g = sns.FacetGrid(tips, col="time", row="sex", hue="smoker")
g.map_dataframe(sns.scatterplot, "total_bill", "tip", alpha=0.7)
# NOTE(review): sns.lineplot connects the data points; it does not fit a regression
# (sns.regplot does). Also, `ci` is deprecated in seaborn 0.12+ in favour of
# `errorbar=None` — confirm against the installed version.
g.map_dataframe(sns.lineplot, "total_bill", "tip", ci=None)
g.add_legend()
plt.show()


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Create a dataset: one row per (company, month-end date) with random revenue and cost
np.random.seed(42)  # fixed seed so the random figures are reproducible
# A MonthEnd offset object is used instead of the "M" string alias: pandas 2.2
# deprecated "M" in favour of "ME", while older releases do not accept "ME".
# The offset object yields the same month-end dates on both.
dates = pd.date_range("2020-01-01", "2022-12-31", freq=pd.offsets.MonthEnd())
companies = [1, 2, 3]
data = []

for company in companies:
    revenue = np.random.uniform(10000, 50000, len(dates))
    cost = revenue - np.random.uniform(2000, 15000, len(dates))  # Costs are less than revenue
    # One (company_id, month, revenue, cost) tuple per date
    data.extend(zip([company] * len(dates), dates, revenue, cost))

df = pd.DataFrame(data, columns=["company_id", "month", "revenue", "cost"])


In [None]:
# Define the plotting function
def plot_with_fill(data, **kwargs):
    """Plot revenue and cost lines on the current axes and shade the gap between them.

    The area is shaded green where revenue >= cost ("Profit") and red where
    revenue < cost ("Loss").

    Parameters
    ----------
    data : table-like object with "month", "revenue" and "cost" columns
        Rows to plot; each column must expose ``.values``.
    **kwargs
        Accepted so callers can pass extra keyword arguments; they are
        ignored because the colours and labels are fixed here.
    """
    ax = plt.gca()
    x = data["month"].values
    revenue = data["revenue"].values
    cost = data["cost"].values

    # Plot revenue and cost lines
    ax.plot(x, revenue, label="Revenue", color="orange", linewidth=2)
    ax.plot(x, cost, label="Cost", color="red", linewidth=2)

    # Fill between revenue and cost.
    # interpolate=True extends each fill to the exact point where the two lines
    # cross; without it the segment containing a crossing is left unshaded.
    ax.fill_between(x, revenue, cost, where=(revenue >= cost), interpolate=True,
                    color="lightgreen", alpha=0.5, label="Profit")
    ax.fill_between(x, revenue, cost, where=(revenue < cost), interpolate=True,
                    color="lightcoral", alpha=0.5, label="Loss")

# Apply FacetGrid: one facet per company_id, each drawn by plot_with_fill
g = sns.FacetGrid(data=df, col="company_id", col_wrap=3, height=4, aspect=1.5)
g.map_dataframe(plot_with_fill)

# Format x-axis: year-month tick labels, rotated so neighbouring dates do not overlap
for ax in g.axes.flat:
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
    # The original call passed only axis="x", which changes nothing.
    ax.tick_params(axis="x", labelrotation=45)

g.add_legend()
plt.show()


## plt.fill_between

`fill_between` in matplotlib shades the area between two curves, which makes the difference between them easy to see.
- `matplotlib.pyplot.fill_between(x, y1, y2=0, where=None, interpolate=False, **kwargs)`
    - `x`: Array of x-coordinates.
    - `y1`: Array of y-coordinates for the first line.
    - `y2`: Array of y-coordinates for the second line (default is 0).
    - `where`: A boolean condition that selects the x-ranges to fill.
    - `interpolate`: When `where` is used and the curves cross, set to `True` to extend the fill up to the exact intersection point.
    - `**kwargs`: Additional parameters like color, alpha, label, etc.

### Basic Fill Between Two Lines

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Generate data: 500 evenly spaced x values on [0, 10] and the two curves to compare
x = np.linspace(0, 10, 500)
y1 = np.sin(x)
y2 = np.cos(x)

# Plot lines
plt.plot(x, y1, label="sin(x)", color="blue")
plt.plot(x, y2, label="cos(x)", color="orange")

# Fill the area between
# `where` is a boolean mask over x: green where sin(x) >= cos(x), coral where sin(x) < cos(x)
plt.fill_between(x, y1, y2, where=(y1 >= y2), color="lightgreen", alpha=0.5, label="sin(x) >= cos(x)")
plt.fill_between(x, y1, y2, where=(y1 < y2), color="lightcoral", alpha=0.5, label="sin(x) < cos(x)")

# Add legend and labels
plt.legend()
plt.title("Area Between sin(x) and cos(x)")
plt.xlabel("x")
plt.ylabel("y")
plt.show()


### Shading Below a Single Line

In [None]:
# Generate data
x = np.linspace(0, 10, 500)
y = np.sin(x)

# Plot the line
plt.plot(x, y, label="sin(x)", color="blue")

# Fill the area below the line
# y2 is omitted, so the fill runs between the curve and y = 0 (the default for y2)
plt.fill_between(x, y, color="lightblue", alpha=0.5, label="Area below sin(x)")

# Add legend and labels
plt.legend()
plt.title("Shading Below sin(x)")
plt.xlabel("x")
plt.ylabel("y")
plt.show()


### Highlight Specific Regions

In [None]:
# Generate data
x = np.linspace(0, 10, 500)
y = np.sin(x)

# Plot the curve, then shade the positive and negative parts in different colours
# (each fill runs between the curve and y = 0, limited by its `where` mask)
plt.plot(x, y, label="sin(x)", color="blue")
plt.fill_between(x, y, where=(y > 0), color="lightgreen", alpha=0.5, label="Positive")
plt.fill_between(x, y, where=(y < 0), color="lightcoral", alpha=0.5, label="Negative")

# Add legend and labels
plt.legend()
plt.title("Highlighting Positive and Negative Regions")
plt.xlabel("x")
plt.ylabel("y")
plt.show()


## ax.xaxis.set_major_formatter(formatter)

`set_major_formatter` is a method in Matplotlib that allows you to format the major tick labels on an axis. This is particularly useful when dealing with date or numeric axes, enabling customization of the tick label display.

- ax: The Axes object (e.g., plt.gca() for the current axes).
- formatter: A matplotlib.ticker.Formatter object or a function that specifies how to format the tick labels.
    - `matplotlib.dates.DateFormatter` for date formatting.
    - `matplotlib.ticker.FuncFormatter` for custom formatting.
    - `matplotlib.ticker.PercentFormatter` for percentage formatting.
        - Use `xmax` to define the maximum value corresponding to 100% (e.g., 1 for normalized data or 100 for raw percentages).
        - Use `decimals` to control the number of decimal places. (e.g., decimals=2)
        - Use `symbol` to customize or omit the percentage symbol. (e.g., symbol=' percent')


### Formatting Dates

In [None]:
# Twelve month-end dates as a raw NumPy datetime64 array (.values unwraps the index).
# pd.offsets.MonthEnd() replaces the "M" string alias, which pandas 2.2 deprecated.
pd.date_range(start="2023-01-01", periods=12, freq=pd.offsets.MonthEnd()).values

In [None]:
# The same twelve month-end dates, kept as a pandas DatetimeIndex.
# pd.offsets.MonthEnd() replaces the "M" string alias, which pandas 2.2 deprecated.
pd.date_range(start="2023-01-01", periods=12, freq=pd.offsets.MonthEnd())

In [None]:
# The same dates converted with .to_numpy(), the method form of .values.
# pd.offsets.MonthEnd() replaces the "M" string alias, which pandas 2.2 deprecated.
pd.date_range(start="2023-01-01", periods=12, freq=pd.offsets.MonthEnd()).to_numpy()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd

# Generate date data: twelve month-end dates starting in January 2023
# (pd.offsets.MonthEnd() replaces the "M" string alias, which pandas 2.2 deprecated)
dates = pd.date_range(start="2023-01-01", periods=12, freq=pd.offsets.MonthEnd()).values
values = range(12)

# Plot the data
fig, ax = plt.subplots()
ax.plot(dates, values)

# Format the x-axis ticks as abbreviated month plus year (e.g. "Jan 2023")
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))

# Rotate tick labels for better readability
# (plt.xticks() with no arguments only reads the ticks; rotation must be passed explicitly)
plt.xticks(rotation=45)
plt.title("Monthly Data")
plt.xlabel("Date")
plt.ylabel("Value")
plt.show()


### Formatting Numbers with a Custom Function

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker

# Sample series: ten points rising from 1,000 to 10,000 in steps of 1,000
x = range(10)
y = [1000 * step for step in range(1, 11)]

# Draw the series on a fresh figure/axes pair
fig, ax = plt.subplots()
ax.plot(x, y)

# Custom tick formatter: show amounts in thousands of dollars
def format_large_numbers(x, pos):
    """Return ``x`` as a dollar amount in thousands with one decimal, e.g. 2500 -> "$2.5k".

    ``pos`` is accepted as the second argument but is not used.
    """
    thousands = x / 1000
    return "${:.1f}k".format(thousands)

# Apply the formatter
# FuncFormatter is used to define a custom formatting function: it wraps
# format_large_numbers so that its return value becomes the y-axis tick label.
ax.yaxis.set_major_formatter(mticker.FuncFormatter(format_large_numbers))

plt.title("Custom Y-Axis Formatting")
plt.xlabel("Index")
plt.ylabel("Amount")
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FuncFormatter

# Generate data: ten x values on [0, 1]; y = x squared, so y also stays within [0, 1]
x = np.linspace(0, 1, 10)
y = x ** 2

# Custom tick formatter: show fractions as percentages
def custom_percentage(x, pos):
    """Return ``x`` multiplied by 100 with one decimal and a percent sign, e.g. 0.5 -> "50.0%".

    ``pos`` is accepted as the second argument but is not used.
    """
    scaled = x * 100
    return "{:.1f}%".format(scaled)

# Plot the data
fig, ax = plt.subplots()
ax.plot(x, y)

# Apply the custom formatter: each y tick value is passed through custom_percentage
ax.yaxis.set_major_formatter(FuncFormatter(custom_percentage))

plt.title("Custom Percentage Formatting")
plt.xlabel("X values")
plt.ylabel("Y values (Percentage)")
plt.show()

### Percentage Formatting

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import PercentFormatter

# Generate data
x = np.linspace(0, 1, 10)
y = x ** 2

# Plot the data
fig, ax = plt.subplots()
ax.plot(x, y)

# Format the y-axis as percentages
# PercentFormatter(xmax=1) assumes the values are normalized (0 to 1) and formats them as percentages (e.g., 0.5 → 50%).
ax.yaxis.set_major_formatter(PercentFormatter(xmax=1))

plt.title("Percentage Formatting")
plt.xlabel("X values")
plt.ylabel("Y values (Percentage)")
plt.show()


### Formatting Bar Chart Percentages

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import PercentFormatter

# Generate data: category shares expressed as fractions of 1 (they sum to 1.0)
categories = ["A", "B", "C", "D"]
values = [0.25, 0.35, 0.15, 0.25]

# Plot the data
fig, ax = plt.subplots()
ax.bar(categories, values)

# Format the y-axis as percentages (xmax=1 means a value of 1.0 is shown as 100%)
ax.yaxis.set_major_formatter(PercentFormatter(xmax=1))

plt.title("Bar Chart with Percentages")
plt.xlabel("Categories")
plt.ylabel("Percentage")
plt.show()
