# Matplotlib + pandas: Beginner Tutorial — **Combined Stress Datasets**

This single notebook is **beginner‑friendly** and follows the exact steps you asked for, **for each dataset**:

1. **Loading the dataset with pandas**  
2. **Setting the colors** (global color cycle)  
3. **Setting the y axis** (limits, ticks, scale)  
4. **Setting the x axis** (limits, ticks, scale)  
5. **Axis names** (x‑label, y‑label, title)  
6. **Setting the colors — choices shown in *commented code***  

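For each dataset we generate a **histogram**, a **scatter plot**, and a **bar plot**. All plotting is done with `matplotlib.pyplot` alone; `pandas` loads the data and `numpy` helps compute tick positions.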


In [None]:
# Shared helpers for both datasets (auto‑picks sensible columns)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from cycler import cycler

def choose_columns(df: pd.DataFrame, max_categories: int = 10):
    """Auto-pick columns for the histogram, scatter plot, and bar plot.

    Parameters
    ----------
    df : pd.DataFrame
        The dataset to inspect.
    max_categories : int, default 10
        A numeric column with at most this many distinct non-null values is
        also treated as categorical-like.

    Returns
    -------
    dict
        Keys ``numeric_cols`` and ``categorical_cols`` (lists of column
        labels) plus ``hist_col``, ``scatter_x``, ``scatter_y``, ``bar_cat``
        and ``bar_val`` (a column label, or ``None`` when no suitable column
        exists).
    """
    numeric_cols = list(df.select_dtypes(include=[np.number]).columns)
    # Treat object/bool/category as categorical-like (and low-cardinality numbers too)
    cat_like = list(df.select_dtypes(include=['object', 'bool', 'category']).columns)
    low_card_num = [
        c for c in numeric_cols if df[c].nunique(dropna=True) <= max_categories
    ]
    categorical_cols = list(dict.fromkeys(cat_like + low_card_num))  # preserve order, dedup

    hist_col = numeric_cols[0] if numeric_cols else None
    scatter_x = numeric_cols[0] if len(numeric_cols) >= 1 else None
    scatter_y = numeric_cols[1] if len(numeric_cols) >= 2 else None
    bar_cat = categorical_cols[0] if categorical_cols else None
    # The bar value must differ from the grouping column: averaging a column
    # grouped by itself just echoes the group keys. If no other numeric column
    # exists, bar_val is None.
    bar_val = next((c for c in numeric_cols if c != bar_cat), None)

    return {
        "numeric_cols": numeric_cols,
        "categorical_cols": categorical_cols,
        "hist_col": hist_col,
        "scatter_x": scatter_x,
        "scatter_y": scatter_y,
        "bar_cat": bar_cat,
        "bar_val": bar_val
    }

def set_global_colors(colors=None, preview=True):
    """Install a global matplotlib color cycle and optionally preview it.

    Parameters
    ----------
    colors : iterable of color specs, optional
        Colors to cycle through. Defaults to the six-color palette below
        (edit this list to your taste).
    preview : bool, default True
        When True, draw one horizontal line per color so the cycle can be
        inspected. Pass False to change the cycle without drawing a figure.

    Raises
    ------
    ValueError
        If ``colors`` is empty.
    """
    if colors is None:
        colors = [
            '#1f77b4',  # blue
            '#ff7f0e',  # orange
            '#2ca02c',  # green
            '#d62728',  # red
            '#9467bd',  # purple
            '#8c564b',  # brown
        ]
    colors = list(colors)
    if not colors:
        raise ValueError("colors must contain at least one color")

    # Global color cycle for consistent styling
    plt.rcParams['axes.prop_cycle'] = cycler(color=colors)

    if not preview:
        return

    # Preview the cycle: one line per palette entry, so the preview stays in
    # sync when the palette is edited.
    plt.figure()
    for i in range(len(colors)):
        plt.plot([0, 1], [i, i], label=f'color {i}')
    plt.title('Preview of Global Color Cycle')
    plt.legend()
    plt.show()

## A) **StressLevelDataset** — step‑by‑step

### 1) Loading the dataset with pandas

In [None]:
# Dataset A lives in the project-relative Data folder.
csv_path_A = r"./Data/StressLevelDataset.csv"
df_A = pd.read_csv(filepath_or_buffer=csv_path_A)

# Show the first five rows as the cell output.
df_A.head(n=5)

### 2) Setting the colors (global color cycle)

In [None]:
# Install the shared palette as matplotlib's global color cycle; the helper
# also draws a small preview figure of the cycle.
set_global_colors()

### 3) Setting the **y axis** (Histogram)

In [None]:
# Histogram of the auto-picked numeric column of dataset A, with explicit
# y-axis scale, limits, and ticks.
picks_A = choose_columns(df_A)
print("Auto-picked for A:", picks_A)

hist_col = picks_A['hist_col']
if hist_col is not None:
    vals = df_A[hist_col].dropna()
    plt.figure()
    n, bins, patches = plt.hist(vals, bins=20, alpha=0.9, edgecolor='black')

    # Set the scale FIRST: choosing a scale re-installs the default tick
    # locator, so calling it after plt.yticks() would discard the custom ticks.
    plt.yscale('linear')

    tallest = int(max(n))            # height of the tallest bin
    plt.ylim(0, tallest + 2)         # a little headroom above the tallest bar
    step = max(1, tallest // 6)      # roughly six ticks, never a zero step
    plt.yticks(range(0, tallest + 3, step))

    plt.xlabel(hist_col)
    plt.ylabel('Count')
    plt.title('Histogram: ' + str(hist_col))
    plt.grid(True, linestyle='--', alpha=0.4)
    plt.show()
else:
    print("No numeric column found for histogram in A.")

### 4) Setting the **x axis** (Scatter Plot)

In [None]:
# Scatter plot of the first two numeric columns of dataset A, with explicit
# x-axis scale, limits, and ticks.
xcol, ycol = picks_A['scatter_x'], picks_A['scatter_y']
if xcol is not None and ycol is not None:
    x = df_A[xcol]
    y = df_A[ycol]
    plt.figure()
    sc = plt.scatter(x, y, s=60, edgecolors='black', linewidths=0.5)

    # Set the scale FIRST: choosing a scale re-installs the default tick
    # locator, so calling it after plt.xticks() would discard the custom ticks.
    plt.xscale('linear')

    xmin, xmax = x.min(), x.max()
    # 2% padding on each side; fall back to a unit span when all x are equal.
    pad = 0.02 * (xmax - xmin if xmax > xmin else 1.0)
    plt.xlim(xmin - pad, xmax + pad)
    # Six evenly spaced ticks; np.unique removes duplicates that appear when
    # rounding collapses neighbouring positions (e.g. a constant column).
    plt.xticks(np.unique(np.round(np.linspace(xmin, xmax, 6), 2)))

    plt.xlabel(xcol)
    plt.ylabel(ycol)
    plt.title(f"Scatter: {xcol} vs {ycol}")
    plt.grid(True, linestyle=':', alpha=0.5)
    plt.show()
else:
    print("Need at least two numeric columns for scatter in A.")

### 5) Axis names (Bar Plot)

In [None]:
# Bar plot for dataset A: mean of a numeric column per category when both are
# available, otherwise plain category counts.
bar_cat, bar_val = picks_A['bar_cat'], picks_A['bar_val']
# Averaging a column grouped by itself only echoes the group keys, so that
# case falls through to the count plot below.
if bar_cat is not None and bar_val is not None and bar_cat != bar_val:
    # Keep the 10 most frequent categories (missing values count as a category).
    top_cats = df_A[bar_cat].value_counts(dropna=False).index[:10]
    subset = df_A[df_A[bar_cat].isin(top_cats)]
    # dropna=False keeps the missing-value group that value_counts(dropna=False)
    # may have selected; groupby would otherwise drop it silently.
    mean_vals = subset.groupby(bar_cat, dropna=False)[bar_val].mean()

    plt.figure()
    plt.bar(mean_vals.index.astype(str), mean_vals.values, edgecolor='black')
    plt.xlabel(bar_cat)
    plt.ylabel('Mean of ' + str(bar_val))
    plt.title(f"Mean {bar_val} by {bar_cat}")
    plt.grid(axis='y', linestyle='-.', alpha=0.3)
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()
elif bar_cat is not None:
    counts = df_A[bar_cat].value_counts(dropna=False).head(10)
    plt.figure()
    plt.bar(counts.index.astype(str), counts.values, edgecolor='black')
    plt.xlabel(bar_cat)
    plt.ylabel('Count')
    plt.title('Category counts: ' + str(bar_cat))
    plt.xticks(rotation=45, ha='right')
    plt.grid(axis='y', linestyle='-.', alpha=0.3)
    plt.tight_layout()
    plt.show()
else:
    print("No categorical-like column found for a bar plot in A.")

## B) **Stress_Dataset** — step‑by‑step

### 1) Loading the dataset with pandas

In [None]:
# Prefer the project-relative Data folder (same layout as dataset A) so the
# notebook runs on any machine.
csv_path_B = r"./Data/Stress_Dataset.csv"
try:
    df_B = pd.read_csv(csv_path_B)
except FileNotFoundError:
    # Fall back to the original absolute location so existing setups keep working.
    csv_path_B = r"/mnt/data/Stress_Dataset.csv"
    df_B = pd.read_csv(csv_path_B)
df_B.head()

### 2) Setting the colors (global color cycle)

In [None]:
# Re-apply the shared global color cycle before plotting dataset B; the helper
# also draws its preview figure on each call.
set_global_colors()

### 3) Setting the **y axis** (Histogram)

In [None]:
# Histogram of the auto-picked numeric column of dataset B, with explicit
# y-axis scale, limits, and ticks.
picks_B = choose_columns(df_B)
print("Auto-picked for B:", picks_B)

hist_col = picks_B['hist_col']
if hist_col is not None:
    vals = df_B[hist_col].dropna()
    plt.figure()
    n, bins, patches = plt.hist(vals, bins=20, alpha=0.9, edgecolor='black')

    # Set the scale FIRST: choosing a scale re-installs the default tick
    # locator, so calling it after plt.yticks() would discard the custom ticks.
    plt.yscale('linear')

    tallest = int(max(n))            # height of the tallest bin
    plt.ylim(0, tallest + 2)         # a little headroom above the tallest bar
    step = max(1, tallest // 6)      # roughly six ticks, never a zero step
    plt.yticks(range(0, tallest + 3, step))

    plt.xlabel(hist_col)
    plt.ylabel('Count')
    plt.title('Histogram: ' + str(hist_col))
    plt.grid(True, linestyle='--', alpha=0.4)
    plt.show()
else:
    print("No numeric column found for histogram in B.")

### 4) Setting the **x axis** (Scatter Plot)

In [None]:
# Scatter plot of the first two numeric columns of dataset B, with explicit
# x-axis scale, limits, and ticks.
xcol, ycol = picks_B['scatter_x'], picks_B['scatter_y']
if xcol is not None and ycol is not None:
    x = df_B[xcol]
    y = df_B[ycol]
    plt.figure()
    sc = plt.scatter(x, y, s=60, edgecolors='black', linewidths=0.5)

    # Set the scale FIRST: choosing a scale re-installs the default tick
    # locator, so calling it after plt.xticks() would discard the custom ticks.
    plt.xscale('linear')

    xmin, xmax = x.min(), x.max()
    # 2% padding on each side; fall back to a unit span when all x are equal.
    pad = 0.02 * (xmax - xmin if xmax > xmin else 1.0)
    plt.xlim(xmin - pad, xmax + pad)
    # Six evenly spaced ticks; np.unique removes duplicates that appear when
    # rounding collapses neighbouring positions (e.g. a constant column).
    plt.xticks(np.unique(np.round(np.linspace(xmin, xmax, 6), 2)))

    plt.xlabel(xcol)
    plt.ylabel(ycol)
    plt.title(f"Scatter: {xcol} vs {ycol}")
    plt.grid(True, linestyle=':', alpha=0.5)
    plt.show()
else:
    print("Need at least two numeric columns for scatter in B.")

### 5) Axis names (Bar Plot)

In [None]:
# Bar plot for dataset B: mean of a numeric column per category when both are
# available, otherwise plain category counts.
bar_cat, bar_val = picks_B['bar_cat'], picks_B['bar_val']
# Averaging a column grouped by itself only echoes the group keys, so that
# case falls through to the count plot below.
if bar_cat is not None and bar_val is not None and bar_cat != bar_val:
    # Keep the 10 most frequent categories (missing values count as a category).
    top_cats = df_B[bar_cat].value_counts(dropna=False).index[:10]
    subset = df_B[df_B[bar_cat].isin(top_cats)]
    # dropna=False keeps the missing-value group that value_counts(dropna=False)
    # may have selected; groupby would otherwise drop it silently.
    mean_vals = subset.groupby(bar_cat, dropna=False)[bar_val].mean()

    plt.figure()
    plt.bar(mean_vals.index.astype(str), mean_vals.values, edgecolor='black')
    plt.xlabel(bar_cat)
    plt.ylabel('Mean of ' + str(bar_val))
    plt.title(f"Mean {bar_val} by {bar_cat}")
    plt.grid(axis='y', linestyle='-.', alpha=0.3)
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()
elif bar_cat is not None:
    counts = df_B[bar_cat].value_counts(dropna=False).head(10)
    plt.figure()
    plt.bar(counts.index.astype(str), counts.values, edgecolor='black')
    plt.xlabel(bar_cat)
    plt.ylabel('Count')
    plt.title('Category counts: ' + str(bar_cat))
    plt.xticks(rotation=45, ha='right')
    plt.grid(axis='y', linestyle='-.', alpha=0.3)
    plt.tight_layout()
    plt.show()
else:
    print("No categorical-like column found for a bar plot in B.")

## 6) Setting the colors — choices in **commented code**

In [None]:
# =========================
# COLOR CHOICES (reference)
# =========================

# 1) Short color codes (Matlab-style):
# 'b','g','r','c','m','y','k','w'
# plt.plot(x, y, 'r--')

# 2) Named colors:
# Tableau palette: 'tab:blue','tab:orange','tab:green','tab:red','tab:purple','tab:brown','tab:pink','tab:gray'
# CSS4 color names also work, e.g. 'steelblue', 'tomato', 'gold'

# 3) Hex colors:
# '#1f77b4','#ff7f0e','#2ca02c','#d62728','#9467bd'

# 4) Colormaps for scalar → color:
# plt.scatter(x, y, c=values, cmap='viridis')  # also: 'plasma','inferno','magma','cividis','turbo'

# 5) Global color cycle:
# from cycler import cycler
# plt.rcParams['axes.prop_cycle'] = cycler(color=['#1f77b4','#ff7f0e','#2ca02c'])

# 6) Lines & markers:
# linestyle: '-' '--' '-.' ':'
# marker: 'o' 's' '^' 'x' 'D' 'P' '*'
# linewidth (lw)=2, markersize (ms)=8, markeredgecolor (mec)='black', markerfacecolor (mfc)='white'
