In [None]:
import pandas as pd

In [None]:
# Load the Toyota sales records into a DataFrame.
# The path is relative, so it is resolved against the current working directory.
toyota_sales = pd.read_csv(
    filepath_or_buffer='data/car_sales/toyota_sales_data.csv',
)

# Range - The Simplest Measure of Spread

**Range = Maximum - Minimum**

## Advantages:
- Very easy to calculate and understand
- Shows the full span of your data

## Limitations:
- Only uses 2 values (ignores everything in between)
- Heavily affected by outliers
- Doesn't tell you about data distribution

In [None]:
# Range for sale_amount: the distance between the largest and smallest sale.
sale_amounts = toyota_sales['sale_amount']
sale_min, sale_max = sale_amounts.min(), sale_amounts.max()
sale_range = sale_max - sale_min

In [None]:
# Report the extremes and their difference, one per line, formatted as
# dollars with thousands separators and two decimals.
print(
    f"Minimum sale: ${sale_min:,.2f}",
    f"Maximum sale: ${sale_max:,.2f}",
    f"Range: ${sale_range:,.2f}",
    sep="\n",
)

In [None]:
# Range by car model
range_by_model = toyota_sales.groupby('car_model')['sale_amount'].agg([
    ('Min', 'min'),
    ('Max', 'max'),
    ('Range', lambda x: x.max() - x.min()),
    ('Count', 'count')
]).round(0)

In [None]:

# Display the per-model table with the widest price range first.
range_by_model.sort_values(by='Range', ascending=False)

## Range vs Standard Deviation: Key Differences

**Range:**
- Uses only 2 values (min and max)
- Heavily affected by a single outlier
- Easy to calculate

**Standard Deviation:**
- Uses ALL values
- Shows typical variation from mean
- More informative about distribution

In [None]:
# Compare range and standard deviation
comparison = toyota_sales.groupby('car_model')['sale_amount'].agg([
    ('Mean', 'mean'),
    ('Range', lambda x: x.max() - x.min()),
    ('Std_Dev', 'std')
]).round(0)

In [None]:

# Display the comparison table with the widest price range first.
comparison.sort_values(by='Range', ascending=False)

In [None]:
# Show why range can be misleading
print("=== CAMRY ===")
camry = toyota_sales[toyota_sales['car_model'] == 'Camry']['sale_amount']
print(f"Range: ${camry.max() - camry.min():,.2f}")
print(f"Std Dev: ${camry.std():,.2f}")
print(f"Mean: ${camry.mean():,.2f}")

In [None]:
# Show the distribution
print(f"\nValues within $2,000 of mean: {((camry >= camry.mean()-2000) & (camry <= camry.mean()+2000)).sum()}")
print(f"Total values: {len(camry)}")

## When to Use Range

**Range is useful for:**
- Quick, rough sense of data span
- Quality control (checking if values fall within acceptable limits)
- Simple reports for non-technical audiences

**Use Std Dev instead when:**
- You need to understand typical variation
- You want to account for all data points
- You're doing statistical analysis

In [None]:
# Practical example: quality control check.
# The acceptable bounds are named once so the printed message and the actual
# check cannot drift apart.
expected_min, expected_max = 20_000, 50_000
qc_amounts = toyota_sales['sale_amount']

# between() is inclusive on both ends by default, i.e. min <= value <= max.
all_within_expected = qc_amounts.between(expected_min, expected_max).all()

print("=== QUALITY CONTROL CHECK ===")
print(f"Expected price range: ${expected_min:,} to ${expected_max:,}")
print(f"Actual min: ${qc_amounts.min():,.2f}")
print(f"Actual max: ${qc_amounts.max():,.2f}")
print(f"\nAll sales within expected range: {all_within_expected}")

## Summary: Range

**Formula:** Max - Min

**Pros:**
- Easy to calculate and understand
- Shows full data span
- Good for quick checks

**Cons:**
- Only uses 2 values
- Ignores distribution of data
- Heavily affected by outliers

**Bottom line:** Use range for quick checks, use standard deviation for real analysis.

---

## Section 1 of Statistics Complete! 🎉

You've now learned all the key descriptive statistics:
- **Central tendency:** Mean, Median, Mode
- **Spread:** Variance, Standard Deviation, Range
- **Summary:** .describe() method

**Next up:** Section 2 - Advanced Statistical Analysis!