# Data Exploration - Data Science Koans

Master exploratory data analysis!

## What You Will Learn
- Loading and profiling datasets
- Detecting missing values
- Data type conversions
- Correlation analysis
- Outlier detection

## How to Use
1. Read each koan's objective
2. Complete the TODOs in the code cell
3. Run the cell to validate your solution
4. Iterate until the validation passes

In [None]:
# Setup
# Standard-library and third-party dependencies used by the koans below.
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# The `koans` package is resolved via a path two directories above the current
# working directory, so that path must be registered before importing it.
sys.path.append('../..')
from koans.core.validator import KoanValidator
from koans.core.progress import ProgressTracker

# Shared objects used by every koan cell in this notebook.
validator = KoanValidator('03_data_exploration')
tracker = ProgressTracker()

print('Setup complete!')
print(f"Progress: {tracker.get_notebook_progress('03_data_exploration')}%")

## KOAN 3.1: Loading CSV Data
**Objective**: Read files
**Difficulty**: Beginner

In [None]:
def load_csv():
    """Koan 3.1: build a small sample DataFrame.

    Returns:
        A pandas DataFrame with a single column 'A' holding 1, 2, 3.
    """
    # TODO: Create sample DataFrame
    return pd.DataFrame({'A': [1, 2, 3]})


@validator.koan(1, 'Loading CSV Data', difficulty='Beginner')
def validate():
    """Validation for koan 1: load_csv() must return a DataFrame."""
    result = load_csv()
    assert isinstance(result, pd.DataFrame), (
        f'expected a pandas DataFrame, got {type(result).__name__}'
    )


validate()

## KOAN 3.2: Data Profiling
**Objective**: Understand structure
**Difficulty**: Beginner

In [None]:
def get_info():
    """Koan 3.2 exercise stub: report the dimensions of a small DataFrame.

    The learner is expected to return the tuple (num_rows, num_cols) for the
    DataFrame built below. Until the TODO is completed this returns None and
    the assertion in validate() fails.
    """
    df = pd.DataFrame({'A': [1, 2, 3], 'B': ['x', 'y', 'z']})
    # TODO: Return tuple (num_rows, num_cols)
    pass


@validator.koan(2, 'Data Profiling', difficulty='Beginner')
def validate():
    """Validation for koan 2: get_info() must return (3, 2)."""
    result = get_info()
    assert result == (3, 2), f'expected (3, 2), got {result!r}'


validate()

## KOAN 3.3: Missing Value Detection
**Objective**: Find nulls
**Difficulty**: Beginner

In [None]:
def count_missing():
    """Koan 3.3 exercise stub: count missing values in a small DataFrame.

    The learner is expected to return the total number of missing values
    across both columns. Until the TODO is completed this returns None and
    the assertion in validate() fails.
    """
    df = pd.DataFrame({'A': [1, None, 3], 'B': [4, 5, None]})
    # TODO: Return total count of missing values
    pass


@validator.koan(3, 'Missing Value Detection', difficulty='Beginner')
def validate():
    """Validation for koan 3: count_missing() must return 2."""
    result = count_missing()
    assert result == 2, f'expected 2 missing values, got {result!r}'


validate()

## KOAN 3.4: Data Type Analysis
**Objective**: Check dtypes
**Difficulty**: Beginner

In [None]:
def analyze_types():
    """Koan 3.4 exercise stub: count the numeric columns of a DataFrame.

    The learner is expected to return how many columns of the DataFrame built
    below are numeric. Until the TODO is completed this returns None and the
    assertion in validate() fails.
    """
    df = pd.DataFrame({'nums': [1, 2, 3], 'text': ['a', 'b', 'c']})
    # TODO: Return number of numeric columns
    pass


@validator.koan(4, 'Data Type Analysis', difficulty='Beginner')
def validate():
    """Validation for koan 4: analyze_types() must return 1."""
    result = analyze_types()
    assert result == 1, f'expected 1 numeric column, got {result!r}'


validate()

## KOAN 3.5: Basic Visualization
**Objective**: Create plots
**Difficulty**: Beginner

In [None]:
def create_histogram():
    """Koan 3.5 exercise stub: plot a histogram of a small sample.

    As written, this returns True without drawing anything; the learner is
    expected to add the plotting call for `data` before the return.

    Returns:
        True.
    """
    data = [1, 2, 2, 3, 3, 3, 4, 4, 5]
    # TODO: Create histogram and return True
    return True


@validator.koan(5, 'Basic Visualization', difficulty='Beginner')
def validate():
    """Validation for koan 5: create_histogram() must return True."""
    result = create_histogram()
    # Identity check instead of `== True`, so truthy look-alikes such as 1
    # are not accepted in place of the boolean.
    assert result is True, f'expected True, got {result!r}'


validate()

## KOAN 3.6: Correlation Analysis
**Objective**: Calculate correlations
**Difficulty**: Beginner

In [None]:
def calc_correlation():
    """Koan 3.6 exercise stub: correlation between two DataFrame columns.

    The learner is expected to return the correlation between columns 'A' and
    'B' of the DataFrame built below. Until the TODO is completed this returns
    None and the assertion in validate() fails.
    """
    df = pd.DataFrame({'A': [1, 2, 3, 4, 5], 'B': [2, 4, 6, 8, 10]})
    # TODO: Return correlation between A and B
    pass


@validator.koan(6, 'Correlation Analysis', difficulty='Beginner')
def validate():
    """Validation for koan 6: the correlation must be (approximately) 1.0."""
    result = calc_correlation()
    assert result is not None, 'calc_correlation() returned None'
    # A computed correlation is a float and may differ from 1.0 by rounding
    # error (e.g. 0.9999999999999999), so compare with a tolerance rather
    # than exact equality.
    assert np.isclose(result, 1.0), f'expected correlation of 1.0, got {result!r}'


validate()

## KOAN 3.7: Unique Values
**Objective**: Count distinct
**Difficulty**: Beginner

In [None]:
def count_unique():
    """Koan 3.7 exercise stub: count distinct values in a column.

    The learner is expected to return the number of unique values in the
    'category' column. Until the TODO is completed this returns None and the
    assertion in validate() fails.
    """
    df = pd.DataFrame({'category': ['A', 'B', 'A', 'C', 'B', 'A']})
    # TODO: Return number of unique values in 'category'
    pass


@validator.koan(7, 'Unique Values', difficulty='Beginner')
def validate():
    """Validation for koan 7: count_unique() must return 3."""
    result = count_unique()
    assert result == 3, f'expected 3 unique values, got {result!r}'


validate()

## KOAN 3.8: Cross-tabulation
**Objective**: Frequency tables
**Difficulty**: Beginner

In [None]:
def create_crosstab():
    """Koan 3.8: build a frequency table of column 'X' against column 'Y'.

    Returns:
        The DataFrame produced by pd.crosstab for the two columns.
    """
    df = pd.DataFrame({'X': ['A', 'A', 'B', 'B'], 'Y': [1, 2, 1, 2]})
    # TODO: Create crosstab of X and Y
    return pd.crosstab(df['X'], df['Y'])


@validator.koan(8, 'Cross-tabulation', difficulty='Beginner')
def validate():
    """Validation for koan 8: create_crosstab() must return a DataFrame."""
    result = create_crosstab()
    assert isinstance(result, pd.DataFrame), (
        f'expected a pandas DataFrame, got {type(result).__name__}'
    )


validate()

## KOAN 3.9: Outlier Detection
**Objective**: Find extremes
**Difficulty**: Beginner

In [None]:
def detect_outliers():
    """Koan 3.9 exercise stub: find the positions of outliers in a sample.

    The learner is expected to return a list of indices whose value exceeds
    mean + 2 * std of `data`. Until the TODO is completed this returns None
    and the assertion in validate() fails.
    """
    data = [10, 12, 13, 12, 11, 100, 13]
    # TODO: Return list of indices where value > mean + 2*std
    pass


@validator.koan(9, 'Outlier Detection', difficulty='Beginner')
def validate():
    """Validation for koan 9: index 5 (the value 100) must be reported."""
    result = detect_outliers()
    # Guard first: `5 in None` would raise TypeError instead of failing the
    # koan with an AssertionError while the stub is still unfinished.
    assert result is not None, 'detect_outliers() returned None'
    assert 5 in result, f'expected index 5 among the outliers, got {result!r}'


validate()

## KOAN 3.10: Data Quality Report
**Objective**: Comprehensive check
**Difficulty**: Beginner

In [None]:
def quality_report():
    """Koan 3.10 exercise stub: summarise basic data-quality figures.

    The learner is expected to return a dict with the keys 'total_rows' and
    'total_nulls' for the DataFrame built below. Until the TODO is completed
    this returns None and the assertion in validate() fails.
    """
    df = pd.DataFrame({'A': [1, None, 3], 'B': [4, 5, 6]})
    # TODO: Return dict with 'total_rows', 'total_nulls'
    pass


@validator.koan(10, 'Data Quality Report', difficulty='Beginner')
def validate():
    """Validation for koan 10: the report must hold both keys with correct values."""
    result = quality_report()
    # Guard first: a membership test on None would raise TypeError instead of
    # failing the koan with an AssertionError while the stub is unfinished.
    assert result is not None, 'quality_report() returned None'
    assert 'total_rows' in result, "report is missing the 'total_rows' key"
    assert 'total_nulls' in result, "report is missing the 'total_nulls' key"
    # The sample DataFrame has three rows and exactly one missing value.
    assert result['total_rows'] == 3, (
        f"expected total_rows == 3, got {result['total_rows']!r}"
    )
    assert result['total_nulls'] == 1, (
        f"expected total_nulls == 1, got {result['total_nulls']!r}"
    )


validate()

## Congratulations!

You completed Data Exploration!

In [None]:
# Fetch this notebook's progress value from the tracker created in the setup
# cell and print it as a percentage.
progress = tracker.get_notebook_progress('03_data_exploration')
print(f'Final Progress: {progress}%')
# The congratulation is printed only when the reported value equals 100.
if progress == 100:
    print('Excellent! You mastered Data Exploration!')