# CSV Processor Pro: Data Analysis Dashboard
This notebook provides an interactive interface to analyze and visualize your CSV data.

--- 
### ⚠️ 1. SETUP: THE "ULTIMATE FIX" CELL
Run this cell! It will automatically install missing libraries and fix the file paths for you.

In [None]:
import os
import sys
from pathlib import Path

# 1. AUTO-INSTALL MISSING LIBRARIES
# Try the third-party imports first; pip is only invoked when one of them is
# missing from the current environment.
try:
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns
except ImportError:
    print("üöÄ Installing missing libraries (Pandas, Matplotlib, Seaborn)... this take a moment.")
    import importlib
    import subprocess
    # Run pip through the interpreter that is executing this cell so the
    # packages land in the same environment the imports below will search.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "pandas", "matplotlib", "seaborn", "faker"])
    # Packages installed while the interpreter is running can be missed by the
    # import system's cached directory listings; refresh them before retrying.
    importlib.invalidate_caches()
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns
    print("‚úÖ Libraries installed successfully!")
else:
    # Reached only when every import above succeeded without installing.
    print("‚úÖ Core libraries (Pandas, Matplotlib) are already installed.")

# 2. ROBUST FOLDER DETECTION
# We look for the 'scripts' folder by walking upwards from the current
# working directory.
def find_root(start=None, marker='scripts', max_levels=3):
    """Return the nearest directory at or above ``start`` that contains ``marker``.

    ``start`` itself is checked first, then up to ``max_levels`` parent
    directories, closest first.

    Args:
        start: Directory to begin from. Defaults to the current working
            directory.
        marker: Name of the sub-directory that identifies the project root.
        max_levels: Number of parent directories above ``start`` to inspect.

    Returns:
        pathlib.Path: The first inspected directory that has a ``marker``
        sub-directory.

    Raises:
        FileNotFoundError: If none of the inspected directories contains
            ``marker``. It is a subclass of ``Exception``, so callers that
            catch ``Exception`` are unaffected.
    """
    candidate = Path.cwd() if start is None else Path(start)
    # +1 because the starting directory is level 0; without it the search
    # stops one level short of "max_levels levels up".
    for _ in range(max_levels + 1):
        if (candidate / marker).is_dir():
            return candidate
        if candidate.parent == candidate:
            # Top of the path reached; there is nothing further up to check.
            break
        candidate = candidate.parent
    raise FileNotFoundError("‚ùå ROOT NOT FOUND! Please ensure you are running this from within the 'csv_processor' project.")

# Locate the project once and keep an absolute string form of it, which is
# what sys.path entries and os.path.join expect.
project_root = find_root()
project_root_str = os.fspath(project_root.resolve())

# 3. ADD TO PYTHON PATH
# Put the root at the front of the module search path, but only if it is not
# already listed, so re-running the cell does not add duplicates.
if project_root_str not in sys.path:
    sys.path.insert(0, project_root_str)

print("üìÇ Project Root: " + project_root_str)

# 4. TEST IMPORT
# Check that the project's helper modules are importable; on failure, report
# the error and whether the project root is on sys.path instead of raising.
try:
    from scripts.kpi_analyzer import calculate_kpis
    from scripts.visualizer import generate_visualizations
except ImportError as import_err:
    print(f"‚ùå ERROR: Still cannot find scripts. Error: {import_err}")
    root_on_path = project_root_str in sys.path
    print(f"Debug info: Current sys.path contains root? {root_on_path}")
else:
    print("‚úÖ SUCCESS: 'scripts' module found and loaded!")

# 5. LOAD DATA
# Read <project root>/data/large_sample.csv into `df` when the file exists;
# otherwise print a hint and leave `df` undefined.
data_path = os.path.join(project_root_str, 'data', 'large_sample.csv')
if not os.path.exists(data_path):
    print(f"‚ùå ERROR: Data not found at {data_path}. Run generate_sample.py first!")
else:
    df = pd.read_csv(data_path)
    row_count = len(df)
    print(f"‚úÖ SUCCESS: Loaded {row_count} rows from CSV.")
    # Preview the first rows in the notebook output.
    display(df.head())

### 📊 2. Key Performance Indicators (KPIs)

In [None]:
# Only run when the setup cell managed to define `df`.
if 'df' in locals():
    kpis = calculate_kpis(df)
    print("--- Summary KPIs ---")
    for k, v in kpis.items():
        # Turn a key such as "some_key" into a heading such as "Some Key".
        label = k.replace('_', ' ').title()
        # Floats get thousands separators and two decimals; everything else
        # is printed with its default formatting.
        shown = f"{v:,.2f}" if isinstance(v, float) else f"{v}"
        print(f"{label}: {shown}")

### 📈 3. Visualizations

In [None]:
# Only run when the setup cell managed to define `df`.
if 'df' in locals():
    sns.set_theme(style="whitegrid")
    # Create the figure and its single axes explicitly, then draw and title
    # on that axes rather than relying on the implicit "current axes".
    salary_fig = plt.figure(figsize=(10, 6))
    salary_ax = salary_fig.add_subplot()
    sns.histplot(df['salary'], kde=True, color='green', ax=salary_ax)
    salary_ax.set_title('Overall Salary Distribution')
    plt.show()