In [None]:
"""
exploratory_analysis.py

This script performs a minimal data exploration on a CSV file. It:
1. Loads the data from a specified path.
2. Prints a preview of the DataFrame (head).
3. Shows info about data types, memory usage, etc.
4. Checks for missing values and duplicates.
5. Generates basic statistical summaries for numerical columns.

Usage:
------
1. Adjust the CSV path in config.py if needed (default is '../data/network_data.csv').
2. Run:
   python -m scripts.exploratory_analysis
3. Ensure you have pandas installed:
   pip install pandas
"""

import logging
import pandas as pd

# Resolve the dataset location. The project configuration module is preferred;
# if `scripts.config` (or its `DATA_PATH` name) cannot be imported, a warning
# is logged and a hard-coded relative path is used instead.
# NOTE(review): presumably the import fails when the script is run outside the
# `scripts` package — confirm the intended invocation.
try:
    from scripts.config import DATA_PATH
except ImportError:
    logging.warning("Could not import DATA_PATH from config. Using default path '../data/network_data.csv'.")
    DATA_PATH = "../data/network_data.csv"  # Default fallback (relative path, resolved against the current working directory)

def main() -> None:
    """
    Perform basic exploratory data analysis on the dataset.

    Loads the CSV located at ``DATA_PATH`` and prints, in order:

    1. the first five rows,
    2. the ``DataFrame.info()`` report (dtypes, non-null counts, memory usage),
    3. the count of missing values for every column that has any,
    4. the number of duplicated rows,
    5. the ``DataFrame.describe()`` statistical summary.

    Returns:
        None. If the file cannot be loaded, an error message is printed and
        the function returns early instead of raising.
    """

    # 1. Load the network traffic data with error handling
    try:
        df = pd.read_csv(DATA_PATH)
    except FileNotFoundError:
        print(f"Error: Data file '{DATA_PATH}' not found. Please check the path.")
        return
    except pd.errors.EmptyDataError:
        print("Error: The CSV file is empty. Please provide a valid dataset.")
        return
    except Exception as e:
        # Top-level script boundary: report any other load failure (parse
        # errors, permission problems, ...) and stop instead of crashing.
        print(f"Unexpected error while loading data: {e}")
        return

    # 2. Display the first few rows to understand the structure
    print("=== First 5 Rows of the Data ===")
    print(df.head(), "\n")

    # 3. Get basic information about the dataset
    print("=== DataFrame Info ===")
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() would emit a stray "None" line.
    df.info()
    print()

    # 4. Check for missing values in each column
    print("=== Missing Values in Each Column ===")
    missing_values = df.isnull().sum()
    columns_with_missing = missing_values[missing_values > 0]
    if columns_with_missing.empty:
        # Avoid printing a confusing empty "Series([], dtype: int64)".
        print("No missing values found.\n")
    else:
        print(columns_with_missing, "\n")  # Only show columns with missing values

    # 5. Check for duplicate rows
    duplicates = df.duplicated().sum()
    print(f"=== Number of Duplicate Rows: {duplicates} ===\n")

    # 6. Generate basic statistics for numerical columns
    print("=== Basic Statistical Summary ===")
    print(df.describe())

# Run the analysis only when this file is executed as a script (directly or
# via `python -m`), not when it is imported as a module.
if __name__ == "__main__":
    main()
