In [0]:
%run ../utils/helpers.py

In [0]:
import pytest
from unittest.mock import MagicMock
from your_package.readers import read_volume_files


@pytest.fixture
def mock_df():
    """Provide a stand-in DataFrame mock whose rename call returns itself."""
    frame = MagicMock()
    # Renaming hands back the very same mock, so tests can compare the
    # reader's result to this fixture by identity.
    frame.withColumnsRenamed.return_value = frame
    return frame


@pytest.fixture
def patch_spark(monkeypatch, mock_df):
    """Swap ``your_package.readers.spark`` for a mock and return that mock.

    Both reader call chains configured below end in ``mock_df``; the mock
    session is returned so tests can inspect the calls recorded on it.
    """
    session = MagicMock()
    reader = session.read

    # read.format(...).options(...).load(...) -> mock_df
    format_options = reader.format.return_value.options
    format_options.return_value.load.return_value = mock_df

    # read.options(...).excel(...) -> mock_df
    reader.options.return_value.excel.return_value = mock_df

    monkeypatch.setattr("your_package.readers.spark", session)
    return session


@pytest.mark.parametrize(
    "path,expected_format,is_excel",
    [
        ("file.csv", "csv", False),
        ("file.json", "json", False),
        ("file.xlsx", None, True),
    ],
)
def test_read_various_formats(path, expected_format, is_excel, patch_spark, mock_df):
    """Check the reader chain used per file extension and the column rename.

    Excel paths are expected to go through ``read.options(...).excel(path)``;
    the other cases through ``read.format(fmt).options(...).load(path)``.
    Every case must return ``mock_df`` and rename columns exactly once.
    """
    # Keyword arguments both reader chains are expected to receive.
    reader_kwargs = {"header": "true", "inferSchema": "true"}

    result = read_volume_files(path, column_mapping_dict={"A": "a"})
    assert result is mock_df

    reader = patch_spark.read
    if not is_excel:
        reader.format.assert_called_with(expected_format)
        chained_options = reader.format.return_value.options
        chained_options.assert_called_with(**reader_kwargs)
        chained_options.return_value.load.assert_called_once_with(path)
    else:
        reader.options.assert_called_with(**reader_kwargs)
        reader.options.return_value.excel.assert_called_once_with(path)

    mock_df.withColumnsRenamed.assert_called_once_with({"A": "a"})


@pytest.mark.parametrize(
    "bad_path",
    [
        "file.txt",
        "file",
        "file.parquet",
    ],
)
def test_unsupported_format_raises(bad_path, patch_spark):
    """Expect ``ValueError`` for each of the parametrized paths."""
    # patch_spark is requested only so the mocked spark is installed;
    # the test never inspects it.
    with pytest.raises(ValueError):
        read_volume_files(bad_path)
