In [0]:
import pytest
from unittest.mock import MagicMock, patch
from pyspark.sql import DataFrame

In [0]:
%run ../utils/helpers

In [0]:
@pytest.fixture
def mock_df():
    """Provide a ``DataFrame``-spec'd mock whose ``withColumnsRenamed`` returns itself."""
    frame = MagicMock(spec=DataFrame)
    # Self-returning stub: whatever calls withColumnsRenamed gets this same mock back.
    frame.withColumnsRenamed.return_value = frame
    return frame

In [0]:
@patch("utils.reader.spark")
def test_read_csv(mock_spark):
    """Verify the reader calls made by ``read_volume_files`` for a ``.csv`` path.

    Asserts that ``format("csv")`` and ``load(path)`` are called on the patched
    ``spark.read``, that ``options`` is called at least once, that the Excel
    reader is never touched, that ``withColumnsRenamed`` is called exactly once
    on the loaded frame, and that the very same frame object is returned.
    """
    # NOTE(review): read_volume_files is brought in via %run; confirm it really
    # resolves `spark` from utils.reader, otherwise this patch has no effect.
    csv_path = "/Volumes/demo/file.csv"

    # DataFrame stand-in; withColumnsRenamed hands back the same mock.
    mock_df = MagicMock(spec=DataFrame)
    mock_df.withColumnsRenamed.return_value = mock_df

    # Fluent reader stub: options()/format() return the reader so calls can chain,
    # and load() yields the frame. No excel() stub: it is irrelevant to CSV input.
    mock_reader = MagicMock()
    mock_reader.options.return_value = mock_reader
    mock_reader.format.return_value = mock_reader
    mock_reader.load.return_value = mock_df

    mock_spark.read = mock_reader

    result = read_volume_files(csv_path)

    mock_reader.format.assert_called_with("csv")
    mock_reader.options.assert_called()
    mock_reader.load.assert_called_with(csv_path)
    # A CSV read must not fall through to the Excel reader.
    mock_reader.excel.assert_not_called()
    mock_df.withColumnsRenamed.assert_called_once()
    # Identity check: the function must return the exact frame that was loaded.
    assert result is mock_df

In [0]:
@patch("utils.reader.spark")
def test_read_json(mock_spark):
    """Verify the reader calls made by ``read_volume_files`` for a ``.json`` path.

    Asserts ``format("json")`` and ``load(path)`` on the patched ``spark.read``,
    a single ``withColumnsRenamed`` call on the loaded frame, and that the
    returned value equals that frame.
    """
    source_path = "/Volumes/demo/file.json"

    # DataFrame stand-in; renaming columns hands back the same mock.
    loaded = MagicMock(spec=DataFrame)
    loaded.withColumnsRenamed.return_value = loaded

    # Reader stub: the fluent calls return the reader itself, load() yields the frame.
    reader = MagicMock()
    for fluent_call in (reader.options, reader.format):
        fluent_call.return_value = reader
    reader.load.return_value = loaded
    mock_spark.read = reader

    outcome = read_volume_files(source_path)

    reader.format.assert_called_with("json")
    reader.load.assert_called_with(source_path)
    loaded.withColumnsRenamed.assert_called_once()
    assert outcome == loaded

In [0]:
@patch("utils.reader.spark")
def test_read_excel(mock_spark):
    """Verify the reader calls made by ``read_volume_files`` for a ``.xlsx`` path.

    Asserts ``excel(path)`` on the patched ``spark.read``, a single
    ``withColumnsRenamed`` call on the resulting frame, and that the returned
    value equals that frame.
    """
    workbook_path = "/Volumes/demo/file.xlsx"

    # DataFrame stand-in; renaming columns hands back the same mock.
    sheet_frame = MagicMock(spec=DataFrame)
    sheet_frame.withColumnsRenamed.return_value = sheet_frame

    # Reader stub: options() returns the reader itself, excel() yields the frame.
    reader = MagicMock()
    reader.excel.return_value = sheet_frame
    reader.options.return_value = reader
    mock_spark.read = reader

    outcome = read_volume_files(workbook_path)

    reader.excel.assert_called_with(workbook_path)
    sheet_frame.withColumnsRenamed.assert_called_once()
    assert outcome == sheet_frame

In [0]:
def test_invalid_format():
    """Expect ``read_volume_files`` to raise ``ValueError`` for the path ``"file.abc"``."""
    with pytest.raises(ValueError):
        read_volume_files("file.abc")