In [1]:
from unittest.mock import patch
from file_processing import Directory

In [3]:
# Verify that Directory.generate_report(..., open_files=False) constructs every
# File with open_file=False, using an autospecced stand-in for the File class.
# NOTE(review): patch() replaces the name `File` inside `file_processing.file`.
# If Directory binds File in its own module via `from ... import File`, the
# patch must target that module instead -- confirm against the package layout.
with patch('file_processing.file.File', autospec=True) as mock_file:
    dir1 = Directory('tests/resources/directory_test_files')

    # Stub the File API on the instance returned by the patched class.
    # NOTE(review): these are configured as callables (`.return_value`); if any
    # of them is a plain attribute/property on File, assign the value directly
    # instead. The recorded run of this cell ended in KeyError: 'size', whose
    # cause is not visible from this cell -- TODO investigate.
    instance = mock_file.return_value
    instance.process.return_value = None
    instance.file_path.return_value = ''
    instance.file_name.return_value = ''
    instance.extension.return_value = ''
    instance.owner.return_value = ''
    instance.size.return_value = 10000000
    instance.modification_time.return_value = 10000000
    instance.access_time.return_value = 10000000
    instance.creation_time.return_value = 10000000
    instance.parent_directory.return_value = ''
    instance.permissions.return_value = 777
    instance.is_file.return_value = True
    instance.is_symlink.return_value = False
    instance.absolute_path.return_value = ''
    instance.metadata.return_value = {}

    dir1.generate_report('temp.csv', open_files=False)

    # Inspect constructor calls only. `mock_calls` also records calls made on
    # the returned instance (e.g. `().process()`), whose kwargs never contain
    # `open_file`, so iterating it would fail the assertion spuriously.
    constructor_calls = mock_file.call_args_list

    # Guard against a vacuous pass: with no recorded constructor calls the
    # loop below would check nothing.
    assert constructor_calls, "File was never constructed through the patched name"

    # Entries of call_args_list are (args, kwargs) two-tuples.
    for _args, kwargs in constructor_calls:
        assert kwargs.get('open_file') is False, "File was opened when it should not have been"

Processing files: 100%|██████████| 15/15 [00:00<00:00, 2149.09file/s]


KeyError: 'size'

In [12]:
import re
import os
from unittest.mock import patch
from file_processing import File

@patch('pytesseract.image_to_string')
def test_ocr_processing_success(mock_tesseract):
    """Check that File exposes the (mocked) OCR result in its metadata.

    `pytesseract.image_to_string` is patched to return a fixed string, so the
    test does not depend on a Tesseract installation producing exact text.

    Args:
        mock_tesseract: Mock injected by the ``@patch`` decorator in place of
            ``pytesseract.image_to_string``.

    Raises:
        AssertionError: If the cleaned ``ocr_text`` metadata differs from the
            mocked value, or if the mock was not called exactly once with the
            image path.
    """
    # Build the path from components: os.path.normpath does not translate
    # backslashes on POSIX, so a hard-coded 'tests\\resources\\...' literal
    # only resolves on Windows. os.path.join yields the same string there.
    image_path = os.path.join('tests', 'resources', 'test_files', 'test_ocr_text.jpg')
    mock_tesseract.return_value = 'Test OCR'
    file = File(image_path, use_ocr=True)

    # Strip everything except alphanumerics, '!', '?' and spaces before comparing.
    result = re.sub('[^A-Za-z0-9!? ]+', '', file.metadata['ocr_text'])

    assert result == 'Test OCR'

    _, file_extension = os.path.splitext(file.file_name)

    # The call-signature check is skipped for PDFs.
    # NOTE(review): presumably PDFs reach tesseract with a different argument -- confirm.
    if file_extension != '.pdf':
        mock_tesseract.assert_called_once_with(image_path)

# Run the check in-cell; the @patch decorator supplies the mock argument, and
# a failed assertion raises AssertionError.
test_ocr_processing_success()

In [20]:
import whisper

@patch('whisper.transcribe')
def transcribe(mock_whisper):
    """Call ``whisper.transcribe`` while it is patched to return a canned result.

    ``whisper.load_model`` is not patched by this function, so the 'base'
    model is still requested from the real library before the mocked call.

    Args:
        mock_whisper: Mock injected by ``@patch`` in place of
            ``whisper.transcribe``.

    Returns:
        Whatever the patched ``whisper.transcribe`` returns, i.e. the canned
        ``{'text': 'text'}`` dict configured below.
    """
    mock_whisper.return_value = {'text': 'text'}

    base_model = whisper.load_model('base')
    sample_audio = 'tests/resources/test_files/sample_speech.aiff'
    return whisper.transcribe(model=base_model, audio=sample_audio, fp16=False)

# NOTE(review): as written, transcribe() returns the mocked dict
# {'text': 'text'}; the recorded output shows the bare string 'text', so the
# cell was presumably edited after it last ran -- re-run to refresh the output.
transcribe()

'text'

In [2]:
from file_processing import File

# Transcribe the sample clip with nothing patched in this cell and display the
# resulting 'text' metadata entry.
File('tests/resources/test_files/sample_speech.aiff', use_transcriber=True).metadata['text']

' and thank you for your continued support. Thank you.'

In [40]:
from unittest.mock import patch
import whisper
from file_processing import File


def test_mock_transcription(path, transcription):
    """Check that File reports a patched whisper transcription as its text.

    Args:
        path: Path of the audio file handed to ``File``.
        transcription: Text the patched ``whisper.transcribe`` should return
            and that ``File.metadata['text']`` is expected to equal.

    Raises:
        AssertionError: If ``whisper.transcribe`` was never called or the
            metadata text differs from ``transcription``.
    """
    # Canned whisper result carrying the text and language fields.
    fake_result = dict(text=transcription, language='en')

    with patch('whisper.transcribe', return_value=fake_result) as transcribe_mock:
        transcribed_file = File(path, use_transcriber=True)

        transcribe_mock.assert_called()
        assert transcribed_file.metadata['text'] == transcription

# Exercise the mocked-transcription check against the sample speech clip; the
# expected text is the value the patched whisper.transcribe is told to return.
test_mock_transcription(
    path='tests/resources/test_files/sample_speech.aiff',
    transcription='mocked text'
)
