ETL Classes
First, define our ETL classes:
Extract: Methods to extract data from CSV files and SQL databases.
Transform: Methods to clean and normalize data.
Load: Methods to load data into CSV files and SQL databases.

In [None]:
import pandas as pd
from sqlalchemy import create_engine

class Extract:
    """Read source data into pandas DataFrames."""

    def from_csv(self, file_path):
        """Load CSV data into a DataFrame.

        Args:
            file_path: Path (or any other source accepted by
                ``pandas.read_csv``) of the CSV data to read.

        Returns:
            The parsed data as a ``pandas.DataFrame``.
        """
        return pd.read_csv(file_path)

    def from_sql(self, connection_string, query):
        """Run a SQL query and return the result set as a DataFrame.

        Args:
            connection_string: SQLAlchemy database URL used to create the
                engine.
            query: SQL query handed to ``pandas.read_sql``.

        Returns:
            The query result as a ``pandas.DataFrame``.
        """
        engine = create_engine(connection_string)
        try:
            return pd.read_sql(query, engine)
        finally:
            # A fresh engine is created on every call, so release its pooled
            # connections here instead of leaving them open until the engine
            # happens to be garbage collected.
            engine.dispose()

class Transform:
    """Clean and normalize DataFrames without modifying the caller's data."""

    def clean_data(self, df):
        """Return a copy of ``df`` with every row containing a missing value removed.

        Args:
            df: Input ``pandas.DataFrame``; it is left untouched.

        Returns:
            A new ``pandas.DataFrame`` holding only the complete rows.
        """
        # dropna() without inplace=True returns a new frame, so the caller's
        # DataFrame is not silently shortened as a side effect.
        return df.dropna()

    def normalize_data(self, df, columns):
        """Return a copy of ``df`` with the given columns z-score normalized.

        Each listed column is replaced by ``(value - mean) / std``. A column
        whose standard deviation is zero or undefined (constant values, or
        fewer than two non-missing values) is only centered, so it does not
        turn into NaN through a division by zero.

        Args:
            df: Input ``pandas.DataFrame``; it is left untouched.
            columns: Iterable of column names to normalize.

        Returns:
            A new ``pandas.DataFrame`` with the listed columns normalized.
        """
        result = df.copy()
        for column in columns:
            values = result[column]
            std = values.std()
            # Fall back to a neutral scale when std is 0 or NaN.
            scale = std if pd.notna(std) and std != 0 else 1.0
            result[column] = (values - values.mean()) / scale
        return result

class Load:
    """Write DataFrames out to CSV files and SQL tables."""

    def to_csv(self, df, file_path):
        """Write ``df`` as CSV without the index column.

        Args:
            df: ``pandas.DataFrame`` to write.
            file_path: Destination path (or any other target accepted by
                ``DataFrame.to_csv``).
        """
        df.to_csv(file_path, index=False)

    def to_sql(self, df, connection_string, table_name):
        """Write ``df`` to a SQL table, replacing the table if it exists.

        Args:
            df: ``pandas.DataFrame`` to write.
            connection_string: SQLAlchemy database URL used to create the
                engine.
            table_name: Name of the destination table.
        """
        engine = create_engine(connection_string)
        try:
            df.to_sql(table_name, engine, if_exists='replace', index=False)
        finally:
            # A fresh engine is created on every call, so release its pooled
            # connections here instead of leaving them open until the engine
            # happens to be garbage collected.
            engine.dispose()


Pytest Code
Now, let’s write some pytest tests for these classes. The tests import them from a module named etl, so save the code above as etl.py first:
Fixtures: sample_data fixture provides sample data for testing.
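Tests: Each test function checks one specific piece of functionality of the ETL classes. The Extract and Load tests use mock objects (the mocker fixture from the pytest-mock plugin) to avoid actual file/database operations; the Transform tests run directly on the in-memory sample data.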

In [None]:
import pytest
import pandas as pd
from etl import Extract, Transform, Load

@pytest.fixture
def sample_data():
    """Provide a four-row DataFrame with one gap in Name and one in Age."""
    names = ['Alice', 'Bob', 'Charlie', None]
    ages = [25, None, 22, 29]
    salaries = [70000, 80000, 90000, 100000]
    return pd.DataFrame({'Name': names, 'Age': ages, 'Salary': salaries})

def test_extract_from_csv(mocker):
    """Extract.from_csv passes the path to pandas.read_csv and returns its result."""
    expected = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
    read_csv = mocker.patch('pandas.read_csv', return_value=expected)
    extract = Extract()
    df = extract.from_csv('dummy_path.csv')
    # Verify the call itself, not merely that something non-empty came back.
    read_csv.assert_called_once_with('dummy_path.csv')
    pd.testing.assert_frame_equal(df, expected)

def test_transform_clean_data(sample_data):
    """After clean_data, the returned frame holds no missing values."""
    cleaned = Transform().clean_data(sample_data)
    remaining_nulls = cleaned.isna().to_numpy().sum()
    assert remaining_nulls == 0

def test_transform_normalize_data(sample_data):
    """Normalized columns have (approximately) zero mean and unit standard deviation."""
    transform = Transform()
    df_normalized = transform.normalize_data(sample_data, ['Age', 'Salary'])
    for column in ('Age', 'Salary'):
        assert df_normalized[column].mean() == pytest.approx(0, abs=1e-2)
        # Checking the spread as well catches an implementation that only
        # centers the data without scaling it.
        assert df_normalized[column].std() == pytest.approx(1, abs=1e-2)

def test_load_to_csv(mocker, sample_data):
    """Load.to_csv delegates to DataFrame.to_csv with the path and index=False."""
    to_csv_mock = mocker.patch('pandas.DataFrame.to_csv')
    Load().to_csv(sample_data, 'dummy_path.csv')
    to_csv_mock.assert_called_once_with('dummy_path.csv', index=False)

def test_load_to_sql(mocker, sample_data):
    """Load.to_sql builds an engine from the URL and writes the table through it."""
    # Patch the name where etl looks it up so no real engine is constructed.
    create_engine = mocker.patch('etl.create_engine')
    to_sql = mocker.patch('pandas.DataFrame.to_sql')
    load = Load()
    load.to_sql(sample_data, 'sqlite:///dummy.db', 'dummy_table')
    create_engine.assert_called_once_with('sqlite:///dummy.db')
    to_sql.assert_called_once_with(
        'dummy_table', create_engine.return_value, if_exists='replace', index=False
    )
