22layout : post
33title : Enhancing Data Science Workflow with Unit Testing
44subtitle : Enhancing Data Science Workflow with Unit Testing
5- cover-img : /assets/img/path .jpg
6- thumbnail-img : /assets/img/thumb .png
7- share-img : /assets/img/path .jpg
5+ cover-img : /assets/img/pytest_1 .jpg
6+ thumbnail-img : /assets/img/pytest_thumb .png
7+ share-img : /assets/img/pytest_2 .jpg
88gh-repo : arpithub/arpithub.github.io
99gh-badge : [star, fork, follow]
1010tags : [datascience,testing,pytest,ml]
@@ -25,8 +25,7 @@ data-science-project/
2525├── src/
2626│ ├── preprocessing.py
2727│ └── model.py
28- │
29- ├── tests/
28+ ├── tests/
3029│ ├── test_preprocessing.py
3130│ └── test_model.py
3231│
@@ -81,14 +80,19 @@ Now, create `tests/test_preprocessing.py` to write unit tests for the preprocess
8180
8281``` python
8382import pytest
83+ from pathlib import Path
8484from src.preprocessing import load_iris_dataset, preprocess_data
8585
86+ data_dir = Path(__file__ ).parent.parent / ' data' # Navigate up to the project root
87+ iris_path = data_dir / ' iris.csv'
88+
89+
8690@pytest.fixture
8791def iris_data ():
88- return load_iris_dataset(' data/iris.csv ' )
92+ return load_iris_dataset(iris_path )
8993
9094def test_load_iris_dataset ():
91- df = load_iris_dataset(' data/iris.csv ' )
95+ df = load_iris_dataset(iris_path )
9296 assert not df.empty
9397
9498def test_preprocess_data (iris_data ):
@@ -97,16 +101,16 @@ def test_preprocess_data(iris_data):
97101 assert ' species' in preprocessed_df.columns
98102
99103def test_missing_values ():
100- df = iris_data( )
104+ df = load_iris_dataset(iris_path )
101105 assert not df.isnull().values.any(), " Dataset contains missing values"
102106
103107def test_no_duplicates ():
104- df = iris_data( )
108+ df = load_iris_dataset(iris_path )
105109 assert not df.duplicated().any(), " Dataset contains duplicate records"
106110
107111
108112def test_column_datatypes ():
109- df = iris_data( )
113+ df = load_iris_dataset(iris_path )
110114 expected_datatypes = {
111115 ' sepal length (cm)' : ' float64' ,
112116 ' sepal width (cm)' : ' float64' ,
@@ -145,12 +149,15 @@ Now, create `tests/test_model.py` to write unit tests for the model training and
145149
146150``` python
147151import pytest
152+ from pathlib import Path
148153from src.model import train_and_evaluate_model
149154from src.preprocessing import load_iris_dataset, preprocess_data
155+ data_dir = Path(__file__ ).parent.parent / ' data' # Navigate up to the project root
156+ iris_path = data_dir / ' iris.csv'
150157
151158@pytest.fixture
152159def preprocessed_iris_data ():
153- df = load_iris_dataset(' data/iris.csv ' )
160+ df = load_iris_dataset(iris_path )
154161 return preprocess_data(df)
155162
156163def test_train_and_evaluate_model (preprocessed_iris_data ):
@@ -159,7 +166,7 @@ def test_train_and_evaluate_model(preprocessed_iris_data):
159166```
160167
161168#### Running the Tests
162- To run the tests using Pytest, navigate to the project directory and execute:
169+ To run the tests using Pytest, navigate to the `tests` directory and execute:
163170
164171``` bash
165172pytest
0 commit comments