Skip to content

Commit 1b53145

Browse files
authored
Merge pull request #16 from arpitHub/main
Images for pytest post
2 parents b8de326 + 6a6ed10 commit 1b53145

File tree

5 files changed

+19
-12
lines changed

5 files changed

+19
-12
lines changed
File renamed without changes.

_posts/2024-04-07-unit-testing-datascience.md

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
layout: post
33
title: Enhancing Data Science Workflow with Unit Testing
44
subtitle: Enhancing Data Science Workflow with Unit Testing
5-
cover-img: /assets/img/path.jpg
6-
thumbnail-img: /assets/img/thumb.png
7-
share-img: /assets/img/path.jpg
5+
cover-img: /assets/img/pytest_1.jpg
6+
thumbnail-img: /assets/img/pytest_thumb.png
7+
share-img: /assets/img/pytest_2.jpg
88
gh-repo: arpithub/arpithub.github.io
99
gh-badge: [star, fork, follow]
1010
tags: [datascience,testing,pytest,ml]
@@ -25,8 +25,7 @@ data-science-project/
2525
├── src/
2626
│ ├── preprocessing.py
2727
│ └── model.py
28-
29-
├── tests/
28+
├── tests/
3029
│ ├── test_preprocessing.py
3130
│ └── test_model.py
3231
@@ -81,14 +80,19 @@ Now, create `tests/test_preprocessing.py` to write unit tests for the preprocess
8180

8281
```python
8382
import pytest
83+
from pathlib import Path
8484
from src.preprocessing import load_iris_dataset, preprocess_data
8585

86+
data_dir = Path(__file__).parent.parent / 'data' # The data/ directory under the project root (two levels up from this file)
87+
iris_path = data_dir / 'iris.csv'
88+
89+
8690
@pytest.fixture
8791
def iris_data():
88-
return load_iris_dataset('data/iris.csv')
92+
return load_iris_dataset(iris_path)
8993

9094
def test_load_iris_dataset():
91-
df = load_iris_dataset('data/iris.csv')
95+
df = load_iris_dataset(iris_path)
9296
assert not df.empty
9397

9498
def test_preprocess_data(iris_data):
@@ -97,16 +101,16 @@ def test_preprocess_data(iris_data):
97101
assert 'species' in preprocessed_df.columns
98102

99103
def test_missing_values():
100-
df = iris_data()
104+
df = load_iris_dataset(iris_path)
101105
assert not df.isnull().values.any(), "Dataset contains missing values"
102106

103107
def test_no_duplicates():
104-
df = iris_data()
108+
df = load_iris_dataset(iris_path)
105109
assert not df.duplicated().any(), "Dataset contains duplicate records"
106110

107111

108112
def test_column_datatypes():
109-
df = iris_data()
113+
df = load_iris_dataset(iris_path)
110114
expected_datatypes = {
111115
'sepal length (cm)': 'float64',
112116
'sepal width (cm)': 'float64',
@@ -145,12 +149,15 @@ Now, create `tests/test_model.py` to write unit tests for the model training and
145149

146150
```python
147151
import pytest
152+
from pathlib import Path
148153
from src.model import train_and_evaluate_model
149154
from src.preprocessing import load_iris_dataset, preprocess_data
155+
data_dir = Path(__file__).parent.parent / 'data' # The data/ directory under the project root (two levels up from this file)
156+
iris_path = data_dir / 'iris.csv'
150157

151158
@pytest.fixture
152159
def preprocessed_iris_data():
153-
df = load_iris_dataset('data/iris.csv')
160+
df = load_iris_dataset(iris_path)
154161
return preprocess_data(df)
155162

156163
def test_train_and_evaluate_model(preprocessed_iris_data):
@@ -159,7 +166,7 @@ def test_train_and_evaluate_model(preprocessed_iris_data):
159166
```
160167

161168
#### Running the Tests
162-
To run the tests using Pytest, navigate to the project directory and execute:
169+
To run the tests using Pytest, navigate to the `tests` directory and execute:
163170

164171
```bash
165172
pytest

assets/img/pytest_1.jpg

300 KB
Loading

assets/img/pytest_2.jpg

1.69 MB
Loading

assets/img/pytest_thumb.png

2.41 MB
Loading

0 commit comments

Comments
 (0)