Skip to content
This repository has been archived by the owner on Jul 31, 2023. It is now read-only.

Feature/convert and load #55

Merged
merged 3 commits into from
Oct 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions .github/workflows/python-cicd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,9 @@ jobs:
python -m pip install --upgrade pip
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

- name: Run Jupyter notebook tests
- name: Run all tests
run: |
export PYTHONPATH="$GITHUB_WORKSPACE"
make testnb

- name: Run Python tests
run: |
make test

- name: Lint with pylint
Expand Down
8 changes: 5 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
all: init test pylint
all: init test-nb test pylint

init:
pip install -r requirements.txt

test:
test: test-nb test-py

test-py:
nosetests --with-coverage -v --cover-package=tfrecorder

testnb:
test-nb:
ls -1 samples/*.ipynb | grep -v '^.*Dataflow.ipynb' | xargs py.test --nbval-lax -p no:python

pylint:
Expand Down
17 changes: 9 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ Using Python interpreter:
```python
import tfrecorder

tfrecorder.create_tfrecords(
tfrecorder.convert(
source='/path/to/data.csv',
output_dir='gs://my/bucket')
```
Expand All @@ -126,10 +126,9 @@ tfrecorder create-tfrecords \
```python
import tfrecorder

tfrecorder.create_tfrecords(
tfrecorder.convert(
source='/path/to/image_dir',
output_dir='gs://my/bucket',
)
output_dir='gs://my/bucket')
```

The image directory should have the following general structure:
Expand Down Expand Up @@ -175,8 +174,9 @@ Using Python interpreter:
```python
import tfrecorder

tfrecorder.check_tfrecords(
file_pattern='/path/to/tfrecords/train*.tfrecord.gz',
tfrecorder.inspect(
tfrecord_dir='/path/to/tfrecords/',
split='TRAIN',
num_records=5,
output_dir='/tmp/output')
```
Expand All @@ -187,8 +187,9 @@ representing the images encoded into TFRecords.
Using the command line:

```bash
tfrecorder check-tfrecords \
--file_pattern=/path/to/tfrecords/train*.tfrecord.gz \
tfrecorder inspect \
--tfrecord-dir=/path/to/tfrecords/ \
--split='TRAIN' \
--num_records=5 \
--output_dir=/tmp/output
```
Expand Down
714 changes: 602 additions & 112 deletions samples/Basic-TFRecorder-Usage.ipynb

Large diffs are not rendered by default.

39 changes: 19 additions & 20 deletions samples/Convert-image-directory.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,28 +9,27 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
}
],
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 2,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Error importing tfx_bsl_extension.arrow.array_util. Some tfx_bsl functionalities are not available"
]
}
],
"source": [
"import os\n",
"import pathlib\n",
Expand All @@ -52,7 +51,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -62,7 +61,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -128,13 +127,13 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'job_id': 'DirectRunner', 'metrics': {'rows': 6, 'good_images': 6, 'bad_images': None}, 'tfrecord_dir': '/tmp/tfrecords/tfrecorder-20201022-120722-create-tfrecords'}\n"
"{'job_id': 'DirectRunner', 'metrics': {'rows': 6, 'good_images': 6, 'bad_images': None}, 'tfrecord_dir': '/tmp/tfrecords/tfrecorder-20201027-173455-create-tfrecords'}\n"
]
}
],
"source": [
"output_dir = pathlib.Path('/tmp/tfrecords')\n",
"results = tfrecorder.create_tfrecords(str(image_dir), output_dir)\n",
"results = tfrecorder.convert(str(image_dir), output_dir)\n",
"print(results)"
]
},
Expand All @@ -147,7 +146,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -156,7 +155,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 6,
"metadata": {
"pycharm": {
"name": "#%%\n"
Expand Down Expand Up @@ -198,4 +197,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}
14 changes: 7 additions & 7 deletions samples/Convert-structured-data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
"output_type": "stream",
"text": [
"Downloading data from https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv\n",
"49152/44225 [=================================] - 0s 6us/step\n"
"49152/44225 [=================================] - 1s 12us/step\n"
]
}
],
Expand Down Expand Up @@ -234,7 +234,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -298,7 +298,7 @@
}
],
"source": [
"results = tfrecorder.create_tfrecords(\n",
"results = tfrecorder.convert(\n",
" df, \n",
" './tfrecords', \n",
" schema=input_schema.Schema({\n",
Expand All @@ -317,14 +317,14 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'job_id': 'DirectRunner', 'metrics': {'rows': 887, 'good_images': None, 'bad_images': None}, 'tfrecord_dir': './tfrecords/tfrecorder-20201022-120403-create-tfrecords'}\n"
"{'job_id': 'DirectRunner', 'metrics': {'rows': 887, 'good_images': None, 'bad_images': None}, 'tfrecord_dir': './tfrecords/tfrecorder-20201027-173544-create-tfrecords'}\n"
]
}
],
Expand All @@ -341,7 +341,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -350,7 +350,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 9,
"metadata": {},
"outputs": [
{
Expand Down
9 changes: 8 additions & 1 deletion samples/Loading-a-TF-Dataset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"\n",
"This notebook briefly demonstrates how to load a TF Dataset from TFRecord files generated by TFRecorder.\n",
"Note that currently, the TFRecord files must be in a directory on your local machine.\n",
"The directory is expected to have the following structure, based on TFRecorder's `create_tfrecords` default output:\n",
"The directory is expected to have the following structure, based on TFRecorder's `convert` default output:\n",
"```\n",
"tfrecord_dir/\n",
" train-*.tfrecord.gz\n",
Expand Down Expand Up @@ -109,6 +109,13 @@
"for d in train:\n",
" print(d['image_name'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
8 changes: 5 additions & 3 deletions tfrecorder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
# limitations under the License.

"""Imports."""

from tfrecorder import accessor
from tfrecorder.client import create_tfrecords
from tfrecorder.check import check_tfrecords
from tfrecorder.dataset import load
from tfrecorder.converter import convert
from tfrecorder.dataset_loader import load
from tfrecorder.converter import convert_and_load
from tfrecorder.utils import inspect
4 changes: 2 additions & 2 deletions tfrecorder/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import pandas as pd
from IPython.core import display

from tfrecorder import client
from tfrecorder import converter
from tfrecorder import constants
from tfrecorder import input_schema

Expand Down Expand Up @@ -86,7 +86,7 @@ def to_tfr(
display.HTML(
'<b>Logging output to /tmp/{} </b>'.format(constants.LOGFILE)))

r = client.create_tfrecords(
r = converter.convert(
self._df,
output_dir=output_dir,
schema=schema,
Expand Down
8 changes: 4 additions & 4 deletions tfrecorder/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@

import fire

from tfrecorder import client
from tfrecorder import check
from tfrecorder import converter
from tfrecorder import utils


def main():
"""Entry point for command-line interface."""

fire.Fire({
'create-tfrecords': client.create_tfrecords,
'check-tfrecords': check.check_tfrecords,
'convert': converter.convert,
'inspect': utils.inspect,
})


Expand Down
42 changes: 0 additions & 42 deletions tfrecorder/common.py

This file was deleted.

Loading