Merge pull request #11 from gretelai/colab-support-aw
Colab support aw
zredlined committed Apr 30, 2020
2 parents 76ac70a + 4f45c69 commit 8def903
Showing 6 changed files with 26 additions and 19 deletions.
14 changes: 10 additions & 4 deletions README.md
@@ -7,20 +7,26 @@ This package allows developers to quickly get immersed in synthetic data generation
For example usage, please launch the example Jupyter Notebook and step through the config, train, and generation examples.
[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/gretelai/gretel-synthetics/blob/master/examples/synthetic_records.ipynb)

**NOTE**: The settings in the Jupyter Notebook are optimized to run on a CPU, so you can get the hang of how things work. We
highly recommend running with no `max_char` limitation and at least 30 epochs on a GPU.
**NOTE**: The settings in our Jupyter Notebook examples are optimized to run on a GPU, which you can experiment with
for free in Google Colaboratory. If you're running on a CPU, you might want to grab a cup of coffee,
or lower `max_lines` and `epochs` to 5000 and 10, respectively.


# Getting Started
By default, we do not install Tensorflow via pip, since many developers and cloud services such as Google Colab
run customized builds for their hardware. If you wish to pip install Tensorflow along with gretel-synthetics,
use the [tf] commands below instead.

```
pip install -U .
pip install -U . # Do not install Tensorflow by default (assuming you have built a distro for your hardware)
pip install -U -e ".[tf]" # Install a pinned version of Tensorflow
```

_or_

```
pip install gretel-synthetics
pip install gretel-synthetics # Do not install Tensorflow by default (assuming you have built a distro for your hardware)
pip install gretel-synthetics[tf] # Install a pinned version of Tensorflow
```
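Once installed, a typical session follows the config, train, and generate steps from the example notebook. The sketch below uses the library's documented entry points (`LocalConfig`, `train_rnn`, `generate_text`); exact module paths and signatures may differ by version, and the input file name is a placeholder.

```python
# Minimal sketch of a gretel-synthetics workflow, assuming the public
# entry points shown in the example notebook. Settings here are the
# CPU-friendly values suggested in the README note above.
from pathlib import Path

from gretel_synthetics.config import LocalConfig
from gretel_synthetics.train import train_rnn
from gretel_synthetics.generate import generate_text

config = LocalConfig(
    max_lines=5000,   # CPU-friendly; set to 0 to train on all lines
    epochs=10,        # CPU-friendly; 15-50 epochs recommended on a GPU
    checkpoint_dir=(Path.cwd() / 'checkpoints').as_posix(),
    input_data_path='my_training_data.csv',  # hypothetical local file
)

train_rnn(config)                 # train a model on the input data
for line in generate_text(config):
    print(line.text)              # each result carries the generated text
```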

_then..._
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
0.7.0
0.7.1
16 changes: 7 additions & 9 deletions examples/synthetic_records.ipynb
@@ -12,21 +12,19 @@
"\n",
"For both training and generating data, we can use the ``config.py`` module and<br>\n",
"create a ``LocalConfig`` instance that contains all the attributes that we need<br>\n",
"for both activities.\n",
"\n",
"In the below example, we will create a config that can work on a CPU. Performing<br> \n",
"operations on a GPU is recommended with more complex settings."
"for both activities."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Google Colab support\n",
"# Note: Click \"Runtime->Change Runtime Type\" set Hardware Accelerator to \"GPU\"\n",
"#\n",
"# Note: Use pip install gretel-synthetics[tf] to install tensorflow if necessary\n",
"# \n",
"#!pip install gretel-synthetics"
]
},
@@ -44,8 +42,8 @@
"# The default values for ``max_lines`` and ``epochs`` are better suited for GPUs\n",
"\n",
"config = LocalConfig(\n",
" max_lines=5000, # friendly towards CPUs, set to 0 (zero) to train model on all lines\n",
" epochs=5, # friendly towards CPUs\n",
"    max_lines=0, # maximum lines of training data to use; set to 0 (zero) to train on all lines in the dataset\n",
" epochs=15, # 15-50 epochs with GPU for best performance\n",
" vocab_size=15000, # tokenizer model vocabulary size\n",
" character_coverage=1.0, # tokenizer model character coverage percent\n",
" gen_chars=0, # the maximum number of characters possible per-generated line of text\n",
@@ -60,7 +58,7 @@
" dp_l2_norm_clip=1.0, # bound optimizer's sensitivity to individual training points\n",
" dp_microbatches=256, # split batches into minibatches for parallelism\n",
" checkpoint_dir=(Path.cwd() / 'checkpoints').as_posix(),\n",
" input_data=\"https://gretel-public-website.s3-us-west-2.amazonaws.com/datasets/uber_scooter_rides_1day.csv\" # filepath or S3\n",
" input_data_path=\"https://gretel-public-website.s3-us-west-2.amazonaws.com/datasets/uber_scooter_rides_1day.csv\" # filepath or S3\n",
")"
]
},
4 changes: 2 additions & 2 deletions requirements.txt
@@ -2,6 +2,6 @@ tensorflow==2.1.0
tensorflow_privacy==0.2.2
sentencepiece==0.1.85
smart_open==1.10.0
tqdm==4.45.0
pandas==1.0.3
numpy==1.18.3
numpy==1.18.3
tqdm<5.0
8 changes: 5 additions & 3 deletions setup.py
@@ -19,12 +19,14 @@
package_dir={'': 'src'},
packages=find_packages('src'),
install_requires=[
'tensorflow==2.1.0',
'tensorflow_privacy==0.2.2',
'sentencepiece==0.1.85',
'smart_open==1.10.0',
'tqdm==4.45.0',
'tqdm<5.0',
'pandas==1.0.3',
'numpy==1.18.3'
]
],
extras_require={
'tf': ['tensorflow==2.1.0']
}
)
1 change: 1 addition & 0 deletions test-requirements.txt
@@ -1,3 +1,4 @@
tensorflow==2.1.0
flake8
pytest
pytest-cov
