Permalink
Browse files

upgrade to 0.6.6

  • Loading branch information...
1 parent 2f3f0e8 commit 612a27825aa45ad5f37669e519b49b86247dcd50 @chineking chineking committed Dec 9, 2016
Showing with 15,907 additions and 6,497 deletions.
  1. +3 −0 docs/source/api-def.rst
  2. +8 −4 docs/source/api-df.rst
  3. +124 −0 docs/source/base-models-zh.rst
  4. +3 −0 docs/source/base-tables-zh.rst
  5. +1 −1 docs/source/base-zh.rst
  6. +68 −343 docs/source/df-agg-zh.rst
  7. +572 −553 docs/source/df-basic-zh.rst
  8. +23 −40 docs/source/df-debug-instruction-zh.rst
  9. +401 −1,327 docs/source/df-element-zh.rst
  10. +0 −529 docs/source/df-join-union-zh.rst
  11. +146 −0 docs/source/df-merge-zh.rst
  12. +34 −153 docs/source/df-plot-zh.rst
  13. +0 −1,341 docs/source/df-query-zh.rst
  14. +102 −648 docs/source/df-quickstart-zh.rst
  15. +360 −1,030 docs/source/df-sort-distinct-apply-zh.rst
  16. +39 −203 docs/source/df-window-zh.rst
  17. +1 −2 docs/source/df-zh.rst
  18. +1 −1 docs/source/index.rst
  19. +142 −0 docs/source/ml-algo-pub-zh.rst
  20. +41 −0 docs/source/ml-assess-zh.rst
  21. +316 −0 docs/source/ml-basic-zh.rst
  22. +102 −0 docs/source/ml-quickstart-zh.rst
  23. +17 −0 docs/source/ml-zh.rst
  24. +1 −1 docs/source/options-zh.rst
  25. +577 −0 notebooks/ml_intro.ipynb
  26. +1 −1 odps/_version.py
  27. +2 −0 odps/config.py
  28. +1 −0 odps/console.py
  29. +8 −0 odps/dag.py
  30. +2 −2 odps/df/__init__.py
  31. +12 −0 odps/df/backends/core.py
  32. +11 −9 odps/df/backends/engine.py
  33. +29 −2 odps/df/backends/frame.py
  34. +101 −17 odps/df/backends/odpssql/analyzer.py
  35. +17 −13 odps/df/backends/odpssql/codegen.py
  36. +12 −14 odps/df/backends/odpssql/compiler.py
  37. +1 −1 odps/df/backends/odpssql/engine.py
  38. +1 −0 odps/df/backends/odpssql/tests/test_codegen.py
  39. +82 −30 odps/df/backends/odpssql/tests/test_compiler.py
  40. +153 −8 odps/df/backends/odpssql/tests/test_engine.py
  41. +5 −2 odps/df/backends/optimize/predicatepushdown.py
  42. +29 −0 odps/df/backends/optimize/tests/test_predicatepushdown.py
  43. +27 −0 odps/df/backends/pd/analyzer.py
  44. +127 −9 odps/df/backends/pd/compiler.py
  45. +2 −2 odps/df/backends/pd/engine.py
  46. +299 −1 odps/df/backends/pd/tests/test_engine.py
  47. +41 −0 odps/df/backends/tests/test_mixed_engine.py
  48. +379 −5 odps/df/expr/collections.py
  49. +14 −8 odps/df/expr/core.py
  50. +1 −1 odps/df/expr/element.py
  51. +70 −4 odps/df/expr/expressions.py
  52. +164 −65 odps/df/expr/merge.py
  53. +0 −20 odps/df/expr/reduction.py
  54. +1 −1 odps/df/expr/strings.py
  55. +61 −1 odps/df/expr/tests/test_collections.py
  56. +56 −2 odps/df/expr/tests/test_merge.py
  57. +8 −0 odps/df/expr/tests/test_window.py
  58. +1 −1 odps/df/expr/utils.py
  59. +4 −4 odps/df/expr/window.py
  60. +1 −1 odps/df/utils.py
  61. +23 −14 odps/ipython/completer.py
  62. +6 −1 odps/ipython/magics.py
  63. +2 −2 odps/ipython/tests/test_completer.py
  64. +27 −0 odps/ml/__init__.py
  65. +20 −0 odps/ml/adapter/__init__.py
  66. +733 −0 odps/ml/adapter/mixin.py
  67. +310 −0 odps/ml/adapter/op.py
  68. +17 −0 odps/ml/adapter/tests/__init__.py
  69. +235 −0 odps/ml/adapter/tests/test_mixin.py
  70. +211 −0 odps/ml/adapter/tests/test_op.py
  71. +22 −0 odps/ml/algorithms/__init__.py
  72. +338 −0 odps/ml/algorithms/base_algo.py
  73. +424 −0 odps/ml/algorithms/loader.py
  74. +181 −0 odps/ml/algorithms/nodes.py
  75. +432 −0 odps/ml/algorithms/objects.py
  76. +17 −0 odps/ml/algorithms/tests/__init__.py
  77. +73 −0 odps/ml/algorithms/tests/test_algo_build.py
  78. +24 −0 odps/ml/classifiers/__init__.py
  79. +56 −0 odps/ml/classifiers/_customize.py
  80. +17 −0 odps/ml/classifiers/tests/__init__.py
  81. +235 −0 odps/ml/classifiers/tests/test_classifiers.py
  82. +87 −0 odps/ml/classifiers/tests/test_sparse_classifiers.py
  83. +24 −0 odps/ml/clustering/__init__.py
  84. +29 −0 odps/ml/clustering/_customize.py
  85. +17 −0 odps/ml/clustering/tests/__init__.py
  86. +63 −0 odps/ml/clustering/tests/test_clustering.py
  87. +107 −0 odps/ml/cross_validation.py
  88. +111 −0 odps/ml/engines.py
  89. +35 −0 odps/ml/enums.py
  90. +26 −0 odps/ml/feature/__init__.py
  91. +64 −0 odps/ml/feature/_customize.py
  92. +17 −0 odps/ml/feature/tests/__init__.py
  93. +67 −0 odps/ml/feature/tests/test_feature.py
  94. +33 −0 odps/ml/metrics/__init__.py
  95. +192 −0 odps/ml/metrics/_customize.py
  96. +665 −0 odps/ml/metrics/classification.py
  97. +54 −0 odps/ml/metrics/clustering.py
  98. +237 −0 odps/ml/metrics/regression.py
  99. +78 −0 odps/ml/metrics/scorer.py
  100. +22 −0 odps/ml/models/__init__.py
  101. +274 −0 odps/ml/models/base.py
  102. +765 −0 odps/ml/models/pmml.py
  103. +17 −0 odps/ml/models/tests/__init__.py
  104. +58 −0 odps/ml/models/tests/test_base.py
  105. +203 −0 odps/ml/models/tests/test_pmml.py
  106. +28 −0 odps/ml/network/__init__.py
  107. +73 −0 odps/ml/network/_customize.py
  108. +17 −0 odps/ml/network/tests/__init__.py
  109. +153 −0 odps/ml/network/tests/test_networking.py
  110. +20 −0 odps/ml/nodes/__init__.py
  111. +236 −0 odps/ml/nodes/exporters.py
  112. +123 −0 odps/ml/nodes/io_nodes.py
  113. +297 −0 odps/ml/nodes/transform_nodes.py
  114. +22 −0 odps/ml/pipeline/__init__.py
  115. +394 −0 odps/ml/pipeline/core.py
  116. +70 −0 odps/ml/pipeline/steps.py
  117. +17 −0 odps/ml/pipeline/tests/__init__.py
  118. +116 −0 odps/ml/pipeline/tests/test_pipeline.py
  119. +32 −0 odps/ml/preprocess/__init__.py
  120. +147 −0 odps/ml/preprocess/_customize.py
  121. +157 −0 odps/ml/preprocess/predefined.py
  122. +17 −0 odps/ml/preprocess/tests/__init__.py
  123. +60 −0 odps/ml/preprocess/tests/test_preprocess.py
  124. +29 −0 odps/ml/recommend/__init__.py
  125. +62 −0 odps/ml/recommend/_customize.py
  126. +17 −0 odps/ml/recommend/tests/__init__.py
  127. +50 −0 odps/ml/recommend/tests/test_recommend.py
  128. +27 −0 odps/ml/regression/__init__.py
  129. +17 −0 odps/ml/regression/tests/__init__.py
  130. +111 −0 odps/ml/regression/tests/test_regression.py
  131. +27 −0 odps/ml/statistics/__init__.py
  132. +145 −0 odps/ml/statistics/_customize.py
  133. +17 −0 odps/ml/statistics/tests/__init__.py
  134. +96 −0 odps/ml/statistics/tests/test_statistics.py
  135. +27 −0 odps/ml/tensor/__init__.py
  136. +17 −0 odps/ml/tests/__init__.py
  137. +92 −0 odps/ml/tests/base.py
  138. +45 −0 odps/ml/tests/test_cv.py
  139. +129 −0 odps/ml/tests/test_dataframe.py
  140. +96 −0 odps/ml/tests/test_partitions.py
  141. +28 −0 odps/ml/text/__init__.py
  142. +61 −0 odps/ml/text/_customize.py
  143. +17 −0 odps/ml/text/tests/__init__.py
  144. +143 −0 odps/ml/text/tests/test_text_algo.py
  145. +28 −0 odps/ml/timeseries/__init__.py
  146. +43 −0 odps/ml/timeseries/_customize.py
  147. +305 −0 odps/ml/utils.py
  148. +1 −0 odps/models/__init__.py
  149. +72 −6 odps/models/instance.py
  150. +1 −1 odps/models/security/config.py
  151. +59 −4 odps/models/tasks.py
  152. +15 −2 odps/models/tests/test_instances.py
  153. +91 −0 odps/models/worker.py
  154. +6 −3 odps/rest.py
  155. +1 −0 odps/runner/core.py
  156. +52 −17 odps/runner/df/adapter.py
  157. +16 −7 odps/runner/df/engine.py
  158. +1 −1 odps/runner/df/tests/test_adapter.py
  159. +1 −0 odps/runner/engine.py
  160. +1 −1 odps/runner/tests/test_core.py
  161. +0 −1 odps/static/algorithms/classifier.xml
  162. +97 −0 odps/static/algorithms/feature.xml
  163. +350 −0 odps/static/algorithms/metrics.xml
  164. +6 −5 odps/static/algorithms/preprocess.xml
  165. +1 −1 odps/static/ui/package.json
  166. +38 −15 odps/static/ui/src/common.js
  167. +2 −2 odps/static/ui/src/df-view.js
  168. +2 −2 odps/static/ui/src/progress.js
  169. +10 −10 odps/static/ui/target/main.js
  170. +9 −0 odps/tests/core.py
  171. +1 −1 odps/ui/progress.py
  172. +12 −1 odps/utils.py
  173. +1 −0 setup.py
@@ -45,4 +45,7 @@ Definitions
.. autoclass:: odps.models.Function
:members:
+.. autoclass:: odps.models.Worker
+ :members:
+
.. intinclude:: api-def-int.rst
@@ -3,17 +3,21 @@
DataFrame Reference
===================
-.. autoclass:: odps.df.core.DataFrame
+.. autoclass:: odps.df.DataFrame
:members:
-.. autoclass:: odps.df.expr.expressions.CollectionExpr
+.. autoclass:: odps.df.CollectionExpr
:members:
:inherited-members:
-.. autoclass:: odps.df.expr.expressions.SequenceExpr
+.. autoclass:: odps.df.SequenceExpr
:members:
:inherited-members:
-.. autoclass:: odps.df.expr.expressions.Scalar
+.. autoclass:: odps.df.Scalar
+ :members:
+ :inherited-members:
+
+.. autoclass:: odps.df.RandomScalar
:members:
:inherited-members:
@@ -0,0 +1,124 @@
+.. _models:
+
+模型
+========
+
+ODPS 提供了两种模型。一种模型为离线模型(OfflineModel),另一种模型为在线模型(OnlineModel)。离线模型为分类 / 回归算法使用
+训练数据得到的模型,而在线模型是部署离线模型或自定义流程从而形成的在线服务。
+
+离线模型
+---------
+
+离线模型是 XFlow 分类 / 回归算法输出的模型。用户可以使用 PyODPS ML 或直接使用 odps.run_xflow 创建一个离线模型,例如下面使用
+run_xflow 的例子:
+
+.. code-block:: python
+
+ >>> odps.run_xflow('LogisticRegression', 'algo_public', dict(modelName='logistic_regression_model_name', \
+ regularizedLevel='1', maxIter='100', regularizedType='l1', epsilon='0.000001', labelColName='y', \
+ featureColNames='pdays,emp_var_rate', goodValue='1', inputTableName='bank_data'))
+
+在模型创建后,用户可以列出当前 Project 下的模型:
+
+.. code-block:: python
+
+ >>> models = odps.list_offline_models(prefix='prefix')
+
+也可以通过模型名获取模型并读取模型 PMML(如果支持):
+
+.. code-block:: python
+
+ >>> model = odps.get_offline_model('logistic_regression_model_name')
+ >>> pmml = model.get_model()
+
+删除模型可使用下列语句:
+
+.. code-block:: python
+
+ >>> odps.delete_offline_model('logistic_regression_model_name')
+
+在线模型
+---------
+
+在线模型是 ODPS 提供的模型在线部署能力。用户可以通过 Pipeline 部署自己的模型。详细信息请参考“机器学习平台——在线服务”章节。
+
+需要注意的是,在线模型的服务使用的是独立的 Endpoint,需要配置 Predict Endpoint。通过
+
+.. code-block:: python
+
+ >>> odps = ODPS('**your-access-id**', '**your-secret-access-key**', '**your-default-project**',
+ endpoint='**your-end-point**', predict_endpoint='**predict_endpoint**')
+
+即可在 ODPS 对象上添加相关配置。Predict Endpoint 的地址请参考相关说明或咨询管理员。
+
+部署离线模型上线
+~~~~~~~~~~~~~~~~
+
+PyODPS 提供了离线模型的部署功能。部署方法为
+
+.. code-block:: python
+
+ >>> model = odps.create_online_model('**online_model_name**', '**offline_model_name**')
+
+与其他 ODPS 对象类似,创建后,可列举、获取和删除在线模型:
+
+.. code-block:: python
+
+ >>> models = odps.list_online_models(prefix='prefix')
+ >>> model = odps.get_online_model('**online_model_name**')
+ >>> odps.delete_online_model('**online_model_name**')
+
+可使用模型名和数据进行在线预测,输入数据可以是 Record,也可以是字典或数组和 Schema 的组合:
+
+.. code-block:: python
+
+ >>> result = odps.predict_online_model('**online_model_name**', [4, 3, 2, 1],
+ schema=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'])
+
+也可为模型设置 ABTest。参数中的 modelx 可以是在线模型名,也可以是 get_online_model 获得的模型对象本身,而 percentagex 表示
+modelx 在 ABTest 中所占的百分比,范围为 0 至 100:
+
+.. code-block:: python
+
+ >>> result = odps.config_online_model_ab_test('**online_model_name**', model1, percentage1, model2, percentage2)
+
+与其他对象不同的是,在线模型的创建和删除较为耗时。PyODPS 默认 create_online_model 和 delete_online_model 在整个操作完成后
+才返回。用户可以通过 wait 选项控制是否要在模型创建请求提交后立即返回,然后自己控制等待。例如,下列语句
+
+.. code-block:: python
+
+ >>> model = odps.create_online_model('**online_model_name**', '**offline_model_name**')
+
+等价于
+
+.. code-block:: python
+
+ >>> model = odps.create_online_model('**online_model_name**', '**offline_model_name**', wait=False)
+ >>> while model.status == OnlineModel.Status.DEPLOYING:
+ >>> model.reload()
+
+部署 PMML 文件上线
+~~~~~~~~~~~~~~~~~~
+由于 PMML 文件的常用性,PyODPS 简化了部署 PMML 文件上线的步骤。类似于离线模型上线,PMML 文件上线也使用 create_online_model
+方法,但需要把离线模型名换成一个 PmmlPredictor 对象,即
+
+.. code-block:: python
+
+ >>> from odps.models.ml import PmmlPredictor
+ >>> predictor = PmmlPredictor('**pmml_string**')
+ >>> model = odps.create_online_model('**online_model_name**', predictor)
+
+其余使用方法与离线模型部署的在线模型相同,不再赘述。
+
+部署自定义 Pipeline 上线
+~~~~~~~~~~~~~~~~~~~~~~~~
+其他含有自定义 Pipeline 的在线模型需要自行构造 ModelPredictor 对象,例子如下:
+
+.. code-block:: python
+
+ >>> from odps.models.ml import ModelPredictor, ModelProcessor
+ >>> processor = ModelProcessor(class_name='**class**', lib='**library name**',
+ resources=['**resource name**', ], config='**configuration**')
+ >>> predictor = ModelPredictor(runtime='Jar or Native', instance_num=5, pipeline=[processor, ],
+ target_name='**target name**')
+ >>> model = odps.create_online_model('**online_model_name**', predictor)
@@ -148,6 +148,9 @@ Record表示表的一行记录,我们在 Table 对象上调用 new_record 就
>>> for record in t.head(3):
>>> # 处理每个Record对象
+
+.. _table_open_reader:
+
其次,在table上可以执行 ``open_reader`` 操作来打开一个reader来读取数据。记住这里需要使用 **with表达式**
.. code-block:: python
@@ -16,4 +16,4 @@ PyODPS 提供直接针对 ODPS 对象的基本操作接口,可通过符合 Pyt
base-resources-zh
base-functions-zh
base-volume-int-zh
- base-models-int-zh
+ base-models-zh
Oops, something went wrong.

0 comments on commit 612a278

Please sign in to comment.