Skip to content

Commit

Permalink
Release v0.11.3.beta1 (#195)
Browse files Browse the repository at this point in the history
  • Loading branch information
wjsi committed Jan 17, 2023
1 parent 1acf0d2 commit e612ec8
Show file tree
Hide file tree
Showing 202 changed files with 9,839 additions and 3,630 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ $ python setup.py install
>>> dual = o.get_table('dual')
>>> dual.name
'dual'
>>> dual.schema
>>> dual.table_schema
odps.Schema {
c_int_a bigint
c_int_b bigint
Expand All @@ -80,7 +80,7 @@ datetime.datetime(2014, 6, 6, 13, 28, 24)
False
>>> dual.size
448
>>> dual.schema.columns
>>> dual.table_schema.columns
[<column c_int_a, type bigint>,
<column c_int_b, type bigint>,
<column c_double_a, type double>,
Expand Down
6 changes: 3 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ Usage
>>> dual = o.get_table('dual')
>>> dual.name
'dual'
>>> dual.schema
>>> dual.table_schema
odps.Schema {
c_int_a bigint
c_int_b bigint
Expand All @@ -83,7 +83,7 @@ Usage
False
>>> dual.size
448
>>> dual.schema.columns
>>> dual.table_schema.columns
[<column c_int_a, type bigint>,
<column c_int_b, type bigint>,
<column c_double_a, type double>,
Expand Down Expand Up @@ -139,7 +139,7 @@ Command-line and IPython enhancement

In [3]: %sql select * from pyodps_iris limit 5
|==========================================| 1 / 1 (100.00%) 2s
Out[3]:
Out[3]:
sepallength sepalwidth petallength petalwidth name
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
Expand Down
10 changes: 5 additions & 5 deletions benchmarks/perf_tabletunnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

from odps.compat import unittest, Decimal
from odps.tests.core import TestBase
from odps.models import Schema
from odps.models import TableSchema
from datetime import datetime

# remember to reset False before committing
Expand All @@ -46,7 +46,7 @@ def setUp(self):
self.pr.enable()
fields = ['a', 'b', 'c', 'd', 'e', 'f']
types = ['bigint', 'double', 'datetime', 'boolean', 'string', 'decimal']
self.SCHEMA = Schema.from_lists(fields, types)
self.SCHEMA = TableSchema.from_lists(fields, types)

def tearDown(self):
if ENABLE_PROFILE:
Expand All @@ -63,7 +63,7 @@ def tearDown(self):

def testWrite(self):
table_name = 'pyodps_test_tunnel_write_performance'
self.odps.create_table(table_name, schema=self.SCHEMA, if_not_exists=True)
self.odps.create_table(table_name, self.SCHEMA, if_not_exists=True)
ss = self.tunnel.create_upload_session(table_name)
r = ss.new_record()

Expand All @@ -85,7 +85,7 @@ def testWrite(self):
def testRead(self):
table_name = 'pyodps_test_tunnel_read_performance'
self.odps.delete_table(table_name, if_exists=True)
t = self.odps.create_table(table_name, schema=self.SCHEMA)
t = self.odps.create_table(table_name, self.SCHEMA)

def gen_data():
for i in range(self.DATA_AMOUNT):
Expand Down Expand Up @@ -118,7 +118,7 @@ def gen_data():

def testBufferedWrite(self):
table_name = 'test_tunnel_bufferred_write'
self.odps.create_table(table_name, schema=self.SCHEMA, if_not_exists=True)
self.odps.create_table(table_name, self.SCHEMA, if_not_exists=True)
ss = self.tunnel.create_upload_session(table_name)
r = ss.new_record()

Expand Down
4 changes: 2 additions & 2 deletions benchmarks/perf_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from odps.compat import unittest
from odps.tests.core import TestBase
from odps.models import Schema, Record
from odps.models import TableSchema, Record


class Test(TestBase):
Expand All @@ -36,7 +36,7 @@ def setUp(self):
self.pr.enable()
fields = ['bigint', 'double', 'datetime', 'boolean', 'string', 'decimal']
types = ['bigint', 'double', 'datetime', 'boolean', 'string', 'decimal']
self.SCHEMA = Schema.from_lists(fields, types)
self.SCHEMA = TableSchema.from_lists(fields, types)

def tearDown(self):
p = Stats(self.pr)
Expand Down
8 changes: 4 additions & 4 deletions cupid/io/table/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from types import GeneratorType

from odps import options
from odps.models import Table, Schema
from odps.models import Table, TableSchema
from odps.models.partition import Partition as TablePartition

from cupid.rpc import CupidRpcController, CupidTaskServiceRpcChannel, SandboxRpcChannel
Expand Down Expand Up @@ -134,7 +134,7 @@ def _register_reader(self):
schema_types = [d['type'] for d in schema_json]
pt_schema_names = [d['name'] for d in partition_schema_json]
pt_schema_types = [d['type'] for d in partition_schema_json]
schema = Schema.from_lists(schema_names, schema_types, pt_schema_names, pt_schema_types)
schema = TableSchema.from_lists(schema_names, schema_types, pt_schema_names, pt_schema_types)

return resp.readIterator, schema

Expand Down Expand Up @@ -389,15 +389,15 @@ def create_download_session(session, table_or_parts, split_size=None, split_coun
for t in table_or_parts:
if isinstance(t, Table):
if not columns:
columns = t.schema.names
columns = t.table_schema.names
table_kw = dict(
projectName=t.project.name,
tableName=t.name,
columns=','.join(columns),
)
elif isinstance(t, TablePartition):
if not columns:
columns = t.table.schema.names
columns = t.table.table_schema.names
table_kw = dict(
projectName=t.table.project.name,
tableName=t.table.name,
Expand Down
4 changes: 2 additions & 2 deletions cupid/runtime/runtime_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import time
import unittest

from odps.models import Schema, Record
from odps.models import TableSchema, Record

CLIENT_READ_PIPE_NUM = 2
CLIENT_WRITE_PIPE_NUM = 2
Expand Down Expand Up @@ -60,7 +60,7 @@ def testInterfaces(self):
del ro

def testTableIO(self):
schema = Schema.from_lists(['key', 'value', 'double', 'datetime', 'boolean'],
schema = TableSchema.from_lists(['key', 'value', 'double', 'datetime', 'boolean'],
['bigint', 'string', 'double', 'datetime', 'boolean'])
label = self.client.sync_call('test', 'write_label')
print('Write label: ' + label)
Expand Down
2 changes: 1 addition & 1 deletion docs/source/base-functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
函数
========

ODPS用户可以编写自定义 `函数 <https://docs.aliyun.com/#/pub/odps/basic/definition&function>`_ 用在ODPS SQL中。
ODPS用户可以编写自定义 `函数 <https://help.aliyun.com/document_detail/27823.html>`_ 用在ODPS SQL中。

基本操作
---------
Expand Down
2 changes: 1 addition & 1 deletion docs/source/base-projects.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
项目空间
=========

`项目空间 <https://docs.aliyun.com/#/pub/odps/basic/definition&project>`_ 是ODPS的基本组织单元,
`项目空间 <https://help.aliyun.com/document_detail/27818.html>`_ 是ODPS的基本组织单元,
有点类似于Database的概念。

我们通过 ODPS 入口对象的 ``get_project`` 来取到某个项目空间。
Expand Down
38 changes: 30 additions & 8 deletions docs/source/base-resources.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
资源
=======

`资源 <https://docs.aliyun.com/#/pub/odps/basic/definition&resource>`_ 在ODPS上常用在UDF和MapReduce中。
`资源 <https://help.aliyun.com/document_detail/27822.html>`_ 在ODPS上常用在UDF和MapReduce中。

列出所有资源还是可以使用 ``list_resources``,判断资源是否存在使用 ``exist_resource``。
删除资源时,可以调用 ``delete_resource``,或者直接对于Resource对象调用 ``drop`` 方法。
Expand All @@ -22,22 +22,27 @@

.. code-block:: python
resource = o.create_resource('test_file_resource', 'file', file_obj=open('/to/path/file')) # 使用file-like的对象
resource = o.create_resource('test_py_resource', 'py', file_obj='import this') # 使用字符串
resource = o.create_resource('test_file_resource', 'file', fileobj=open('/to/path/file')) # 使用file-like的对象
resource = o.create_resource('test_py_resource', 'py', fileobj='import this') # 使用字符串
可以通过 ``temp=True`` 创建一个临时资源。

.. code-block:: python
resource = o.create_resource('test_file_resource', 'file', file_obj=open('/to/path/file'), temp=True)
resource = o.create_resource('test_file_resource', 'file', fileobj=open('/to/path/file'), temp=True)
.. note::

读取和修改文件资源
~~~~~~~~~~~~~~~~~~
在 fileobj 参数中传入字符串,创建的资源内容为 **字符串本身** 而非字符串代表的路径指向的文件。

如果文件过大(例如大小超过 64MB),PyODPS 可能会使用分块上传模式,而这不被旧版 MaxCompute 部署所支持。
如需在旧版 MaxCompute 中上传大文件,请配置 ``options.upload_resource_in_chunks = False`` 。

对文件资源调用 ``open`` 方法,或者在odps入口调用 ``open_resource`` 都能打开一个资源,
打开后的对象会是file-like的对象。
读取和修改文件资源
~~~~~~~~~~~~~~
对文件资源调用 ``open`` 方法,或者在 MaxCompute 入口调用 ``open_resource`` 都能打开一个资源,
打开后的对象会是 file-like 的对象。
类似于Python内置的 ``open`` 方法,文件资源也支持打开的模式。我们看例子:

.. code-block:: python
Expand Down Expand Up @@ -69,6 +74,23 @@
同时,PyODPS中,文件资源支持以二进制模式打开,打开如说一些压缩文件等等就需要以这种模式,
因此 ``rb`` 就是指以二进制读模式打开文件,``r+b`` 是指以二进制读写模式打开。

对于较大的文件资源,可以使用流式方式读写文件,使用方法为在调用 ``open_resource`` 时增加一个
``stream=True`` 选项:

.. code-block:: python
>>> with o.open_resource('test_file_resource', mode='w') as fp: # 写模式打开
>>> fp.writelines(['Hello\n', 'World\n']) # 写入多行
>>> fp.write('Hello World')
>>> fp.flush() # 手动调用会将更新提交到 MaxCompute
>>>
>>> with resource.open('r', stream=True) as fp: # 以读模式打开
>>> content = fp.read() # 读取全部的内容
>>> line = fp.readline() # 回到资源开头
>>> lines = fp.readlines() # 读成多行
当 ``stream=True`` 时,只支持 ``r`` , ``rb`` , ``w`` , ``wb`` 四种模式。

表资源
-------

Expand Down
53 changes: 53 additions & 0 deletions docs/source/base-schemas.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
.. _schema:

Schema
=======

.. note::

Schema 属于 MaxCompute 的公测功能,需要通过 `新功能测试申请 <https://help.aliyun.com/document_detail/128366.htm>`_ 开通。

`Schema <https://help.aliyun.com/document_detail/437084.html>`_ 是 MaxCompute
介于项目和表 / 资源 / 函数之间的概念,对表 / 资源 / 函数进行进一步归类。

你可以使用 ``create_schema`` 创建一个 Schema 对象:

.. code-block:: python
schema = o.create_schema("test_schema")
print(schema)
使用 ``delete_schema`` 删除一个 Schema 对象:

.. code-block:: python
schema = o.delete_schema("test_schema")
使用 ``list_schema`` 列举所有 Schema 对象:

.. code-block:: python
for schema in o.list_schema():
print(schema)
在开启 Schema 后,MaxCompute 入口对象默认操作的 MaxCompute 对象都位于名为 ``DEFAULT``
的 Schema 下。为操作其他 Schema 下的对象,需要在创建入口对象时指定 Schema,例如:

.. code-block:: python
o = ODPS('**your-access-id**', '**your-secret-access-key**', '**your-default-project**',
endpoint='**your-end-point**', schema='**your-schema-name**')
也可以为不同对象的操作方法指定 ``schema`` 参数。例如,下面的方法列举了 ``test_schema``
下所有的表:

.. code-block:: python
for table in o.list_tables(schema='test_schema'):
print(table)
在执行 SQL 时,可以指定默认 Schema:

.. code-block:: python
o.execute_sql("SELECT * FROM dual", default_schema="test_schema")
4 changes: 2 additions & 2 deletions docs/source/base-sql.rst
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ PyODPS 默认不限制能够从 Instance 读取的数据规模。对于受保护

.. code-block:: python
from odps.models import Schema
from odps.models import TableSchema
myfunc = '''\
from odps.udf import annotate
Expand All @@ -147,7 +147,7 @@ PyODPS 默认不限制能够从 Instance 读取的数据规模。对于受保护
table = o.create_table(
'test_table',
schema=Schema.from_lists(['size'], ['bigint']),
TableSchema.from_lists(['size'], ['bigint']),
if_not_exists=True
)
Expand Down
18 changes: 18 additions & 0 deletions docs/source/base-sqlalchemy.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,24 @@ PyODPS 支持集成 SQLAlchemy,可以使用 SQLAlchemy 查询 MaxCompute 数
conn = engine.connect()
如果需要为 SQL 作业配置执行选项,可以使用 PyODPS 提供的 ``options`` 对象:

.. code-block:: python
from odps import options
from sqlalchemy import create_engine
options.sql.settings = {'odps.sql.hive.compatible': 'true'}
engine = create_engine('odps://<access_id>:<access_key>@<project>/?endpoint=<endpoint>')
也可以直接配置在连接字符串中:

.. code-block:: python
from sqlalchemy import create_engine
engine = create_engine('odps://<access_id>:<access_key>@<project>/?endpoint=<endpoint>&odps.sql.hive.compatible=true')
使用上述方式时,每个 engine 对象都会拥有不同的选项。

调用 SQLAlchemy 接口
----------------------
Expand Down

0 comments on commit e612ec8

Please sign in to comment.