diff --git a/docs/source/python/api.rst b/docs/source/python/api.rst
new file mode 100644
index 000000000..f81753e08
--- /dev/null
+++ b/docs/source/python/api.rst
@@ -0,0 +1,30 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. _api:
+
+*************
+API Reference
+*************
+
+.. toctree::
+ :maxdepth: 2
+
+ api/dataframe
+ api/execution_context
+ api/expression
+ api/functions
diff --git a/docs/source/python/api/dataframe.rst b/docs/source/python/api/dataframe.rst
new file mode 100644
index 000000000..0a3c4c8b1
--- /dev/null
+++ b/docs/source/python/api/dataframe.rst
@@ -0,0 +1,27 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. _api.dataframe:
+.. currentmodule:: datafusion
+
+DataFrame
+=========
+
+.. autosummary::
+ :toctree: ../generated/
+
+ DataFrame
diff --git a/docs/source/python/api/execution_context.rst b/docs/source/python/api/execution_context.rst
new file mode 100644
index 000000000..5b7e0f82f
--- /dev/null
+++ b/docs/source/python/api/execution_context.rst
@@ -0,0 +1,27 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. _api.execution_context:
+.. currentmodule:: datafusion
+
+SessionContext
+==============
+
+.. autosummary::
+ :toctree: ../generated/
+
+ SessionContext
diff --git a/docs/source/python/api/expression.rst b/docs/source/python/api/expression.rst
new file mode 100644
index 000000000..45923fb54
--- /dev/null
+++ b/docs/source/python/api/expression.rst
@@ -0,0 +1,27 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. _api.expression:
+.. currentmodule:: datafusion
+
+Expression
+==========
+
+.. autosummary::
+ :toctree: ../generated/
+
+ Expression
diff --git a/docs/source/python/api/functions.rst b/docs/source/python/api/functions.rst
new file mode 100644
index 000000000..6f10d826e
--- /dev/null
+++ b/docs/source/python/api/functions.rst
@@ -0,0 +1,27 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. _api.functions:
+.. currentmodule:: datafusion
+
+Functions
+=========
+
+.. autosummary::
+ :toctree: ../generated/
+
+ functions
diff --git a/docs/source/python/generated/datafusion.DataFrame.rst b/docs/source/python/generated/datafusion.DataFrame.rst
new file mode 100644
index 000000000..365f5931d
--- /dev/null
+++ b/docs/source/python/generated/datafusion.DataFrame.rst
@@ -0,0 +1,33 @@
+datafusion.DataFrame
+====================
+
+.. currentmodule:: datafusion
+
+.. autoclass:: DataFrame
+
+
+ .. automethod:: __init__
+
+
+ .. rubric:: Methods
+
+ .. autosummary::
+
+ ~DataFrame.__init__
+ ~DataFrame.aggregate
+ ~DataFrame.collect
+ ~DataFrame.explain
+ ~DataFrame.filter
+ ~DataFrame.join
+ ~DataFrame.limit
+ ~DataFrame.schema
+ ~DataFrame.select
+ ~DataFrame.select_columns
+ ~DataFrame.show
+ ~DataFrame.sort
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/source/python/generated/datafusion.Expression.rst b/docs/source/python/generated/datafusion.Expression.rst
new file mode 100644
index 000000000..427fed0d3
--- /dev/null
+++ b/docs/source/python/generated/datafusion.Expression.rst
@@ -0,0 +1,28 @@
+datafusion.Expression
+=====================
+
+.. currentmodule:: datafusion
+
+.. autoclass:: Expression
+
+
+ .. automethod:: __init__
+
+
+ .. rubric:: Methods
+
+ .. autosummary::
+
+ ~Expression.__init__
+ ~Expression.alias
+ ~Expression.cast
+ ~Expression.column
+ ~Expression.is_null
+ ~Expression.literal
+ ~Expression.sort
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/source/python/generated/datafusion.SessionContext.rst b/docs/source/python/generated/datafusion.SessionContext.rst
new file mode 100644
index 000000000..86b942f20
--- /dev/null
+++ b/docs/source/python/generated/datafusion.SessionContext.rst
@@ -0,0 +1,34 @@
+datafusion.SessionContext
+=========================
+
+.. currentmodule:: datafusion
+
+.. autoclass:: SessionContext
+
+
+ .. automethod:: __init__
+
+
+ .. rubric:: Methods
+
+ .. autosummary::
+
+ ~SessionContext.__init__
+ ~SessionContext.catalog
+ ~SessionContext.create_dataframe
+ ~SessionContext.deregister_table
+ ~SessionContext.empty_table
+ ~SessionContext.register_csv
+ ~SessionContext.register_parquet
+ ~SessionContext.register_record_batches
+ ~SessionContext.register_table
+ ~SessionContext.register_udf
+ ~SessionContext.sql
+ ~SessionContext.table
+ ~SessionContext.tables
+
+
+
+
+
+
\ No newline at end of file
diff --git a/docs/source/python/generated/datafusion.functions.rst b/docs/source/python/generated/datafusion.functions.rst
new file mode 100644
index 000000000..4bac3c33a
--- /dev/null
+++ b/docs/source/python/generated/datafusion.functions.rst
@@ -0,0 +1,23 @@
+datafusion.functions
+====================
+
+.. automodule:: datafusion.functions
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/source/python/index.rst b/docs/source/python/index.rst
new file mode 100644
index 000000000..167e66b9f
--- /dev/null
+++ b/docs/source/python/index.rst
@@ -0,0 +1,251 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+====================
+DataFusion in Python
+====================
+
+This is a Python library that binds to the `Apache Arrow <https://arrow.apache.org/>`_ in-memory query engine `DataFusion <https://github.com/apache/arrow-datafusion>`_.
+
+Like PySpark, it allows you to build a plan through SQL or a DataFrame API against in-memory data, Parquet or CSV files, run it in a multi-threaded environment, and obtain the result back in Python.
+
+It also allows you to use UDFs and UDAFs for complex operations.
+
+The major advantage of this library over other execution engines is that this library achieves zero-copy between Python and its execution engine: there is no cost in using UDFs, UDAFs, and collecting the results to Python apart from having to lock the GIL when running those operations.
+
+Its query engine, DataFusion, is written in `Rust <https://www.rust-lang.org/>`_, which makes strong assumptions about thread safety and lack of memory leaks.
+
+Technically, zero-copy is achieved via the `C data interface <https://arrow.apache.org/docs/format/CDataInterface.html>`_.
+
+How to use it
+=============
+
+Simple usage:
+
+.. code-block:: python
+
+ import datafusion
+ from datafusion import functions as f
+ from datafusion import col
+ import pyarrow
+
+ # create a context
+ ctx = datafusion.SessionContext()
+
+ # create a RecordBatch and a new DataFrame from it
+ batch = pyarrow.RecordBatch.from_arrays(
+ [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])],
+ names=["a", "b"],
+ )
+ df = ctx.create_dataframe([[batch]])
+
+ # create a new statement
+ df = df.select(
+ col("a") + col("b"),
+ col("a") - col("b"),
+ )
+
+ # execute and collect the first (and only) batch
+ result = df.collect()[0]
+
+ assert result.column(0) == pyarrow.array([5, 7, 9])
+ assert result.column(1) == pyarrow.array([-3, -3, -3])
+
+
+We can also execute a query against data stored in a CSV file:
+
+.. code-block:: bash
+
+ echo "a,b\n1,4\n2,5\n3,6" > example.csv
+
+
+.. code-block:: python
+
+ import datafusion
+ from datafusion import functions as f
+ from datafusion import col
+ import pyarrow
+
+ # create a context
+ ctx = datafusion.SessionContext()
+
+ # register a CSV
+ ctx.register_csv('example', 'example.csv')
+
+ # create a new statement
+ df = ctx.table('example').select(
+ col("a") + col("b"),
+ col("a") - col("b"),
+ )
+
+ # execute and collect the first (and only) batch
+ result = df.collect()[0]
+
+ assert result.column(0) == pyarrow.array([5, 7, 9])
+ assert result.column(1) == pyarrow.array([-3, -3, -3])
+
+
+The same query can also be executed against a CSV file using SQL:
+
+
+.. code-block:: python
+
+ import datafusion
+ from datafusion import functions as f
+ from datafusion import col
+ import pyarrow
+
+ # create a context
+ ctx = datafusion.SessionContext()
+
+ # register a CSV
+ ctx.register_csv('example', 'example.csv')
+
+ # create a new statement via SQL
+ df = ctx.sql("SELECT a+b, a-b FROM example")
+
+ # execute and collect the first (and only) batch
+ result = df.collect()[0]
+
+ assert result.column(0) == pyarrow.array([5, 7, 9])
+ assert result.column(1) == pyarrow.array([-3, -3, -3])
+
+
+
+UDFs
+----
+
+.. code-block:: python
+
+ def is_null(array: pyarrow.Array) -> pyarrow.Array:
+ return array.is_null()
+
+ udf = f.udf(is_null, [pyarrow.int64()], pyarrow.bool_())
+
+ df = df.select(udf(col("a")))
+
+
+UDAFs
+-----
+
+.. code-block:: python
+
+ import pyarrow
+ import pyarrow.compute
+
+
+ class Accumulator:
+ """
+ Interface of a user-defined accumulation.
+ """
+ def __init__(self):
+ self._sum = pyarrow.scalar(0.0)
+
+ def to_scalars(self) -> [pyarrow.Scalar]:
+ return [self._sum]
+
+ def update(self, values: pyarrow.Array) -> None:
+ # not nice since pyarrow scalars can't be summed yet. This breaks on `None`
+ self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(values).as_py())
+
+ def merge(self, states: pyarrow.Array) -> None:
+ # not nice since pyarrow scalars can't be summed yet. This breaks on `None`
+ self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(states).as_py())
+
+ def evaluate(self) -> pyarrow.Scalar:
+ return self._sum
+
+
+ df = ...
+
+ udaf = f.udaf(Accumulator, pyarrow.float64(), pyarrow.float64(), [pyarrow.float64()])
+
+ df = df.aggregate(
+ [],
+ [udaf(col("a"))]
+ )
+
+
+How to install (from pip)
+=========================
+
+.. code-block:: shell
+
+ pip install datafusion
+
+
+How to develop
+==============
+
+This assumes that you have Rust and Cargo installed. We use the workflow recommended by `pyo3 <https://github.com/PyO3/pyo3>`_ and `maturin <https://github.com/PyO3/maturin>`_.
+
+Bootstrap:
+
+.. code-block:: shell
+
+ # fetch this repo
+ git clone git@github.com:apache/arrow-datafusion.git
+
+ cd arrow-datafusion/python
+
+ # prepare development environment (used to build wheel / install in development)
+ python3 -m venv venv
+ # activate the venv
+ source venv/bin/activate
+ pip install -r requirements.txt
+
+
+Whenever the Rust code changes (your changes or via ``git pull``):
+
+.. code-block:: shell
+
+ # make sure you activate the venv using "source venv/bin/activate" first
+ maturin develop
+ python -m pytest
+
+
+How to update dependencies
+==========================
+
+To change test dependencies, edit ``requirements.in`` and run:
+
+.. code-block:: shell
+
+ # install pip-tools (this can be done only once), also consider running in venv
+ pip install pip-tools
+
+ # change requirements.in and then run
+ pip-compile --generate-hashes
+
+
+To update dependencies, run:
+
+.. code-block:: shell
+
+ pip-compile update
+
+
+More details are available in the `pip-tools documentation <https://github.com/jazzband/pip-tools>`_.
+
+
+API reference
+=============
+
+.. toctree::
+ :maxdepth: 2
+
+ api