From 8ae0dfeb3743d4f3d74fc6aa34b92564ea632d24 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 20 Nov 2023 11:08:44 +0800 Subject: [PATCH 01/11] [SPARK-45861][PYTHON][DOCS] Add user guide for dataframe creation --- python/docs/source/user_guide/10min.rst | 188 ++++++++++++++++++++++++ python/docs/source/user_guide/index.rst | 1 + 2 files changed, 189 insertions(+) create mode 100644 python/docs/source/user_guide/10min.rst diff --git a/python/docs/source/user_guide/10min.rst b/python/docs/source/user_guide/10min.rst new file mode 100644 index 0000000000000..0feda9c118c1c --- /dev/null +++ b/python/docs/source/user_guide/10min.rst @@ -0,0 +1,188 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +===================== +10 minutes to PySpark +===================== + +.. currentmodule:: pyspark.sql + +This is a short introduction to pyspark, geared mainly for new users. +You can see more complex recipes in the :ref:`/reference/index.rst`. + +Basic data structures +--------------------- + +Pyspark provides an important class for handling data: + +1. :class:`DataFrame`: a distributed collection of data grouped into named columns. + +SparkSession creation +--------------------- +PySpark applications start with initializing :class:`SparkSession` which is the entry point of PySpark as below. +In case of running it in PySpark shell via pyspark executable, the shell automatically creates the session +in the variable spark for users. + +.. code-block:: python + + from pyspark.sql import SparkSession + spark = SparkSession.builder.getOrCreate() + +DataFrame creation +------------------ +A PySpark :class:`DataFrame` can be created via :meth:`SparkSession.createDataFrame` typically by passing +a list of lists, tuples, dictionaries and :class:`Row`, a pandas :class:`pandas.DataFrame` +and an :class:`pyspark.RDD` consisting of such a list. +:meth:`SparkSession.createDataFrame` takes the `schema` argument to specify the schema of the DataFrame. +When it is omitted, PySpark infers the corresponding schema by taking a sample from the data. + +Creating a PySpark :class:`DataFrame` from a list of lists +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + df = spark.createDataFrame([['Alice', 1], ['Bob', 5]]) + df + +DataFrame[_1: string, _2: bigint] + + +Creating a PySpark :class:`DataFrame` from a list of tuples +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + df = spark.createDataFrame([('Alice', 1), ('Bob', 5)]) + df + +DataFrame[_1: string, _2: bigint] + + +Creating a PySpark :class:`DataFrame` from a list of dictionaries +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: python + + df = spark.createDataFrame([{'name': 'Alice', 'age': 1}]) + df + +DataFrame[age: bigint, name: string] + + +Creating a PySpark :class:`DataFrame` from a list of :class:`Row` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + from pyspark.sql import Row + Person = Row('name', 'age') + df = spark.createDataFrame([Person("Alice", 1), Person("Bob", 5)]) + df + +DataFrame[name: string, age: bigint] + + +Creating a PySpark :class:`DataFrame` from a :class:`pandas.DataFrame` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + import pandas as pd + df = spark.createDataFrame(pd.DataFrame([[1, 2]])) + df + +DataFrame[0: bigint, 1: bigint] + + +Creating a PySpark :class:`DataFrame` from a :class:`numpy.ndarray` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + import numpy as np + import pandas as pd + df = spark.createDataFrame(pd.DataFrame(data=np.array([[1, 2], [3, 4]]), columns=['a', 'b'])) + df + +DataFrame[a: bigint, b: bigint] + + +Creating a PySpark :class:`DataFrame` from an :class:`pyspark.RDD` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + from pyspark.sql import Row + rdd = spark.sparkContext.parallelize([Row(name = "Alice", age = 2), Row(name = "Bob", age = 5)]) + df = spark.createDataFrame(rdd) + df + +DataFrame[name: string, age: bigint] + + +Creating a PySpark :class:`DataFrame` by reading existing **json** format file data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + df = spark.read.format("json").load("python/test_support/sql/people.json") + df + +DataFrame[age: bigint, name: string] + + +Creating a PySpark :class:`DataFrame` by reading existing **csv** format file data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + df = spark.read.format("csv").load("python/test_support/sql/people.csv") + df + +DataFrame[age: bigint, name: string] + + +Creating a PySpark :class:`DataFrame` by reading existing **parquet** format file data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + df = spark.read.format("parquet").load("python/test_support/sql/people.parquet") + df + +DataFrame[age: bigint, name: string] + +Creating a PySpark :class:`DataFrame` by reading existing **orc** format file data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + df = spark.read.format("parquet").load("python/test_support/sql/people.orc") + df + +DataFrame[age: bigint, name: string] + + +Creating a PySpark :class:`DataFrame` by reading data from other databases using **JDBC** +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: python + + df = spark.read.format("jdbc").options(url=url, dbtable=dbtable).load() + df + +DataFrame[age: bigint, name: string] diff --git a/python/docs/source/user_guide/index.rst b/python/docs/source/user_guide/index.rst index 67f8c8d4d0fe3..1f8437010e721 100644 --- a/python/docs/source/user_guide/index.rst +++ b/python/docs/source/user_guide/index.rst @@ -26,6 +26,7 @@ PySpark specific user guides are available here: :maxdepth: 2 python_packaging + 10min sql/index pandas_on_spark/index From 99d2ca3dcf4f65d895ff10ecd94b4f8eca29b328 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 20 Nov 2023 11:38:28 +0800 Subject: [PATCH 02/11] [SPARK-45861][PYTHON][DOCS] Add user guide for dataframe creation --- .../{10min.rst => dataframe_creation.rst} | 27 +++++++------------ python/docs/source/user_guide/index.rst | 2 +- 2 files changed, 10 insertions(+), 19 deletions(-) rename python/docs/source/user_guide/{10min.rst => dataframe_creation.rst} (89%) diff --git a/python/docs/source/user_guide/10min.rst b/python/docs/source/user_guide/dataframe_creation.rst similarity index 89% rename from python/docs/source/user_guide/10min.rst rename to python/docs/source/user_guide/dataframe_creation.rst index 0feda9c118c1c..d57f3e5b981ac 100644 --- a/python/docs/source/user_guide/10min.rst +++ b/python/docs/source/user_guide/dataframe_creation.rst @@ -15,15 +15,12 @@ specific language governing permissions and limitations under the License. -===================== -10 minutes to PySpark -===================== +================== +DataFrame creation +================== .. currentmodule:: pyspark.sql -This is a short introduction to pyspark, geared mainly for new users. -You can see more complex recipes in the :ref:`/reference/index.rst`. - Basic data structures --------------------- @@ -31,19 +28,9 @@ Pyspark provides an important class for handling data: 1. :class:`DataFrame`: a distributed collection of data grouped into named columns. -SparkSession creation ---------------------- -PySpark applications start with initializing :class:`SparkSession` which is the entry point of PySpark as below. -In case of running it in PySpark shell via pyspark executable, the shell automatically creates the session -in the variable spark for users. - -.. code-block:: python +Creating through `createDataFrame` +---------------------------------- - from pyspark.sql import SparkSession - spark = SparkSession.builder.getOrCreate() - -DataFrame creation ------------------- A PySpark :class:`DataFrame` can be created via :meth:`SparkSession.createDataFrame` typically by passing a list of lists, tuples, dictionaries and :class:`Row`, a pandas :class:`pandas.DataFrame` and an :class:`pyspark.RDD` consisting of such a list. 
@@ -134,6 +121,9 @@ Creating a PySpark :class:`DataFrame` from an :class:`pyspark.RDD` DataFrame[name: string, age: bigint] +Creating through `read.format(...).load(...)` +--------------------------------------------- + Creating a PySpark :class:`DataFrame` by reading existing **json** format file data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -186,3 +176,4 @@ Creating a PySpark :class:`DataFrame` by reading data from other databases using df DataFrame[age: bigint, name: string] + diff --git a/python/docs/source/user_guide/index.rst b/python/docs/source/user_guide/index.rst index 1f8437010e721..86f1a37b7f557 100644 --- a/python/docs/source/user_guide/index.rst +++ b/python/docs/source/user_guide/index.rst @@ -26,7 +26,7 @@ PySpark specific user guides are available here: :maxdepth: 2 python_packaging - 10min + dataframe_creation sql/index pandas_on_spark/index From 74b86c2900959f7270b9587bc4ce651293173eee Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 21 Nov 2023 18:56:36 +0800 Subject: [PATCH 03/11] [SPARK-45861][PYTHON][DOCS] Add user guide for dataframe creation --- .../source/user_guide/dataframe_creation.rst | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/python/docs/source/user_guide/dataframe_creation.rst b/python/docs/source/user_guide/dataframe_creation.rst index d57f3e5b981ac..2a2b3b24a9890 100644 --- a/python/docs/source/user_guide/dataframe_creation.rst +++ b/python/docs/source/user_guide/dataframe_creation.rst @@ -16,25 +16,25 @@ under the License. ================== -DataFrame creation +DataFrame Creation ================== .. currentmodule:: pyspark.sql -Basic data structures +Basic Data Structures --------------------- -Pyspark provides an important class for handling data: +PySpark provides an important class for handling data: 1. :class:`DataFrame`: a distributed collection of data grouped into named columns. -Creating through `createDataFrame` +Creating Through `createDataFrame` ---------------------------------- A PySpark :class:`DataFrame` can be created via :meth:`SparkSession.createDataFrame` typically by passing -a list of lists, tuples, dictionaries and :class:`Row`, a pandas :class:`pandas.DataFrame` -and an :class:`pyspark.RDD` consisting of such a list. -:meth:`SparkSession.createDataFrame` takes the `schema` argument to specify the schema of the DataFrame. +a list of lists, tuples, dictionaries and :class:`Row`, a pandas :class:`pandas.DataFrame`, +a NumPy :class:`numpy.ndarray` and an :class:`pyspark.RDD`. +:meth:`SparkSession.createDataFrame` takes the `schema` argument to specify the schema of the :class:`DataFrame`. When it is omitted, PySpark infers the corresponding schema by taking a sample from the data. Creating a PySpark :class:`DataFrame` from a list of lists @@ -59,6 +59,19 @@ Creating a PySpark :class:`DataFrame` from a list of tuples DataFrame[_1: string, _2: bigint] +Creating a PySpark :class:`DataFrame` with the explicit schema specified +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: python + + from pyspark.sql.types import * + schema = StructType([StructField("name", StringType(), True), StructField("age", IntegerType(), True)]) + df = spark.createDataFrame([('Alice', 1), ('Bob', 5)], schema) + df + +DataFrame[name: string, age: int] + + Creating a PySpark :class:`DataFrame` from a list of dictionaries ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -108,20 +121,7 @@ Creating a PySpark :class:`DataFrame` from a :class:`numpy.ndarray` DataFrame[a: bigint, b: bigint] -Creating a PySpark :class:`DataFrame` from an :class:`pyspark.RDD` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: python - - from pyspark.sql import Row - rdd = spark.sparkContext.parallelize([Row(name = "Alice", age = 2), Row(name = "Bob", age = 5)]) - df = spark.createDataFrame(rdd) - df - -DataFrame[name: string, age: bigint] - - -Creating through `read.format(...).load(...)` +Creating Through `read.format(...).load(...)` --------------------------------------------- Creating a PySpark :class:`DataFrame` by reading existing **json** format file data From 62ad78ddd2901784425a534f9cfdeb8c94554d77 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Thu, 23 Nov 2023 16:29:45 +0800 Subject: [PATCH 04/11] [SPARK-45861][PYTHON][DOCS] Add user guide for dataframe creation --- .../source/user_guide/dataframe_creation.rst | 164 ++++++++++++------ 1 file changed, 109 insertions(+), 55 deletions(-) diff --git a/python/docs/source/user_guide/dataframe_creation.rst b/python/docs/source/user_guide/dataframe_creation.rst index 2a2b3b24a9890..04c08e65aae59 100644 --- a/python/docs/source/user_guide/dataframe_creation.rst +++ b/python/docs/source/user_guide/dataframe_creation.rst @@ -42,10 +42,14 @@ Creating a PySpark :class:`DataFrame` from a list of lists .. code-block:: python - df = spark.createDataFrame([['Alice', 1], ['Bob', 5]]) - df - -DataFrame[_1: string, _2: bigint] + >>> df = spark.createDataFrame([['Alice', 1], ['Bob', 5]]) + >>> df.show() + +-----+---+ + | _1| _2| + +-----+---+ + |Alice| 1| + | Bob| 5| + +-----+---+ Creating a PySpark :class:`DataFrame` from a list of tuples @@ -53,10 +57,14 @@ Creating a PySpark :class:`DataFrame` from a list of tuples .. code-block:: python - df = spark.createDataFrame([('Alice', 1), ('Bob', 5)]) - df - -DataFrame[_1: string, _2: bigint] + >>> df = spark.createDataFrame([('Alice', 1), ('Bob', 5)]) + >>> df.show() + +-----+---+ + | _1| _2| + +-----+---+ + |Alice| 1| + | Bob| 5| + +-----+---+ Creating a PySpark :class:`DataFrame` with the explicit schema specified @@ -64,12 +72,17 @@ Creating a PySpark :class:`DataFrame` with the explicit schema specified .. code-block:: python - from pyspark.sql.types import * - schema = StructType([StructField("name", StringType(), True), StructField("age", IntegerType(), True)]) - df = spark.createDataFrame([('Alice', 1), ('Bob', 5)], schema) - df - -DataFrame[name: string, age: int] + >>> from pyspark.sql.types import * + >>> schema = StructType([StructField("name", StringType(), True), + ... StructField("age", IntegerType(), True)]) + >>> df = spark.createDataFrame([('Alice', 1), ('Bob', 5)], schema) + >>> df.show() + +-----+---+ + | name|age| + +-----+---+ + |Alice| 1| + | Bob| 5| + +-----+---+ Creating a PySpark :class:`DataFrame` from a list of dictionaries @@ -77,10 +90,13 @@ Creating a PySpark :class:`DataFrame` from a list of dictionaries .. 
code-block:: python - df = spark.createDataFrame([{'name': 'Alice', 'age': 1}]) - df - -DataFrame[age: bigint, name: string] + >>> df = spark.createDataFrame([{'name': 'Alice', 'age': 1}]) + >>> df.show() + +---+-----+ + |age| name| + +---+-----+ + | 1|Alice| + +---+-----+ Creating a PySpark :class:`DataFrame` from a list of :class:`Row` @@ -88,12 +104,16 @@ Creating a PySpark :class:`DataFrame` from a list of :class:`Row` .. code-block:: python - from pyspark.sql import Row - Person = Row('name', 'age') - df = spark.createDataFrame([Person("Alice", 1), Person("Bob", 5)]) - df - -DataFrame[name: string, age: bigint] + >>> from pyspark.sql import Row + >>> Person = Row('name', 'age') + >>> df = spark.createDataFrame([Person("Alice", 1), Person("Bob", 5)]) + >>> df.show() + +-----+---+ + | name|age| + +-----+---+ + |Alice| 1| + | Bob| 5| + +-----+---+ Creating a PySpark :class:`DataFrame` from a :class:`pandas.DataFrame` @@ -101,11 +121,14 @@ Creating a PySpark :class:`DataFrame` from a :class:`pandas.DataFrame` .. code-block:: python - import pandas as pd - df = spark.createDataFrame(pd.DataFrame([[1, 2]])) - df - -DataFrame[0: bigint, 1: bigint] + >>> import pandas as pd + >>> df = spark.createDataFrame(pd.DataFrame([[1, 2]])) + >>> df.show() + +---+---+ + | 0| 1| + +---+---+ + | 1| 2| + +---+---+ Creating a PySpark :class:`DataFrame` from a :class:`numpy.ndarray` @@ -113,12 +136,17 @@ Creating a PySpark :class:`DataFrame` from a :class:`numpy.ndarray` .. code-block:: python - import numpy as np - import pandas as pd - df = spark.createDataFrame(pd.DataFrame(data=np.array([[1, 2], [3, 4]]), columns=['a', 'b'])) - df - -DataFrame[a: bigint, b: bigint] + >>> import numpy as np + >>> import pandas as pd + >>> df = spark.createDataFrame(pd.DataFrame(data=np.array([[1, 2], [3, 4]]), + ... columns=['a', 'b'])) + >>> df.show() + +---+---+ + | a| b| + +---+---+ + | 1| 2| + | 3| 4| + +---+---+ Creating Through `read.format(...).load(...)` @@ -129,10 +157,15 @@ Creating a PySpark :class:`DataFrame` by reading existing **json** format file d .. code-block:: python - df = spark.read.format("json").load("python/test_support/sql/people.json") - df - -DataFrame[age: bigint, name: string] + >>> df = spark.read.format("json").load("python/test_support/sql/people.json") + >>> df.show() + +----+-------+ + | age| name| + +----+-------+ + |NULL|Michael| + | 30| Andy| + | 19| Justin| + +----+-------+ Creating a PySpark :class:`DataFrame` by reading existing **csv** format file data @@ -140,10 +173,16 @@ Creating a PySpark :class:`DataFrame` by reading existing **csv** format file da .. code-block:: python - df = spark.read.format("csv").load("python/test_support/sql/people.csv") - df - -DataFrame[age: bigint, name: string] + >>> df = spark.read.format("csv").option("header", "true"). + ... load("python/test_support/sql/people.csv") + >>> df.show() + +----+-------+ + | age| name| + +----+-------+ + |NULL|Michael| + | 30| Andy| + | 19| Justin| + +----+-------+ Creating a PySpark :class:`DataFrame` by reading existing **parquet** format file data @@ -151,20 +190,30 @@ Creating a PySpark :class:`DataFrame` by reading existing **parquet** format fil .. 
code-block:: python - df = spark.read.format("parquet").load("python/test_support/sql/people.parquet") - df - -DataFrame[age: bigint, name: string] + >>> df = spark.read.format("parquet").load("python/test_support/sql/people.parquet") + >>> df.show() + +----+-------+ + | age| name| + +----+-------+ + |NULL|Michael| + | 30| Andy| + | 19| Justin| + +----+-------+ Creating a PySpark :class:`DataFrame` by reading existing **orc** format file data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python - df = spark.read.format("parquet").load("python/test_support/sql/people.orc") - df - -DataFrame[age: bigint, name: string] + >>> df = spark.read.format("parquet").load("python/test_support/sql/people.orc") + >>> df.show() + +----+-------+ + | age| name| + +----+-------+ + |NULL|Michael| + | 30| Andy| + | 19| Justin| + +----+-------+ Creating a PySpark :class:`DataFrame` by reading data from other databases using **JDBC** @@ -172,8 +221,13 @@ Creating a PySpark :class:`DataFrame` by reading data from other databases using .. code-block:: python - df = spark.read.format("jdbc").options(url=url, dbtable=dbtable).load() - df - -DataFrame[age: bigint, name: string] + >>> df = spark.read.format("jdbc").options(url=url, dbtable=dbtable).load() + >>> df.show() + +----+-------+ + | age| name| + +----+-------+ + |NULL|Michael| + | 30| Andy| + | 19| Justin| + +----+-------+ From 6f3cbbb8e3e3a670c96d60a7190c2462270c2c3e Mon Sep 17 00:00:00 2001 From: panbingkun Date: Fri, 24 Nov 2023 11:22:13 +0800 Subject: [PATCH 05/11] Apply suggestions from code review Co-authored-by: Hyukjin Kwon --- python/docs/source/user_guide/dataframe_creation.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/docs/source/user_guide/dataframe_creation.rst b/python/docs/source/user_guide/dataframe_creation.rst index 04c08e65aae59..be60f74d267f8 100644 --- a/python/docs/source/user_guide/dataframe_creation.rst +++ b/python/docs/source/user_guide/dataframe_creation.rst @@ -149,7 +149,7 @@ Creating a PySpark :class:`DataFrame` from a :class:`numpy.ndarray` +---+---+ -Creating Through `read.format(...).load(...)` +Creating through `read.format(...).load(...)` --------------------------------------------- Creating a PySpark :class:`DataFrame` by reading existing **json** format file data @@ -173,8 +173,8 @@ Creating a PySpark :class:`DataFrame` by reading existing **csv** format file da .. code-block:: python - >>> df = spark.read.format("csv").option("header", "true"). - ... load("python/test_support/sql/people.csv") + >>> df = spark.read.format("csv").option("header", "true").load( + ... 
"python/test_support/sql/people.csv") >>> df.show() +----+-------+ | age| name| From 84df90c4e070b5f7dda687d7ebb060242601b11b Mon Sep 17 00:00:00 2001 From: panbingkun Date: Fri, 24 Nov 2023 15:57:11 +0800 Subject: [PATCH 06/11] [SPARK-45861][PYTHON][DOCS] Add user guide for dataframe creation --- python/docs/source/user_guide/index.rst | 1 - .../{ => sql}/dataframe_creation.rst | 70 +++++++++++-------- python/docs/source/user_guide/sql/index.rst | 7 +- 3 files changed, 43 insertions(+), 35 deletions(-) rename python/docs/source/user_guide/{ => sql}/dataframe_creation.rst (80%) diff --git a/python/docs/source/user_guide/index.rst b/python/docs/source/user_guide/index.rst index 86f1a37b7f557..67f8c8d4d0fe3 100644 --- a/python/docs/source/user_guide/index.rst +++ b/python/docs/source/user_guide/index.rst @@ -26,7 +26,6 @@ PySpark specific user guides are available here: :maxdepth: 2 python_packaging - dataframe_creation sql/index pandas_on_spark/index diff --git a/python/docs/source/user_guide/dataframe_creation.rst b/python/docs/source/user_guide/sql/dataframe_creation.rst similarity index 80% rename from python/docs/source/user_guide/dataframe_creation.rst rename to python/docs/source/user_guide/sql/dataframe_creation.rst index be60f74d267f8..bc3476c68c342 100644 --- a/python/docs/source/user_guide/dataframe_creation.rst +++ b/python/docs/source/user_guide/sql/dataframe_creation.rst @@ -21,14 +21,7 @@ DataFrame Creation .. currentmodule:: pyspark.sql -Basic Data Structures ---------------------- - -PySpark provides an important class for handling data: - -1. :class:`DataFrame`: a distributed collection of data grouped into named columns. - -Creating Through `createDataFrame` +Creating through `createDataFrame` ---------------------------------- A PySpark :class:`DataFrame` can be created via :meth:`SparkSession.createDataFrame` typically by passing @@ -85,6 +78,21 @@ Creating a PySpark :class:`DataFrame` with the explicit schema specified +-----+---+ +Creating a PySpark :class:`DataFrame` with the explicit DDL-formatted string schema specified +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + >>> df = spark.createDataFrame([('Alice', 1), ('Bob', 5)], schema = "name string, age int") + >>> df.show() + +-----+---+ + | name|age| + +-----+---+ + |Alice| 1| + | Bob| 5| + +-----+---+ + + Creating a PySpark :class:`DataFrame` from a list of dictionaries ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -190,14 +198,22 @@ Creating a PySpark :class:`DataFrame` by reading existing **parquet** format fil .. code-block:: python - >>> df = spark.read.format("parquet").load("python/test_support/sql/people.parquet") - >>> df.show() + >>> # Write a Parquet file to the temporary directory, and read it back + >>> import tempfile + >>> with tempfile.TemporaryDirectory() as d: + ... # Overwrite the path with a new Parquet file + ... spark.createDataFrame( + ... [{"age": None, "name": "Michael"}, {"age": 30, "name": "Andy"}] + ... ).write.mode("overwrite").format("parquet").save(d) + ... + ... # Read the Parquet file as a DataFrame + ... df = spark.read.format("parquet").load(d) + ... df.show() +----+-------+ | age| name| +----+-------+ - |NULL|Michael| | 30| Andy| - | 19| Justin| + |NULL|Michael| +----+-------+ Creating a PySpark :class:`DataFrame` by reading existing **orc** format file data @@ -205,29 +221,21 @@ Creating a PySpark :class:`DataFrame` by reading existing **orc** format file da .. 
code-block:: python - >>> df = spark.read.format("parquet").load("python/test_support/sql/people.orc") - >>> df.show() + >>> # Write a Orc file to the temporary directory, and read it back + >>> import tempfile + >>> with tempfile.TemporaryDirectory() as d: + ... # Overwrite the path with a new Orc file + ... spark.createDataFrame( + ... [{"age": None, "name": "Michael"}, {"age": 30, "name": "Andy"}] + ... ).write.mode("overwrite").format("orc").save(d) + ... + ... # Read the Orc file as a DataFrame + ... df = spark.read.format("orc").load(d) + ... df.show() +----+-------+ | age| name| +----+-------+ - |NULL|Michael| | 30| Andy| - | 19| Justin| - +----+-------+ - - -Creating a PySpark :class:`DataFrame` by reading data from other databases using **JDBC** -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: python - - >>> df = spark.read.format("jdbc").options(url=url, dbtable=dbtable).load() - >>> df.show() - +----+-------+ - | age| name| - +----+-------+ |NULL|Michael| - | 30| Andy| - | 19| Justin| +----+-------+ diff --git a/python/docs/source/user_guide/sql/index.rst b/python/docs/source/user_guide/sql/index.rst index 118cf139d9b38..e4da562503191 100644 --- a/python/docs/source/user_guide/sql/index.rst +++ b/python/docs/source/user_guide/sql/index.rst @@ -16,13 +16,14 @@ under the License. -========= -Spark SQL -========= +================= +DataFrame and SQL +================= .. toctree:: :maxdepth: 2 + dataframe_creation arrow_pandas python_udtf type_conversions From 98195bb7dfde952f829504c0699b8355650da127 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Fri, 24 Nov 2023 16:33:44 +0800 Subject: [PATCH 07/11] [SPARK-45861][PYTHON][DOCS] Add user guide for dataframe creation --- python/docs/source/user_guide/sql/dataframe_creation.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/docs/source/user_guide/sql/dataframe_creation.rst b/python/docs/source/user_guide/sql/dataframe_creation.rst index bc3476c68c342..28d794232604a 100644 --- a/python/docs/source/user_guide/sql/dataframe_creation.rst +++ b/python/docs/source/user_guide/sql/dataframe_creation.rst @@ -205,7 +205,6 @@ Creating a PySpark :class:`DataFrame` by reading existing **parquet** format fil ... spark.createDataFrame( ... [{"age": None, "name": "Michael"}, {"age": 30, "name": "Andy"}] ... ).write.mode("overwrite").format("parquet").save(d) - ... ... # Read the Parquet file as a DataFrame ... df = spark.read.format("parquet").load(d) ... df.show() @@ -228,7 +227,6 @@ Creating a PySpark :class:`DataFrame` by reading existing **orc** format file da ... spark.createDataFrame( ... [{"age": None, "name": "Michael"}, {"age": 30, "name": "Andy"}] ... ).write.mode("overwrite").format("orc").save(d) - ... ... # Read the Orc file as a DataFrame ... df = spark.read.format("orc").load(d) ... 
df.show() From 8b4d8d5767a2d138f512fccd75da6e5bedd55350 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Thu, 30 Nov 2023 09:24:47 +0800 Subject: [PATCH 08/11] Apply suggestions from code review Co-authored-by: allisonwang-db --- .../user_guide/sql/dataframe_creation.rst | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/python/docs/source/user_guide/sql/dataframe_creation.rst b/python/docs/source/user_guide/sql/dataframe_creation.rst index 28d794232604a..3ea7d4154020c 100644 --- a/python/docs/source/user_guide/sql/dataframe_creation.rst +++ b/python/docs/source/user_guide/sql/dataframe_creation.rst @@ -18,7 +18,7 @@ ================== DataFrame Creation ================== - +PySpark allows you to create DataFrames in several ways. Let's explore these methods with simple examples. .. currentmodule:: pyspark.sql Creating through `createDataFrame` @@ -30,7 +30,7 @@ a NumPy :class:`numpy.ndarray` and an :class:`pyspark.RDD`. :meth:`SparkSession.createDataFrame` takes the `schema` argument to specify the schema of the :class:`DataFrame`. When it is omitted, PySpark infers the corresponding schema by taking a sample from the data. -Creating a PySpark :class:`DataFrame` from a list of lists +Creating a :class:`DataFrame` from Lists ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python @@ -60,14 +60,16 @@ Creating a PySpark :class:`DataFrame` from a list of tuples +-----+---+ -Creating a PySpark :class:`DataFrame` with the explicit schema specified +Creating a :class:`DataFrame` with a Specified Schema ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - +Define a schema and use it to create a DataFrame. A schema describes the column names and types. .. code-block:: python >>> from pyspark.sql.types import * - >>> schema = StructType([StructField("name", StringType(), True), - ... StructField("age", IntegerType(), True)]) +schema = StructType([ + StructField("name", StringType(), True), + StructField("age", IntegerType(), True) +]) >>> df = spark.createDataFrame([('Alice', 1), ('Bob', 5)], schema) >>> df.show() +-----+---+ @@ -93,8 +95,9 @@ Creating a PySpark :class:`DataFrame` with the explicit DDL-formatted string sch +-----+---+ -Creating a PySpark :class:`DataFrame` from a list of dictionaries +Creating a :class:`DataFrame` from Dictionaries ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Dictionaries with keys as column names can also be used. .. code-block:: python @@ -107,8 +110,9 @@ Creating a PySpark :class:`DataFrame` from a list of dictionaries +---+-----+ -Creating a PySpark :class:`DataFrame` from a list of :class:`Row` +Creating a :class:`DataFrame` from :class:`Row`s ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Use the Row type to define rows of a DataFrame. .. code-block:: python @@ -124,7 +128,7 @@ Creating a PySpark :class:`DataFrame` from a list of :class:`Row` +-----+---+ -Creating a PySpark :class:`DataFrame` from a :class:`pandas.DataFrame` +Creating a :class:`DataFrame` from a :class:`pandas.DataFrame` or a :class:`numpy.ndarray` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code-block:: python From 1ffcf88de442c7f39f259c8cf1ff7a180c79ddc1 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Thu, 30 Nov 2023 10:16:02 +0800 Subject: [PATCH 09/11] [SPARK-45861][PYTHON][DOCS] Add user guide for dataframe creation --- ...e_creation.rst => creating_dataframes.rst} | 85 +++++++------------ python/docs/source/user_guide/sql/index.rst | 8 +- 2 files changed, 36 insertions(+), 57 deletions(-) rename python/docs/source/user_guide/sql/{dataframe_creation.rst => creating_dataframes.rst} (65%) diff --git a/python/docs/source/user_guide/sql/dataframe_creation.rst b/python/docs/source/user_guide/sql/creating_dataframes.rst similarity index 65% rename from python/docs/source/user_guide/sql/dataframe_creation.rst rename to python/docs/source/user_guide/sql/creating_dataframes.rst index 3ea7d4154020c..021b58c325a7d 100644 --- a/python/docs/source/user_guide/sql/dataframe_creation.rst +++ b/python/docs/source/user_guide/sql/creating_dataframes.rst @@ -15,25 +15,20 @@ specific language governing permissions and limitations under the License. -================== -DataFrame Creation -================== -PySpark allows you to create DataFrames in several ways. Let's explore these methods with simple examples. -.. currentmodule:: pyspark.sql -Creating through `createDataFrame` ----------------------------------- +=================== +Creating DataFrames +=================== + +PySpark allows you to create :class:`DataFrame`s in several ways. Let's explore these methods with simple examples. -A PySpark :class:`DataFrame` can be created via :meth:`SparkSession.createDataFrame` typically by passing -a list of lists, tuples, dictionaries and :class:`Row`, a pandas :class:`pandas.DataFrame`, -a NumPy :class:`numpy.ndarray` and an :class:`pyspark.RDD`. -:meth:`SparkSession.createDataFrame` takes the `schema` argument to specify the schema of the :class:`DataFrame`. -When it is omitted, PySpark infers the corresponding schema by taking a sample from the data. +.. currentmodule:: pyspark.sql Creating a :class:`DataFrame` from Lists -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python + :emphasize-lines: 4 >>> df = spark.createDataFrame([['Alice', 1], ['Bob', 5]]) >>> df.show() @@ -45,31 +40,18 @@ Creating a :class:`DataFrame` from Lists +-----+---+ -Creating a PySpark :class:`DataFrame` from a list of tuples -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: python - - >>> df = spark.createDataFrame([('Alice', 1), ('Bob', 5)]) - >>> df.show() - +-----+---+ - | _1| _2| - +-----+---+ - |Alice| 1| - | Bob| 5| - +-----+---+ - - Creating a :class:`DataFrame` with a Specified Schema -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Define a schema and use it to create a DataFrame. A schema describes the column names and types. + .. code-block:: python - >>> from pyspark.sql.types import * -schema = StructType([ - StructField("name", StringType(), True), - StructField("age", IntegerType(), True) -]) + >>> from pyspark.sql.types import StructType, StructField, StringType, IntegerType + >>> schema = StructType([ + ... StructField("name", StringType(), True), + ... StructField("age", IntegerType(), True) + ... 
]) >>> df = spark.createDataFrame([('Alice', 1), ('Bob', 5)], schema) >>> df.show() +-----+---+ @@ -79,9 +61,7 @@ schema = StructType([ | Bob| 5| +-----+---+ - -Creating a PySpark :class:`DataFrame` with the explicit DDL-formatted string schema specified -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Use DDL-formatted string schema to create a DataFrame. .. code-block:: python @@ -96,7 +76,8 @@ Creating a PySpark :class:`DataFrame` with the explicit DDL-formatted string sch Creating a :class:`DataFrame` from Dictionaries -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Dictionaries with keys as column names can also be used. .. code-block:: python @@ -111,7 +92,8 @@ Dictionaries with keys as column names can also be used. Creating a :class:`DataFrame` from :class:`Row`s -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Use the Row type to define rows of a DataFrame. .. code-block:: python @@ -129,7 +111,7 @@ Use the Row type to define rows of a DataFrame. Creating a :class:`DataFrame` from a :class:`pandas.DataFrame` or a :class:`numpy.ndarray` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python @@ -143,9 +125,6 @@ Creating a :class:`DataFrame` from a :class:`pandas.DataFrame` or a :class:`nump +---+---+ -Creating a PySpark :class:`DataFrame` from a :class:`numpy.ndarray` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - .. code-block:: python >>> import numpy as np @@ -161,11 +140,11 @@ Creating a PySpark :class:`DataFrame` from a :class:`numpy.ndarray` +---+---+ -Creating through `read.format(...).load(...)` ---------------------------------------------- +Reading Data from Files +----------------------- -Creating a PySpark :class:`DataFrame` by reading existing **json** format file data -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Example with **JSON** +~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python @@ -180,8 +159,8 @@ Creating a PySpark :class:`DataFrame` by reading existing **json** format file d +----+-------+ -Creating a PySpark :class:`DataFrame` by reading existing **csv** format file data -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Example with **CSV** +~~~~~~~~~~~~~~~~~~~~ .. code-block:: python @@ -197,8 +176,8 @@ Creating a PySpark :class:`DataFrame` by reading existing **csv** format file da +----+-------+ -Creating a PySpark :class:`DataFrame` by reading existing **parquet** format file data -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Example with **PARQUET** +~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python @@ -219,8 +198,8 @@ Creating a PySpark :class:`DataFrame` by reading existing **parquet** format fil |NULL|Michael| +----+-------+ -Creating a PySpark :class:`DataFrame` by reading existing **orc** format file data -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Example with **ORC** +~~~~~~~~~~~~~~~~~~~~ .. 
code-block:: python diff --git a/python/docs/source/user_guide/sql/index.rst b/python/docs/source/user_guide/sql/index.rst index e4da562503191..1d3c9057f99e5 100644 --- a/python/docs/source/user_guide/sql/index.rst +++ b/python/docs/source/user_guide/sql/index.rst @@ -16,14 +16,14 @@ under the License. -================= -DataFrame and SQL -================= +======================== +Spark SQL and DataFrames +======================== .. toctree:: :maxdepth: 2 - dataframe_creation + creating_dataframes arrow_pandas python_udtf type_conversions From 3feff95acf8898b50f783beed741da9b0556d850 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Thu, 30 Nov 2023 12:03:16 +0800 Subject: [PATCH 10/11] [SPARK-45861][PYTHON][DOCS] Add user guide for dataframe creation --- .../user_guide/sql/creating_dataframes.rst | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/python/docs/source/user_guide/sql/creating_dataframes.rst b/python/docs/source/user_guide/sql/creating_dataframes.rst index 021b58c325a7d..507f632479421 100644 --- a/python/docs/source/user_guide/sql/creating_dataframes.rst +++ b/python/docs/source/user_guide/sql/creating_dataframes.rst @@ -20,16 +20,17 @@ Creating DataFrames =================== -PySpark allows you to create :class:`DataFrame`s in several ways. Let's explore these methods with simple examples. - .. currentmodule:: pyspark.sql +PySpark allows you to create :class:`DataFrame`\s in several ways. Let's explore these methods with simple examples. + Creating a :class:`DataFrame` from Lists ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: python - :emphasize-lines: 4 + :emphasize-lines: 5 + >>> # when the schema is not provided, the resulting DataFrame has _1 and _2 as the schema >>> df = spark.createDataFrame([['Alice', 1], ['Bob', 5]]) >>> df.show() +-----+---+ @@ -43,7 +44,7 @@ Creating a :class:`DataFrame` from Lists Creating a :class:`DataFrame` with a Specified Schema ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Define a schema and use it to create a DataFrame. A schema describes the column names and types. +Define a schema and use it to create a :class:`DataFrame`. A schema describes the column names and types. .. code-block:: python @@ -51,7 +52,7 @@ Define a schema and use it to create a DataFrame. A schema describes the column >>> schema = StructType([ ... StructField("name", StringType(), True), ... StructField("age", IntegerType(), True) - ... ]) + ... ]) >>> df = spark.createDataFrame([('Alice', 1), ('Bob', 5)], schema) >>> df.show() +-----+---+ @@ -61,7 +62,7 @@ Define a schema and use it to create a DataFrame. A schema describes the column | Bob| 5| +-----+---+ -Use DDL-formatted string schema to create a DataFrame. +Use DDL-formatted string schema to create a :class:`DataFrame`. .. code-block:: python @@ -91,10 +92,10 @@ Dictionaries with keys as column names can also be used. +---+-----+ -Creating a :class:`DataFrame` from :class:`Row`s -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Creating a :class:`DataFrame` from :class:`Row`\s +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Use the Row type to define rows of a DataFrame. +Use the :class:`Row` type to define rows of a :class:`DataFrame`. .. 
code-block:: python From 2f0ae13a9afada93b5598439d3e7ae4f22113c89 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Thu, 14 Dec 2023 11:43:02 +0800 Subject: [PATCH 11/11] Apply suggestions from code review Co-authored-by: Nicholas Chammas --- python/docs/source/user_guide/sql/creating_dataframes.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/docs/source/user_guide/sql/creating_dataframes.rst b/python/docs/source/user_guide/sql/creating_dataframes.rst index 507f632479421..e81f27f2b4d46 100644 --- a/python/docs/source/user_guide/sql/creating_dataframes.rst +++ b/python/docs/source/user_guide/sql/creating_dataframes.rst @@ -182,7 +182,7 @@ Example with **PARQUET** .. code-block:: python - >>> # Write a Parquet file to the temporary directory, and read it back + >>> # Write a Parquet file to a temporary directory and read it back. >>> import tempfile >>> with tempfile.TemporaryDirectory() as d: ... # Overwrite the path with a new Parquet file @@ -204,14 +204,14 @@ Example with **ORC** .. code-block:: python - >>> # Write a Orc file to the temporary directory, and read it back + >>> # Write an ORC file to a temporary directory and read it back. >>> import tempfile >>> with tempfile.TemporaryDirectory() as d: ... # Overwrite the path with a new Orc file ... spark.createDataFrame( ... [{"age": None, "name": "Michael"}, {"age": 30, "name": "Andy"}] ... ).write.mode("overwrite").format("orc").save(d) - ... # Read the Orc file as a DataFrame + ... # Read the ORC file as a DataFrame ... df = spark.read.format("orc").load(d) ... df.show() +----+-------+
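
Taken together, the in-memory creation paths documented in the patches above all go through :meth:`SparkSession.createDataFrame`. The following is a hedged, self-contained sketch rather than part of any patch hunk: it assumes only a local PySpark and pandas installation, and the application name and sample rows are illustrative.

.. code-block:: python

    # Hedged sketch (not part of the patch series): runs the createDataFrame
    # paths shown above as one standalone script instead of shell doctests.
    import pandas as pd

    from pyspark.sql import Row, SparkSession
    from pyspark.sql.types import IntegerType, StringType, StructField, StructType

    spark = SparkSession.builder.appName("creating-dataframes-sketch").getOrCreate()

    # Explicit StructType schema, mirroring the "Specified Schema" example.
    schema = StructType([
        StructField("name", StringType(), True),
        StructField("age", IntegerType(), True),
    ])
    spark.createDataFrame([("Alice", 1), ("Bob", 5)], schema).show()

    # The equivalent DDL-formatted string schema.
    spark.createDataFrame([("Alice", 1), ("Bob", 5)], schema="name string, age int").show()

    # Schema inference from dictionaries, Rows, and a pandas DataFrame.
    spark.createDataFrame([{"name": "Alice", "age": 1}]).show()
    spark.createDataFrame([Row(name="Alice", age=1), Row(name="Bob", age=5)]).show()
    spark.createDataFrame(pd.DataFrame({"name": ["Alice", "Bob"], "age": [1, 5]})).show()

    spark.stop()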
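
The read-side examples rely either on sample files shipped in the Spark source tree (``python/test_support/sql/...``) or on a doctest temporary directory. A hedged variant that assumes nothing beyond write access to a temporary directory first writes each format and then reads it back, following the same pattern as the Parquet and ORC doctests above.

.. code-block:: python

    # Hedged sketch (not part of the patch series): round-trips a small
    # DataFrame through JSON, CSV, Parquet and ORC under a temporary directory,
    # so it does not depend on the python/test_support/sql sample files.
    import os
    import tempfile

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.appName("read-format-sketch").getOrCreate()
    people = spark.createDataFrame(
        [{"age": None, "name": "Michael"}, {"age": 30, "name": "Andy"}]
    )

    with tempfile.TemporaryDirectory() as d:
        for fmt in ["json", "csv", "parquet", "orc"]:
            path = os.path.join(d, fmt)
            writer = people.write.mode("overwrite").format(fmt)
            reader = spark.read.format(fmt)
            if fmt == "csv":
                # CSV keeps no schema, so carry column names via a header row.
                writer = writer.option("header", "true")
                reader = reader.option("header", "true")
            writer.save(path)
            reader.load(path).show()

    spark.stop()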