From 67df31d26bf168cfb9176df001cefcfdb67fa5b6 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 27 Feb 2024 11:29:33 +0900 Subject: [PATCH 1/3] Add a test case for createDataFrame with dataclasses --- python/pyspark/sql/session.py | 2 +- python/pyspark/sql/tests/test_types.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 0e367eeaf044c..1d9a1949c34eb 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -1299,7 +1299,7 @@ def createDataFrame( # type: ignore[misc] ---------- data : :class:`RDD` or iterable an RDD of any kind of SQL data representation (:class:`Row`, - :class:`tuple`, ``int``, ``boolean``, etc.), or :class:`list`, + :class:`tuple`, ``int``, ``boolean``, ``dict``` etc.), or :class:`list`, :class:`pandas.DataFrame` or :class:`numpy.ndarray`. schema : :class:`pyspark.sql.types.DataType`, str or list, optional a :class:`pyspark.sql.types.DataType` or a datatype string or a list of diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index e61725d471dd8..f3169eb28a728 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -23,6 +23,7 @@ import pickle import sys import unittest +from dataclasses import dataclass, asdict from pyspark.sql import Row from pyspark.sql import functions as F @@ -412,6 +413,17 @@ def test_create_dataframe_from_dict_respects_schema(self): df = self.spark.createDataFrame([{"a": 1}], ["b"]) self.assertEqual(df.columns, ["b"]) + def test_create_dataframe_from_dataclasses(self): + @dataclass + class User: + name: str + age: int + is_active: bool + + user = User(name="John", age=30, is_active=True) + r = self.spark.createDataFrame([user]).first() + self.assertEqual(asdict(user), r.asDict()) + def test_negative_decimal(self): try: self.spark.sql("set spark.sql.legacy.allowNegativeScaleOfDecimal=true") From 4088fc8c8fa8f8f66a55e435d569650006ee3e68 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 27 Feb 2024 11:33:40 +0900 Subject: [PATCH 2/3] Update python/pyspark/sql/session.py --- python/pyspark/sql/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 1d9a1949c34eb..8491dcc67ba05 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -1299,7 +1299,7 @@ def createDataFrame( # type: ignore[misc] ---------- data : :class:`RDD` or iterable an RDD of any kind of SQL data representation (:class:`Row`, - :class:`tuple`, ``int``, ``boolean``, ``dict``` etc.), or :class:`list`, + :class:`tuple`, ``int``, ``boolean``, ``dict`` etc.), or :class:`list`, :class:`pandas.DataFrame` or :class:`numpy.ndarray`. schema : :class:`pyspark.sql.types.DataType`, str or list, optional a :class:`pyspark.sql.types.DataType` or a datatype string or a list of From f1b943c324eb3fc3e64eede81804fc91024e45d4 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 27 Feb 2024 11:34:04 +0900 Subject: [PATCH 3/3] Update python/pyspark/sql/session.py --- python/pyspark/sql/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 8491dcc67ba05..0a19921f7286a 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -1299,7 +1299,7 @@ def createDataFrame( # type: ignore[misc] ---------- data : :class:`RDD` or iterable an RDD of any kind of SQL data representation (:class:`Row`, - :class:`tuple`, ``int``, ``boolean``, ``dict`` etc.), or :class:`list`, + :class:`tuple`, ``int``, ``boolean``, ``dict``, etc.), or :class:`list`, :class:`pandas.DataFrame` or :class:`numpy.ndarray`. schema : :class:`pyspark.sql.types.DataType`, str or list, optional a :class:`pyspark.sql.types.DataType` or a datatype string or a list of