Patch summary (free text ahead of the first "diff --git" header):

  * python/pyspark/sql/session.py: the createDataFrame docstring now lists
    ``dict`` among the example row representations accepted for `data`.
  * python/pyspark/sql/tests/test_types.py: imports `dataclass` and `asdict`
    and adds test_create_dataframe_from_dataclasses, which builds a DataFrame
    from a one-element list holding a dataclass instance and asserts that
    the first Row's asDict() equals dataclasses.asdict() of that instance.

NOTE(review): the leading whitespace of the context and added lines below was
reconstructed from Python structure; the hunk line counts (7/7, 6/7, 6/17)
are consistent with the content, but confirm the indentation against the
target tree before applying.

diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index 0e367eeaf044c..0a19921f7286a 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -1299,7 +1299,7 @@ def createDataFrame( # type: ignore[misc]
         ----------
         data : :class:`RDD` or iterable
             an RDD of any kind of SQL data representation (:class:`Row`,
-            :class:`tuple`, ``int``, ``boolean``, etc.), or :class:`list`,
+            :class:`tuple`, ``int``, ``boolean``, ``dict``, etc.), or :class:`list`,
             :class:`pandas.DataFrame` or :class:`numpy.ndarray`.
         schema : :class:`pyspark.sql.types.DataType`, str or list, optional
             a :class:`pyspark.sql.types.DataType` or a datatype string or a list of
diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py
index e61725d471dd8..f3169eb28a728 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -23,6 +23,7 @@
 import pickle
 import sys
 import unittest
+from dataclasses import dataclass, asdict
 
 from pyspark.sql import Row
 from pyspark.sql import functions as F
@@ -412,6 +413,17 @@ def test_create_dataframe_from_dict_respects_schema(self):
         df = self.spark.createDataFrame([{"a": 1}], ["b"])
         self.assertEqual(df.columns, ["b"])
 
+    def test_create_dataframe_from_dataclasses(self):
+        @dataclass
+        class User:
+            name: str
+            age: int
+            is_active: bool
+
+        user = User(name="John", age=30, is_active=True)
+        r = self.spark.createDataFrame([user]).first()
+        self.assertEqual(asdict(user), r.asDict())
+
     def test_negative_decimal(self):
         try:
             self.spark.sql("set spark.sql.legacy.allowNegativeScaleOfDecimal=true")