# Show Data
![image.png](attachment:1f7975f8-4e99-4e72-aeb9-5898e23dcbea.png)

In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql import functions as F
from pyspark.sql import Window

# Get or create the SparkSession used by this notebook
spark = SparkSession.builder.appName('PySparkSyntax').getOrCreate()

# Describe the DataFrame layout: three nullable columns (name, age, city)
schema = (
    StructType()
    .add("name", StringType(), True)
    .add("age", IntegerType(), True)
    .add("city", StringType(), True)
)

# Sample rows; note the repeated "Bob" name and the duplicated "Charlie" row
data = [
    ("Alice", 25, "New York"),
    ("Bob", 30, "San Francisco"),
    ("Bob", 12, "Las Vegas"),
    ("Charlie", 35, "Chicago"),
    ("Charlie", 35, "Chicago"),
]

# Build the DataFrame from the rows and the explicit schema
df = spark.createDataFrame(data=data, schema=schema)

# Print the DataFrame
df.show()

## [show](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html)

DataFrame.show(n: int = 20, truncate: Union[bool, int] = True, vertical: bool = False) → None

Prints the first n rows to the console.

Parameters:

n:int, optional

Number of rows to show.

truncate: bool or int, optional

If set to True, truncate strings longer than 20 chars by default. If set to a number greater than one, truncates long strings to length `truncate` and aligns cells right.

vertical: bool, optional

If set to True, print output rows vertically (one line per column value).

In [None]:
# Default call: prints up to 20 rows and truncates strings longer than 20 characters.
df.show()

In [None]:
# Print at most three rows.
df.show(n=3)

In [None]:
# Print at most three rows, truncating long strings to three characters.
df.show(n=3, truncate=3)

In [None]:
# Three rows, long strings truncated to three characters, printed vertically
# (one line per column value).
df.show(n=3, truncate=3, vertical=True)

In [None]:
# Print the rows vertically, one line per column value.
df.show(vertical=True)

## [toPandas](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.toPandas.html)

DataFrame.toPandas() → PandasDataFrameLike

Returns the contents of this DataFrame as a pandas `pandas.DataFrame`.

This is only available if Pandas is installed and available.

In [None]:
# Return the contents of df as a pandas.DataFrame (pandas must be installed).
df.toPandas()

## [count](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.count.html#pyspark.sql.DataFrame.count)

DataFrame.count() → int

Returns the number of rows in this DataFrame.

In [None]:
# Number of rows in df, returned as an int.
df.count()

## [limit](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.limit.html)

DataFrame.limit(num: int) → pyspark.sql.dataframe.DataFrame

Limits the result count to the number specified.

In [None]:
# Narrow df to the name and age columns. This rebinds df, so every cell
# that runs after this one works with the two-column DataFrame.
df = df.select(F.col('name'), F.col('age'))

# Limit the result to a single row and display it as a pandas DataFrame.
df.limit(num=1).toPandas()

## [collect](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.collect.html#pyspark.sql.DataFrame.collect)

DataFrame.collect() → List[pyspark.sql.types.Row]

Returns all the records as a list of Row.

In [None]:
# Return every record in df as a list of Row objects.
df.collect()

In [None]:
# Check the Python type of the object that collect() returns.
type(df.collect())

In [None]:
# Collect the records, then print the name and age fields of each Row.
collected_rows = df.collect()
for record in collected_rows:
    print(record['name'], record['age'])

## [take](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.take.html#pyspark.sql.DataFrame.take)

DataFrame.take(num: int) → List[pyspark.sql.types.Row]

Returns the first num rows as a list of Row.

In [None]:
# First two rows, returned as a list of Row objects.
df.take(num=2)

## [head](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.head.html)

DataFrame.head(n: Optional[int] = None) → Union[pyspark.sql.types.Row, None, List[pyspark.sql.types.Row]]

Returns the first n rows.

In [None]:
# First two rows of df.
df.head(n=2)

## [first](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.first.html)

DataFrame.first() → Optional[pyspark.sql.types.Row]

Returns the first row as a Row.

In [None]:
# Return the first row of df as a Row.
df.first()

## [tail](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.tail.html#pyspark.sql.DataFrame.tail)

DataFrame.tail(num: int) → List[pyspark.sql.types.Row]

Returns the last num rows as a list of Row.

In [None]:
# Last two rows, returned as a list of Row objects.
df.tail(num=2)