# PySpark: Pandas with Apache Arrow
Reference: [Apache Arrow in PySpark (official Spark documentation)](https://spark.apache.org/docs/latest/sql-pyspark-pandas-with-arrow.html#apache-arrow-in-spark)

In [1]:
# Must run before any `pyspark` import in this notebook.
# NOTE(review): findspark.init() presumably locates the local Spark install
# (e.g. via SPARK_HOME) and makes `pyspark` importable — confirm for this env.
import findspark
findspark.init()

In [2]:
from pyspark.sql import SparkSession

# Obtain the session used by every later cell: getOrCreate() hands back an
# already-running session if there is one, otherwise it starts a new one
# under the application name "pyspark-arrow".
spark = (
    SparkSession.builder
    .appName("pyspark-arrow")
    .getOrCreate()
)

In [3]:
import numpy as np
import pandas as pd

# Enable Arrow-based columnar data transfers between Spark and pandas for
# this session (affects the createDataFrame / toPandas calls below).
# NOTE(review): Spark 3.0+ documents this key as deprecated in favor of
# "spark.sql.execution.arrow.pyspark.enabled"; the key below is the one
# Spark 2.x understands — confirm the target Spark version before changing it.
spark.conf.set("spark.sql.execution.arrow.enabled", "true")

In [4]:
# Generate a Pandas DataFrame: 100 rows x 3 columns of uniform floats in [0, 1).
# The values come from a locally seeded generator so that "Restart & Run All"
# reproduces exactly the same data, without touching NumPy's global random
# state. RandomState is used (rather than default_rng) so the cell also runs
# on older NumPy releases.
pdf = pd.DataFrame(np.random.RandomState(42).rand(100, 3))

# Create a Spark DataFrame from the Pandas DataFrame. The transfer goes
# through Arrow when Arrow has been enabled on the session's configuration.
df = spark.createDataFrame(pdf)

In [5]:
# Round trip: select every column of the Spark DataFrame and collect it back
# into a Pandas DataFrame (Arrow-backed when enabled on the session).
result_pdf = (
    df
    .select("*")
    .toPandas()
)

# Bare last expression so the notebook renders the frame as a rich table.
result_pdf



Unnamed: 0,0,1,2
0,0.581765,0.421754,0.746082
1,0.450096,0.838185,0.650798
2,0.906061,0.902553,0.582332
3,0.027134,0.367107,0.342978
4,0.271463,0.658056,0.881614
...,...,...,...
95,0.894291,0.621559,0.434179
96,0.336394,0.382479,0.723049
97,0.940094,0.693528,0.695185
98,0.571244,0.793291,0.476467


In [6]:
# Stop the Spark session now that the demo is finished.
spark.stop()