In [0]:
# Reading a CSV file from DBFS
# NOTE: despite its name, `output_csv_path` is the *input* path here; this cell
# only reads from it and previews the result.
output_csv_path = "/FileStore/onlinefoods.csv"
csv_data = (
    spark.read
    .option("header", True)        # first line of the file holds the column names
    .option("inferSchema", True)   # let Spark infer column types from the data
    .csv(output_csv_path)
)
csv_data.show()

+---+------+--------------+--------------+---------------+--------------------------+-----------+--------+---------+--------+------+---------+
|Age|Gender|Marital Status|    Occupation| Monthly Income|Educational Qualifications|Family size|latitude|longitude|Pin code|Output| Feedback|
+---+------+--------------+--------------+---------------+--------------------------+-----------+--------+---------+--------+------+---------+
| 20|Female|        Single|       Student|      No Income|             Post Graduate|          4| 12.9766|  77.5993|  560001|   Yes| Positive|
| 24|Female|        Single|       Student| Below Rs.10000|                  Graduate|          3|  12.977|  77.5773|  560009|   Yes| Positive|
| 22|  Male|        Single|       Student| Below Rs.10000|             Post Graduate|          3| 12.9551|  77.6593|  560017|   Yes|Negative |
| 22|Female|        Single|       Student|      No Income|                  Graduate|          6| 12.9473|  77.5616|  560019|   Yes| Positive|

In [0]:
# Load the MT_cars Parquet file from DBFS and preview its rows
parquet_file_path = "/FileStore/MT_cars.parquet"
parquet_data = spark.read.format("parquet").load(parquet_file_path)
parquet_data.show()

+-------------------+----+---+-----+---+----+-----+-----+---+---+----+----+
|              model| mpg|cyl| disp| hp|drat|   wt| qsec| vs| am|gear|carb|
+-------------------+----+---+-----+---+----+-----+-----+---+---+----+----+
|          Mazda RX4|21.0|  6|160.0|110| 3.9| 2.62|16.46|  0|  1|   4|   4|
|      Mazda RX4 Wag|21.0|  6|160.0|110| 3.9|2.875|17.02|  0|  1|   4|   4|
|         Datsun 710|22.8|  4|108.0| 93|3.85| 2.32|18.61|  1|  1|   4|   1|
|     Hornet 4 Drive|21.4|  6|258.0|110|3.08|3.215|19.44|  1|  0|   3|   1|
|  Hornet Sportabout|18.7|  8|360.0|175|3.15| 3.44|17.02|  0|  0|   3|   2|
|            Valiant|18.1|  6|225.0|105|2.76| 3.46|20.22|  1|  0|   3|   1|
|         Duster 360|14.3|  8|360.0|245|3.21| 3.57|15.84|  0|  0|   3|   4|
|          Merc 240D|24.4|  4|146.7| 62|3.69| 3.19| 20.0|  1|  0|   4|   2|
|           Merc 230|22.8|  4|140.8| 95|3.92| 3.15| 22.9|  1|  0|   4|   2|
|           Merc 280|19.2|  6|167.6|123|3.92| 3.44| 18.3|  1|  0|   4|   4|
|          M

In [0]:
# Reading a JSON file from DBFS with an explicit schema
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, ArrayType

# Shape of the nested "address" object.
_address_type = (
    StructType()
    .add("city", StringType(), True)
    .add("state", StringType(), True)
    .add("streetAddress", StringType(), True)
)

# Shape of a single entry inside the "phoneNumbers" array.
_phone_entry_type = (
    StructType()
    .add("number", StringType(), True)
    .add("type", StringType(), True)
)

# Top-level record layout; field order matches the columns shown by df.show().
custom_schema = StructType([
    StructField("address", _address_type, True),
    StructField("age", IntegerType(), True),
    StructField("firstName", StringType(), True),
    StructField("gender", StringType(), True),
    StructField("lastName", StringType(), True),
    StructField("phoneNumbers", ArrayType(_phone_entry_type, True)),
])

# NOTE: `json_data` holds a *path* string here; a later cell rebinds the same
# name to a DataFrame.
json_data = "/FileStore/sample_json_1.json"

# multiLine is enabled so a JSON record may span several lines of the file.
df = (
    spark.read
    .schema(custom_schema)
    .option("multiLine", True)
    .json(json_data)
)
df.show()

+--------------------+---+---------+------+--------+--------------------+
|             address|age|firstName|gender|lastName|        phoneNumbers|
+--------------------+---+---------+------+--------+--------------------+
|{San Diego, CA, 101}| 28|      Joe|  male| Jackson|[{7349282382, home}]|
+--------------------+---+---------+------+--------+--------------------+



In [0]:
# Writing data to a CSV file in DBFS
# `csv_data` is the DataFrame loaded by the CSV-reading cell above.
csv_path = "/FileStore/write_csv1.csv"
# Use an explicit "overwrite" save mode: the default mode raises an error when the
# target path already exists, so this cell would fail on every re-run
# (e.g. Restart & Run All). Overwriting keeps the cell idempotent.
csv_data.write.mode("overwrite").csv(csv_path, header=True)

In [0]:
# Writing data to a Parquet file in DBFS
# `parquet_data` is the DataFrame loaded by the Parquet-reading cell above.
parquet_path = "/FileStore/write_parquet1.parquet"
# Use an explicit "overwrite" save mode: the default mode raises an error when the
# target path already exists, so this cell would fail on every re-run
# (e.g. Restart & Run All). Overwriting keeps the cell idempotent.
parquet_data.write.mode("overwrite").parquet(parquet_path)

In [0]:
# Writing data to a text file in DBFS
# `text_data` is not defined by any earlier visible cell, so on a fresh kernel the
# write below raised a NameError. Build a small sample DataFrame here
# (replace this with your actual DataFrame). The text writer expects a
# DataFrame with a single string column, hence the one-column layout.
text_data = spark.createDataFrame(
    [
        ("first sample line",),
        ("second sample line",),
        ("third sample line",),
    ],
    ["value"],
)

text_path = "/FileStore/write_text_1.txt"
# Explicit "overwrite" mode keeps the cell re-runnable; the default save mode
# raises an error when the target path already exists.
text_data.write.mode("overwrite").text(text_path)

In [0]:
# Sample JSON data as DataFrame (replace this with your actual DataFrame)
json_data = spark.createDataFrame(
    [
        (1, "Alice", 30),
        (2, "Bob", 35),
        (3, "Charlie", 40),
    ],
    ["id", "name", "age"],
)

# Define the path to write the JSON output in DBFS
json_path = "/FileStore/write_json_output.json"

# Write the DataFrame as JSON. An explicit "overwrite" save mode is used because
# the default mode raises an error when the target path already exists, which
# would make this cell fail on every re-run (e.g. Restart & Run All).
json_data.write.mode("overwrite").json(json_path)

# Optional: Display the written JSON file path
print("JSON file written to:", json_path)

JSON file written to: /FileStore/write_json_output.json
