# Read and write 
The objective of this chapter is to read and write data in different file formats using Spark.

Let's start by creating a small example DataFrame.

In [None]:
from pyspark.sql import SparkSession

# Create the Spark session for this notebook, or reuse one that is already running.
spark = (
    SparkSession.builder
    .appName("ReadWriteFiles")
    .getOrCreate()
)

In [None]:
# Sample data: a small in-memory DataFrame used by the read/write examples.

# Column names, in the same order as the fields of each tuple in `data`.
columns = ["Name", "Age"]
data = [
    ("Alice", 30),
    ("Bob", 35),
    ("Charlie", 28),
    ("Daniel", 55),
]
df = spark.createDataFrame(data, schema=columns)


## JSON Files

In [None]:
# Project the two columns that will be written out as JSON.
df_json = df.select("Name", "Age")

### Saving JSON

In [None]:
# Save the data as JSON.
# Spark writes a directory named "json_multiple_file" containing one part-*
# file per DataFrame partition, so several files are usually created.
# mode("overwrite") lets this cell be re-run: the default save mode raises
# an error when the target path already exists.
df_json.write.mode("overwrite").json("json_multiple_file")

In [None]:
# Save the data as a single JSON file.
# repartition(1) collapses the DataFrame into one partition, so Spark emits a
# single part-* file inside the "single_json_file" directory.
# .json() already selects the JSON format, and no "header" option is set
# because that option only applies to CSV output.
df_json.repartition(1).write.mode("overwrite").json("single_json_file")

### Reading JSON

In [None]:
# Load the single-file JSON output back into a DataFrame.

# No schema is supplied, so Spark infers it from the data.
json_reader = spark.read.format("json")
df_json_r = json_reader.load("single_json_file")

# Evaluating the DataFrame as the last expression of the cell displays it.
df_json_r


In [None]:
# collect() brings every row back to the driver as a list of Row objects.
# That is fine for this tiny sample; avoid it on large datasets.
df_json_r.collect()

In [None]:
# Load the multi-file JSON output: pointing the reader at the directory
# reads all the part files it contains into one DataFrame.
multi_json_reader = spark.read.format("json")
df_multiple_json = multi_json_reader.load("json_multiple_file")

In [None]:
# Materialize all rows read from the part files on the driver as a list of Row objects.
df_multiple_json.collect()

In [None]:
# You can also write and read data organized by partition columns (see DataFrameWriter.partitionBy).

## CSV Files

In [None]:
# Write the sample data as CSV.
df_csv = df.select(df.Name, df.Age)
# mode("overwrite") lets this cell be re-run: the default save mode raises
# an error when the "csv_files" directory already exists.
df_csv.write.mode("overwrite").csv("csv_files")

# Spark writes one part file per partition, so several files are created.
# For a single file, apply the same repartition(1) strategy used for the JSON files.
# Column names are not written unless the "header" option is set to true.

## PARQUET Files

In [None]:
# Project the two columns that will be written out as Parquet.
df_parquet = df.select("Name", "Age")


In [None]:
# Write the data as Parquet into the "parquet_files" directory.
# mode("overwrite") lets this cell be re-run: the default save mode raises
# an error when the target path already exists.
df_parquet.write.mode("overwrite").parquet("parquet_files")

In [None]:
# Shut down the Spark session; cells that use `spark` will not work after this runs.
spark.stop()

## Other Formats
You can also read from a database using the JDBC connector.

In [None]:
# Illustrative JDBC example, kept inside a string literal so it does not run:
# the host, database, table and credentials below are placeholders.
# To try it, copy the code into its own cell, fill in real connection values
# and make sure the MySQL JDBC driver jar is available to Spark.
# NOTE(review): "com.mysql.jdbc.Driver" is the legacy driver class name; newer
# MySQL Connector/J releases use "com.mysql.cj.jdbc.Driver" - confirm against
# the connector version in use.
"""
# Data Base Connection
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

spark = SparkSession.builder.appName("ReadFromDatabase").getOrCreate()

jdbcDF = spark.read \
  .format("jdbc") \
  .option("url", "jdbc:mysql://your_host:3306/your_database") \
  .option("driver", "com.mysql.jdbc.Driver") \
  .option("dbtable", "your_table") \
  .option("user", "your_username") \
  .option("password", "your_password") \
  .load()

# Access data like any other DataFrame
jdbcDF.select(col("column_name")).show()




"""