# Netflix TV Shows and Movies Exploratory Data Analysis

In [None]:
import $ivy.`org.apache.spark::spark-sql:2.4.5`

In [None]:
import org.apache.log4j.{Level, Logger}
// Switch off log4j output for every logger under the "org" namespace so that
// library logging does not flood the notebook cell output.
locally {
  val orgLogger = Logger.getLogger("org")
  orgLogger.setLevel(Level.OFF)
}

In [None]:
import org.apache.spark.sql._

// Notebook-wide Spark session, exposed implicitly so helpers that take an
// implicit SparkSession can pick it up.
// The type is spelled out explicitly: an implicit definition without a type
// annotation makes implicit resolution depend on inference order (and is
// rejected outright by Scala 3).
// master("local[*]") runs Spark inside this JVM using all available cores.
implicit val spark: SparkSession =
  NotebookSparkSession
    .builder()
    .master("local[*]")
    .getOrCreate()
import org.apache.spark.sql.functions._

### Load Netflix Dataset

In [None]:
// Load the Netflix titles CSV from the working directory.
//   header      -> take column names from the first line of the file
//   inferSchema -> let Spark scan the data to choose column types
// NOTE(review): Spark's CSV reader uses backslash as its default escape
// character; if this file escapes embedded quotes by doubling them, an
// option("escape", "\"") may be needed to keep columns aligned — confirm
// against the data.
val netflixDF: DataFrame = spark.read
  .options(Map("header" -> "true", "inferSchema" -> "true"))
  .csv("netflix_titles.csv")

### Display Schema and Data

In [None]:
// Print the column names and types, then preview the first five rows
// without truncating long cell values.
netflixDF.printSchema()
netflixDF.show(numRows = 5, truncate = false)

### Exploratory Analysis

In [None]:
// Total number of records in the dataset (triggers a full Spark job).
val rowCount: Long = netflixDF.count()
println(s"Total Rows: ${rowCount}")

In [None]:
// Number of titles for each distinct value of the `type` column.
netflixDF
  .groupBy(col("type"))
  .count()
  .show()

In [None]:
// Most common genres.
// Grouping on the raw `listed_in` value counts whole genre *combinations*
// (presumably a comma-separated list such as "Dramas, International Movies" —
// confirm against the data), not individual genres. Split the list and explode
// it so every genre of a title is counted on its own. Rows with a null
// `listed_in` produce no exploded rows and are therefore left out.
netflixDF
  .select(explode(split(trim(col("listed_in")), "\\s*,\\s*")).as("genre"))
  .groupBy("genre")
  .count()
  .orderBy(desc("count"))
  .show(10, false)

### Trend Analysis

In [None]:
// Number of titles per release year, oldest year first.
// Note: show() without arguments prints only the first 20 rows.
netflixDF
  .groupBy(col("release_year"))
  .count()
  .orderBy(col("release_year").asc)
  .show()

### Longest Movies

In [None]:
// Five longest movies.
// Ordering directly on `duration` compares text when the column holds values
// like "90 min", so "99 min" would outrank "312 min". Pull out the leading
// number and order on it as an integer instead. Values with no leading digits
// (or null) become null and are pushed to the end.
netflixDF
  .filter(col("type") === "Movie")
  .orderBy(
    regexp_extract(col("duration"), "^\\s*(\\d+)", 1).cast("int").desc_nulls_last
  )
  .select("title", "duration")
  .show(5, false)