# ATELIER 2

In [1]:
import scala.util.Random

/** A staff member of the sample music store.
  *
  * @param employeeId identifier of the employee
  * @param lastName   family name
  * @param firstName  given name
  * @param title      job title
  * @param reportsTo  `employeeId` of the direct manager; the sample data uses 0 for the
  *                   employee who has no manager
  */
case class Employee(employeeId: Int, lastName: String, firstName: String, title: String, reportsTo: Int)

/** A media format a track can be delivered in.
  *
  * @param mediaTypeId identifier referenced by `Track.mediaTypeId`
  * @param name        human-readable label of the format
  */
case class MediaType(mediaTypeId: Int, name: String)

/** A musical (or video) genre.
  *
  * @param genreId identifier referenced by `Track.genreId`
  * @param name    human-readable label of the genre
  */
case class Genre(genreId: Int, name: String)

/** A sellable track of the catalogue.
  *
  * @param trackId     identifier of the track
  * @param name        title of the track
  * @param mediaTypeId identifier of the track's [[MediaType]]
  * @param genreId     identifier of the track's [[Genre]]
  */
case class Track(trackId: Int, name: String, mediaTypeId: Int, genreId: Int)

/** One line of an invoice: a track sold in a given quantity at a given unit price.
  *
  * @param invoiceLineId identifier of the line
  * @param invoiceId     identifier of the invoice the line belongs to
  * @param trackId       identifier of the sold [[Track]]
  * @param unitPrice     price of a single unit
  * @param quantity      number of units sold on this line
  */
case class InvoiceItem(invoiceLineId: Int, invoiceId: Int, trackId: Int, unitPrice: BigDecimal, quantity: Int)

/** An invoice header.
  *
  * @param invoiceId identifier of the invoice
  * @param items     identifiers (`InvoiceItem.invoiceLineId`) of the lines of this invoice;
  *                  the lines themselves are not stored here
  */
case class Invoice(invoiceId: Int, items: List[Int])


// Sample staff directory. Each row is (employeeId, lastName, firstName, title, reportsTo);
// reportsTo = 0 marks the employee without a manager (Andrew Adams).
val employees = Array(
  (1, "Adams", "Andrew", "General Manager", 0),
  (2, "Edwards", "Nancy", "Sales Manager", 1),
  (3, "Peacock", "Jane", "Sales Support Agent", 2),
  (4, "Park", "Margaret", "Sales Support Agent", 2),
  (5, "Johnson", "Steve", "Sales Support Agent", 2),
  (6, "Mitchell", "Michael", "IT Manager", 1),
  (7, "King", "Robert", "IT Staff", 6),
  (8, "Callahan", "Laura", "IT Staff", 6)
).map { case (employeeId, lastName, firstName, title, reportsTo) =>
  Employee(employeeId, lastName, firstName, title, reportsTo)
}

// Available media types; ids are assigned from the position in the list, starting at 1.
val mediaTypes = Array(
  "MPEG audio file",
  "Protected AAC audio file",
  "Protected MPEG-4 video file",
  "Purchased AAC audio file",
  "AAC audio file"
).zipWithIndex.map { case (name, position) => MediaType(position + 1, name) }

// Available genres; ids are assigned from the position in the list, starting at 1
// (so "Rock" is genre 1 and "Opera" is genre 25).
val genres = Array(
  "Rock",
  "Jazz",
  "Metal",
  "Alternative & Punk",
  "Rock And Roll",
  "Blues",
  "Latin",
  "Reggae",
  "Pop",
  "Soundtrack",
  "Bossa Nova",
  "Easy Listening",
  "Heavy Metal",
  "R&B/Soul",
  "Electronica/Dance",
  "World",
  "Hip Hop/Rap",
  "Science Fiction",
  "TV Shows",
  "Sci Fi & Fantasy",
  "Drama",
  "Comedy",
  "Alternative",
  "Classical",
  "Opera"
).zipWithIndex.map { case (name, position) => Genre(position + 1, name) }

// First word of a generated track name (30 entries).
val adjectives = Array(
  "Cosmic", "Electric", "Velvet", "Mystic", "Atomic", "Phantom",
  "Silent", "Savage", "Eternal", "Digital", "Lost", "Sacred",
  "Wild", "Urban", "Golden", "Crystal", "Liquid", "Toxic",
  "Neon", "Crimson", "Stellar", "Raging", "Arctic", "Sonic",
  "Primal", "Midnight", "Screaming", "Infinite", "Royal", "Lunar"
)

// Second word of a generated track name (30 distinct entries).
// Every entry must be unique: a repeated word would be drawn more often than the others
// by the uniform random pick in Track.generateTrack.
val nouns = Array(
  "Echo", "Giants", "Wolves", "Horizon", "Empire", "Void", "Thunder", "Dragons",
  "Saints", "Kings", "Ghosts", "Ravens", "Pirates", "Heroes", "Rebels", "Demons",
  "Machines", "Angels", "Knights", "Lions", "Shadows", "Zombies", "Wizards",
  "Killers", "Titans", "Outlaws", "Vikings", "Prophets", "Bandits", "Warriors"
)

// Last part of a generated track name (21 entries).
val complements = Array(
  "of Doom", "in Chains", "from Mars",
  "of the North", "of Death", "in Disguise",
  "of the Night", "from Hell", "of Tomorrow",
  "in Flames", "of the Deep", "from Beyond",
  "of Destruction", "in Shadow", "from the Sky",
  "of the Abyss", "in Exile", "from the East",
  "of Eternity", "from the Desert", "of the Underground"
)

object Track {
  /** Builds a track with the given id, a random genre, a random media type and a random
    * three-part name ("<adjective> <noun> <complement>").
    *
    * @param trackId identifier to give to the generated track
    * @return the generated track
    */
  def generateTrack(trackId: Int): Track = {
    // Uniform random draw of one element of `pool`.
    def pick[A](pool: Array[A]): A = pool(Random.nextInt(pool.length))

    // Draw order (genre, media type, then the three name parts) is kept on purpose so that
    // the sequence of random numbers consumed is unchanged.
    val genre = pick(genres)
    val mediaType = pick(mediaTypes)
    val trackName = Seq(pick(adjectives), pick(nouns), pick(complements)).mkString(" ")

    Track(trackId, trackName, mediaType.mediaTypeId, genre.genreId)
  }
}


object Invoice {
  /** Generates a random invoice together with the line items it refers to.
    *
    * Between 1 and 5 distinct tracks are drawn from `availableTracks` (fewer if the array
    * is smaller). Each line gets a quantity of 1 most of the time (otherwise 1 to 3) and a
    * unit price between 0.99 and 2.00 rounded to two decimals. Line ids are
    * `invoiceId * 10 + position`, with position starting at 1.
    *
    * @param invoiceId       identifier of the invoice to generate
    * @param availableTracks catalogue the lines are drawn from
    * @return the invoice and its line items, in the same order as `Invoice.items`
    */
  def generateInvoiceWithItems(invoiceId: Int, availableTracks: Array[Track]): (Invoice, List[InvoiceItem]) = {
    val itemCount = Random.nextInt(5) + 1
    val selectedTracks = Random.shuffle(availableTracks.toList).take(itemCount)

    val items = selectedTracks.zipWithIndex.map { case (track, index) =>
      val quantity = if (Random.nextDouble() > 0.2) 1 else Random.nextInt(3) + 1
      val unitPrice = BigDecimal(0.99 + Random.nextDouble() * 1.01).setScale(2, BigDecimal.RoundingMode.HALF_UP)

      InvoiceItem(
        invoiceLineId = invoiceId * 10 + index + 1,
        invoiceId = invoiceId,
        trackId = track.trackId,
        unitPrice = unitPrice,
        quantity = quantity
      )
    }

    (Invoice(invoiceId, items.map(_.invoiceLineId)), items)
  }

  /** Generates a random invoice and keeps only the ids of its lines.
    *
    * The generated line items are discarded; use [[generateInvoiceWithItems]] when the
    * lines themselves are needed.
    *
    * @param invoiceId       identifier of the invoice to generate
    * @param availableTracks catalogue the lines are drawn from
    * @return the generated invoice
    */
  def generateInvoice(invoiceId: Int, availableTracks: Array[Track]): Invoice =
    generateInvoiceWithItems(invoiceId, availableTracks)._1
}

// Build the catalogue: 200 random tracks with ids 1 to 200, then print them.
val tracks = (1 to 200).map(trackId => Track.generateTrack(trackId))
val tracksArray = tracks.toArray
println("Liste des tracks :")
for (track <- tracksArray) println(track)


// Build 2000 random invoices (ids 1 to 2000) drawing from the catalogue, then print them.
val invoices = (1 to 2000).map(invoiceId => Invoice.generateInvoice(invoiceId, tracksArray))
val invoicesArray = invoices.toArray
println("\nListe des invoices :")
for (invoice <- invoicesArray) println(invoice)




Track(1,Raging Knights from the East,5,7)
Track(2,Toxic Machines in Chains,5,7)
Track(3,Midnight Shadows of Death,3,15)
Track(4,Eternal Prophets from Mars,3,18)
Track(5,Silent Kings of Destruction,4,3)
Track(6,Lost Angels in Chains,2,7)
Track(7,Atomic Ghosts in Flames,1,4)
Track(8,Liquid Outlaws in Flames,4,2)
Track(9,Wild Wizards of the Abyss,1,18)
Track(10,Velvet Warriors of the Abyss,2,8)
Track(11,Sonic Giants in Flames,3,9)
Track(12,Wild Ravens from Beyond,4,3)
Track(13,Arctic Ghosts in Disguise,4,13)
Liste des tracks :
Track(14,Electric Demons of Destruction,4,5)
Track(15,Electric Knights in Flames,4,22)
Track(16,Primal Giants from the East,1,11)
Track(17,Screaming Outlaws in Disguise,5,23)
Track(18,Golden Outlaws in Shadow,4,14)
Track(19,Sacred Ravens of Doom,4,2)
Track(20,Golden Horizon from Mars,3,8)
Track(21,Wild Heroes of the Deep,4,4)
Track(22,Raging Giants of the Night,4,7)
Track(23,Eternal Shadows of the North,1,16)
Track(24,Eternal Empire of the Deep,4,7)
Track(25,Phantom

defined class Employee
defined class MediaType
defined class Genre
defined class Track
defined class InvoiceItem
defined class Invoice
employees = Array(Employee(1,Adams,Andrew,General Manager,0), Employee(2,Edwards,Nancy,Sales Manager,1), Employee(3,Peacock,Jane,Sales Support Agent,2), Employee(4,Park,Margaret,Sales Support Agent,2), Employee(5,Johnson,Steve,Sales Support Agent,2), Employee(6,Mitchell,Michael,IT Manager,1), Employee(7,King,Robert,IT Staff,6), Employee(8,Callahan,Laura,IT Staff,6))
mediaTypes = Array(MediaType(1,MPEG audio file), MediaType(2,Protected AAC audio file), MediaType(3,Protected MPEG-4 video file), MediaType(4,Purchased AAC audio file), MediaType(5,AAC audio file))
genres = Array(Genre(...


Array(Genre(...

# Atelier 3

## 1/ Filtrage - Titre en fonction d'un média spécifique

In [2]:
// Media type ids to keep.
val mediaFilter = List(1, 3, 5)

// Tracks whose media type is one of the selected ids, in catalogue order.
val filteredTracks =
  for (track <- tracksArray if mediaFilter.contains(track.mediaTypeId)) yield track

println(s"Nombre de tracks filtrées: ${filteredTracks.length}")

Nombre de tracks filtrées: 115


mediaFilter = List(1, 3, 5)
filteredTracks = Array(Track(1,Raging Knights from the East,5,7), Track(2,Toxic Machines in Chains,5,7), Track(3,Midnight Shadows of Death,3,15), Track(4,Eternal Prophets from Mars,3,18), Track(7,Atomic Ghosts in Flames,1,4), Track(9,Wild Wizards of the Abyss,1,18), Track(11,Sonic Giants in Flames,3,9), Track(16,Primal Giants from the East,1,11), Track(17,Screaming Outlaws in Disguise,5,23), Track(20,Golden Horizon from Mars,3,8), Track(23,Eternal Shadows of the North,1,16), Track(25,Phantom Horizon of the North,1,21), Track(26,Golden Wolves of Death,3,3), Track(28,Primal Rebels of Death,3,24), Track(29,Urban Knights of Destruction,3,8), Track(30,Sonic Giants from the Desert,1,23), Track(31,Silent Ravens in Disguise,1,13), Track(34,Et...


Array(Track(1,Raging Knights from the East,5,7), Track(2,Toxic Machines in Chains,5,7), Track(3,Midnight Shadows of Death,3,15), Track(4,Eternal Prophets from Mars,3,18), Track(7,Atomic Ghosts in Flames,1,4), Track(9,Wild Wizards of the Abyss,1,18), Track(11,Sonic Giants in Flames,3,9), Track(16,Primal Giants from the East,1,11), Track(17,Screaming Outlaws in Disguise,5,23), Track(20,Golden Horizon from Mars,3,8), Track(23,Eternal Shadows of the North,1,16), Track(25,Phantom Horizon of the North,1,21), Track(26,Golden Wolves of Death,3,3), Track(28,Primal Rebels of Death,3,24), Track(29,Urban Knights of Destruction,3,8), Track(30,Sonic Giants from the Desert,1,23), Track(31,Silent Ravens in Disguise,1,13), Track(34,Et...

## 1/ Filtrage - Commandes ayant un maximum de titres du genre 1

In [3]:
// Build the InvoiceItem rows of every invoice.
// An Invoice only carries the ids of its lines, so the rows are recreated here. The ids and
// the number of lines are taken from `invoice.items` so that every InvoiceItem matches a
// line id actually referenced by its invoice; tracks, quantities and prices are drawn at random.
val invoiceItems = invoicesArray.flatMap { invoice =>
  val selectedTracks = Random.shuffle(tracksArray.toList).take(invoice.items.length)

  invoice.items.zip(selectedTracks).map { case (invoiceLineId, track) =>
    // Quantity is 1 most of the time, otherwise between 1 and 3.
    val quantity = if (Random.nextDouble() > 0.2) 1 else Random.nextInt(3) + 1
    // Unit price between 0.99 and 2.00, rounded to two decimals.
    val unitPrice = BigDecimal(0.99 + Random.nextDouble() * 1.01).setScale(2, BigDecimal.RoundingMode.HALF_UP)

    InvoiceItem(
      invoiceLineId = invoiceLineId,
      invoiceId = invoice.invoiceId,
      trackId = track.trackId,
      unitPrice = unitPrice,
      quantity = quantity
    )
  }
}

/** Counts the lines of an invoice whose track belongs to genre 1.
  *
  * Lines are counted, not quantities. A line whose track id is not found in `tracksArray`
  * is ignored.
  *
  * @param invoiceId identifier of the invoice to inspect
  * @return number of lines of that invoice referring to a genre-1 track
  */
def countGenre1TracksInInvoice(invoiceId: Int): Int =
  invoiceItems.count { item =>
    item.invoiceId == invoiceId &&
      tracksArray.find(_.trackId == item.trackId).exists(_.genreId == 1)
  }

// Count the genre-1 lines of every invoice once; the counts are then reused for both the
// maximum and the selection (each count scans every invoice item, so it is not repeated).
val genre1CountByInvoice = invoicesArray.map { invoice =>
  invoice -> countGenre1TracksInInvoice(invoice.invoiceId)
}

// Highest number of genre-1 lines found in a single invoice.
val maxGenre1Count = genre1CountByInvoice.map(_._2).max

// Invoices reaching that maximum, in their original order.
val invoicesWithMaxGenre1 = genre1CountByInvoice.collect {
  case (invoice, genre1Count) if genre1Count == maxGenre1Count => invoice
}

println(s"Maximum de titres du genre 1 (Rock) dans une commande: $maxGenre1Count")
println(s"Nombre de commandes ayant ce maximum: ${invoicesWithMaxGenre1.length}")

Maximum de titres du genre 1 (Rock) dans une commande: 2
Nombre de commandes ayant ce maximum: 13


invoiceItems = Array(InvoiceItem(11,1,169,1.97,1), InvoiceItem(12,1,175,1.03,1), InvoiceItem(13,1,128,1.41,1), InvoiceItem(14,1,44,1.47,1), InvoiceItem(15,1,146,1.07,1), InvoiceItem(21,2,49,1.59,2), InvoiceItem(31,3,128,1.71,1), InvoiceItem(41,4,152,1.93,1), InvoiceItem(42,4,132,1.86,1), InvoiceItem(43,4,28,2.00,1), InvoiceItem(44,4,2,1.38,1), InvoiceItem(45,4,173,1.84,1), InvoiceItem(51,5,128,1.62,1), InvoiceItem(52,5,57,1.23,1), InvoiceItem(61,6,16,1.78,1), InvoiceItem(62,6,6,1.11,1), InvoiceItem(71,7,63,1.92,1), InvoiceItem(72,7,73,1.59,2), InvoiceItem(73,7,89,1.23,1), InvoiceItem(74,7,157,1.30,1), InvoiceItem(81,8,97,1.61,1), InvoiceItem(82,8,43,1.43,1), InvoiceItem(91,9,193,1.60,1), InvoiceItem(92,9,5,1.86,1), InvoiceItem(93,9,24,1.74,1), InvoiceItem(94,9,104,1....


Array(InvoiceItem(11,1,169,1.97,1), InvoiceItem(12,1,175,1.03,1), InvoiceItem(13,1,128,1.41,1), InvoiceItem(14,1,44,1.47,1), InvoiceItem(15,1,146,1.07,1), InvoiceItem(21,2,49,1.59,2), InvoiceItem(31,3,128,1.71,1), InvoiceItem(41,4,152,1.93,1), InvoiceItem(42,4,132,1.86,1), InvoiceItem(43,4,28,2.00,1), InvoiceItem(44,4,2,1.38,1), InvoiceItem(45,4,173,1.84,1), InvoiceItem(51,5,128,1.62,1), InvoiceItem(52,5,57,1.23,1), InvoiceItem(61,6,16,1.78,1), InvoiceItem(62,6,6,1.11,1), InvoiceItem(71,7,63,1.92,1), InvoiceItem(72,7,73,1.59,2), InvoiceItem(73,7,89,1.23,1), InvoiceItem(74,7,157,1.30,1), InvoiceItem(81,8,97,1.61,1), InvoiceItem(82,8,43,1.43,1), InvoiceItem(91,9,193,1.60,1), InvoiceItem(92,9,5,1.86,1), InvoiceItem(93,9,24,1.74,1), InvoiceItem(94,9,104,1....

## 2/ Agrégations - Top 5 titres en nombre de ventes

In [4]:
// Units sold per track id (sum of the quantities of all its invoice lines).
val trackSales = invoiceItems
  .groupBy(_.trackId)
  .map { case (trackId, items) =>
    trackId -> items.map(_.quantity).sum
  }

// Five best-selling tracks as (track, units sold, genre name).
// Ties on the number of units are broken by ascending track id so the ranking does not
// depend on the iteration order of the map.
val top5Tracks = trackSales
  .toSeq
  .sortBy { case (trackId, totalSales) => (-totalSales, trackId) }
  .take(5)
  .map { case (trackId, totalSales) =>
    val track = tracksArray.find(_.trackId == trackId)
      .getOrElse(throw new NoSuchElementException(s"Track $trackId not found"))
    val genreName = genres.find(_.genreId == track.genreId).map(_.name).getOrElse("Unknown")
    (track, totalSales, genreName)
  }

println("Top 5 des titres en nombre de ventes:")
top5Tracks.zipWithIndex.foreach { case ((track, sales, genre), index) =>
  println(s"${index + 1}. ${track.name} - $sales ventes (Genre: $genre)")
}


Top 5 des titres en nombre de ventes:
1. Lost Wizards in Chains - 58 ventes (Genre: Hip Hop/Rap)
2. Mystic Zombies of the Abyss - 55 ventes (Genre: Blues)
3. Stellar Titans from Mars - 55 ventes (Genre: Metal)
4. Crystal Demons in Chains - 53 ventes (Genre: Jazz)
5. Electric Echo of Tomorrow - 52 ventes (Genre: Opera)


trackSales = Map(69 -> 32, 138 -> 41, 101 -> 26, 88 -> 23, 170 -> 50, 115 -> 33, 5 -> 42, 120 -> 39, 10 -> 32, 56 -> 42, 142 -> 29, 153 -> 27, 174 -> 40, 185 -> 50, 42 -> 44, 24 -> 32, 37 -> 48, 25 -> 42, 52 -> 21, 14 -> 29, 184 -> 39, 110 -> 30, 125 -> 47, 196 -> 40, 157 -> 39, 189 -> 43, 20 -> 35, 46 -> 42, 93 -> 31, 152 -> 32, 57 -> 37, 78 -> 23, 29 -> 41, 164 -> 51, 179 -> 28, 106 -> 38, 121 -> 38, 84 -> 52, 147 -> 29, 61 -> 31, 132 -> 35, 89 -> 46, 133 -> 39, 116 -> 27, 1 -> 21, 74 -> 33, 6 -> 41, 60 -> 28, 117 -> 28, 85 -> 35, 102 -> 39, 28 -> 31, 38 -> 46, 160 -> 34, 70 -> 42, 192 -> 38, 21 -> 42, 137 -> 40, 165 -> 44, 33 -> 32, 92 -> 37, 197 -> 41, 65 -> 34, 97 -> 46, 156 -> 47, 9 -> 43, 188 -> 34, 53 -> 30, 169 -> 32, 141 -> 35, 109 -> 4...


Map(69 -> 32, 138 -> 41, 101 -> 26, 88 -> 23, 170 -> 50, 115 -> 33, 5 -> 42, 120 -> 39, 10 -> 32, 56 -> 42, 142 -> 29, 153 -> 27, 174 -> 40, 185 -> 50, 42 -> 44, 24 -> 32, 37 -> 48, 25 -> 42, 52 -> 21, 14 -> 29, 184 -> 39, 110 -> 30, 125 -> 47, 196 -> 40, 157 -> 39, 189 -> 43, 20 -> 35, 46 -> 42, 93 -> 31, 152 -> 32, 57 -> 37, 78 -> 23, 29 -> 41, 164 -> 51, 179 -> 28, 106 -> 38, 121 -> 38, 84 -> 52, 147 -> 29, 61 -> 31, 132 -> 35, 89 -> 46, 133 -> 39, 116 -> 27, 1 -> 21, 74 -> 33, 6 -> 41, 60 -> 28, 117 -> 28, 85 -> 35, 102 -> 39, 28 -> 31, 38 -> 46, 160 -> 34, 70 -> 42, 192 -> 38, 21 -> 42, 137 -> 40, 165 -> 44, 33 -> 32, 92 -> 37, 197 -> 41, 65 -> 34, 97 -> 46, 156 -> 47, 9 -> 43, 188 -> 34, 53 -> 30, 169 -> 32, 141 -> 35, 109 -> 4...

## 2/ Agrégations - Total commande par genre

In [None]:
// Revenue and units sold per genre as (genre name, total amount, units), highest revenue first.
// Note: the third value is the sum of the line quantities, i.e. units sold.
val totalByGenre = {
  // Lookup tables built once instead of scanning the arrays for every invoice line.
  // Track ids (1 to 200) and genre ids (1 to 25) are unique in the generated data.
  val trackById = tracksArray.map(track => track.trackId -> track).toMap
  val genreNameById = genres.map(genre => genre.genreId -> genre.name).toMap

  invoiceItems
    .map { item =>
      val track = trackById.getOrElse(
        item.trackId,
        throw new NoSuchElementException(s"Track ${item.trackId} not found")
      )
      val genreName = genreNameById.getOrElse(track.genreId, "Unknown")
      (genreName, item.unitPrice * item.quantity, item.quantity)
    }
    .groupBy(_._1)
    .map { case (genre, items) =>
      val total = items.map(_._2).sum
      val commandes = items.map(_._3).sum
      (genre, total, commandes)
    }
    .toSeq
    .sortBy(-_._2)
}

println("Total des commandes par genre:")
totalByGenre.foreach { case (genre, total, commandes) =>
  println(f"$genre: $commandes commandes pour ${total}%.2f€")
}


Total des commandes par genre:
Sci Fi & Fantasy: 513 commandes pour 769.52€
Jazz: 447 commandes pour 668.26€
Comedy: 445 commandes pour 645.22€
Latin: 375 commandes pour 557.45€
Heavy Metal: 366 commandes pour 543.08€
Pop: 354 commandes pour 537.28€
Metal: 352 commandes pour 535.33€
Alternative & Punk: 336 commandes pour 502.89€
Science Fiction: 333 commandes pour 493.88€
Rock And Roll: 309 commandes pour 463.01€
Bossa Nova: 288 commandes pour 436.40€
Rock: 292 commandes pour 434.86€
Easy Listening: 286 commandes pour 423.00€
Classical: 269 commandes pour 402.31€
R&B/Soul: 260 commandes pour 388.36€
Hip Hop/Rap: 254 commandes pour 386.67€
Opera: 253 commandes pour 382.33€
World: 251 commandes pour 381.66€
Blues: 242 commandes pour 375.27€
Alternative: 216 commandes pour 324.73€
Reggae: 183 commandes pour 273.35€
Drama: 171 commandes pour 262.98€
Electronica/Dance: 151 commandes pour 224.60€
TV Shows: 145 commandes pour 217.53€
Soundtrack: 126 commandes pour 187.62€


totalByGenre = List((Sci Fi & Fantasy,769.52,513), (Jazz,668.26,447), (Comedy,645.22,445), (Latin,557.45,375), (Heavy Metal,543.08,366), (Pop,537.28,354), (Metal,535.33,352), (Alternative & Punk,502.89,336), (Science Fiction,493.88,333), (Rock And Roll,463.01,309), (Bossa Nova,436.40,288), (Rock,434.86,292), (Easy Listening,423.00,286), (Classical,402.31,269), (R&B/Soul,388.36,260), (Hip Hop/Rap,386.67,254), (Opera,382.33,253), (World,381.66,251), (Blues,375.27,242), (Alternative,324.73,216), (Reggae,273.35,183), (Drama,262.98,171), (Electronica/Dance,224.60,151), (TV Shows,217.53,145), (Soundtrack,187.62,126))


List((Sci Fi & Fantasy,769.52,513), (Jazz,668.26,447), (Comedy,645.22,445), (Latin,557.45,375), (Heavy Metal,543.08,366), (Pop,537.28,354), (Metal,535.33,352), (Alternative & Punk,502.89,336), (Science Fiction,493.88,333), (Rock And Roll,463.01,309), (Bossa Nova,436.40,288), (Rock,434.86,292), (Easy Listening,423.00,286), (Classical,402.31,269), (R&B/Soul,388.36,260), (Hip Hop/Rap,386.67,254), (Opera,382.33,253), (World,381.66,251), (Blues,375.27,242), (Alternative,324.73,216), (Reggae,273.35,183), (Drama,262.98,171), (Electronica/Dance,224.60,151), (TV Shows,217.53,145), (Soundtrack,187.62,126))

## 3/ Traitement récursif - Fonction récursive pour trouver le plus haut responsable

In [6]:
/** Walks up the reporting chain of an employee and returns the employee at the top,
  * i.e. the first one whose `reportsTo` is 0.
  *
  * @param employeeId identifier of the employee to start from
  * @return the top-most manager (the employee itself when it has no manager)
  * @throws IllegalArgumentException if an employee id met along the chain does not exist
  * @throws IllegalStateException    if the reporting chain loops back on itself
  */
def findTopManager(employeeId: Int): Employee = {
  // Local tail-recursive walk; `visited` holds the ids already seen so that a cyclic
  // reporting chain is reported instead of recursing forever.
  @scala.annotation.tailrec
  def climb(currentId: Int, visited: Set[Int]): Employee = {
    if (visited.contains(currentId)) {
      throw new IllegalStateException(s"Cycle detected in reporting chain at employee $currentId")
    }

    val employee = employees.find(_.employeeId == currentId)
      .getOrElse(throw new IllegalArgumentException(s"Employee $currentId not found"))

    if (employee.reportsTo == 0) {
      employee
    } else {
      climb(employee.reportsTo, visited + currentId)
    }
  }

  climb(employeeId, Set.empty)
}

/** Builds the reporting chain of an employee as "First Last → First Last → ...",
  * starting with the employee and ending with the one whose `reportsTo` is 0.
  *
  * Defined before the test loop below so that it is in scope when the loop runs.
  *
  * @param employeeId identifier of the employee to start from
  * @return the names along the chain, joined with " → "
  * @throws NoSuchElementException if an employee id met along the chain does not exist
  * @throws IllegalStateException  if the reporting chain loops back on itself
  */
def buildHierarchyPath(employeeId: Int): String = {
  // Names are accumulated in reverse (constant-time prepend) and put back in order at the end.
  @scala.annotation.tailrec
  def buildPath(currentId: Int, visited: Set[Int], reversedPath: List[String]): List[String] = {
    if (visited.contains(currentId)) {
      throw new IllegalStateException(s"Cycle detected in reporting chain at employee $currentId")
    }

    val employee = employees.find(_.employeeId == currentId)
      .getOrElse(throw new NoSuchElementException(s"Employee $currentId not found"))
    val employeeName = s"${employee.firstName} ${employee.lastName}"

    if (employee.reportsTo == 0) {
      (employeeName :: reversedPath).reverse
    } else {
      buildPath(employee.reportsTo, visited + currentId, employeeName :: reversedPath)
    }
  }

  buildPath(employeeId, Set.empty, Nil).mkString(" → ")
}

println("Tests de la fonction récursive findTopManager:")
println()

// For every employee, print the top manager and the full reporting chain.
employees.foreach { employee =>
  val topManager = findTopManager(employee.employeeId)
  val hierarchy = buildHierarchyPath(employee.employeeId)

  println(s"${employee.firstName} ${employee.lastName} (${employee.title})")
  println(s"  → Plus haut responsable: ${topManager.firstName} ${topManager.lastName} (${topManager.title})")
  println(s"  → Chemin hiérarchique: $hierarchy")
  println()
}


findTopManager: (employeeId: Int)Employee
buildHierarchyPath: (employeeId: Int)String


Tests de la fonction récursive findTopManager:

Andrew Adams (General Manager)
  → Plus haut responsable: Andrew Adams (General Manager)
  → Chemin hiérarchique: Andrew Adams

Nancy Edwards (Sales Manager)
  → Plus haut responsable: Andrew Adams (General Manager)
  → Chemin hiérarchique: Nancy Edwards → Andrew Adams

Jane Peacock (Sales Support Agent)
  → Plus haut responsable: Andrew Adams (General Manager)
  → Chemin hiérarchique: Jane Peacock → Nancy Edwards → Andrew Adams

Margaret Park (Sales Support Agent)
  → Plus haut responsable: Andrew Adams (General Manager)
  → Chemin hiérarchique: Margaret Park → Nancy Edwards → Andrew Adams

Steve Johnson (Sales Support Agent)
  → Plus haut responsable: Andrew Adams (General Manager)
  → Chemin hiérarchique: Steve Johnson → Nancy Edwards → Andrew Adams

Michael Mitchell (IT Manager)
  → Plus haut responsable: Andrew Adams (General Manager)
  → Chemin hiérarchique: Michael Mitchell → Andrew Adams

Robert King (IT Staff)
  → Plus haut respo

# ATELIER 4 - Transposition sur Spark

## At. 4 - Etape 1/ Manipulation des données générées

In [None]:
// RDD creation.
// tracks, invoices, invoiceItems and mediaFilter were generated during workshops 2 and 3.
val tracksRDD = sc.parallelize(tracks)
val invoiceItemsRDD = sc.parallelize(invoiceItems)
val invoicesRDD = sc.parallelize(invoices)

// Tracks whose media type is one of the ids listed in mediaFilter.
val filteredTracksRDD = tracksRDD.filter { candidate =>
  mediaFilter.contains(candidate.mediaTypeId)
}

// ORDERS CONTAINING ROCK TRACKS
// (trackId, genreId) for every track and (trackId, invoiceId) for every invoice line.
val trackGenreRDD = tracksRDD.map(track => (track.trackId, track.genreId))
val invoiceTrackRDD = invoiceItemsRDD.map(item => (item.trackId, item.invoiceId))

// Number of Rock lines per invoice. Only invoices with at least one Rock line appear here.
// The genre filter is applied before the join so that only Rock tracks are shuffled.
val invoiceGenreCountRDD = invoiceTrackRDD
  .join(trackGenreRDD.filter { case (_, genreId) => genreId == 1 }) // 1 = Rock genre
  .map { case (_, (invoiceId, _)) => (invoiceId, 1) }
  .reduceByKey(_ + _)

// fold with 0 instead of max(): max() throws on an empty RDD (no Rock line at all),
// and 0 is neutral because every count in the RDD is at least 1.
val maxGenre1CountRDD = invoiceGenreCountRDD.map(_._2).fold(0)((left, right) => math.max(left, right))
val invoicesWithMaxGenre1RDD = invoiceGenreCountRDD.filter(_._2 == maxGenre1CountRDD)

println(s"Maximum de titres Rock dans une commande: $maxGenre1CountRDD")
println(s"Nombre de commandes ayant ce maximum: ${invoicesWithMaxGenre1RDD.count()}\n")


// TOP 5 tracks
// Units sold per track id.
val trackSalesRDD = invoiceItemsRDD
  .map(item => (item.trackId, item.quantity))
  .reduceByKey(_ + _)

// Five best sellers as (trackId, units sold). Ties on the number of units are broken by
// ascending track id so that the ranking is deterministic.
val top5TracksRDD = trackSalesRDD
  .sortBy({ case (trackId, totalSales) => (-totalSales, trackId) })
  .take(5)

// Fetch the five tracks with a single Spark job instead of one job per printed row.
val top5TrackIds = top5TracksRDD.map(_._1).toSet
val top5TracksById = tracksRDD
  .filter(track => top5TrackIds.contains(track.trackId))
  .collect()
  .map(track => track.trackId -> track)
  .toMap

top5TracksRDD.zipWithIndex.foreach { case ((trackId, sales), index) =>
  // Fails with "key not found" if a sold track id is missing from the catalogue.
  val track = top5TracksById(trackId)
  val genreName = genres.find(_.genreId == track.genreId).map(_.name).getOrElse("Unknown")
  println(s"${index + 1}. ${track.name} - $sales ventes (Genre: $genreName)")
}
println("\n")

// ORDERS BY GENRE
// (trackId, genreId) for every track.
val trackGenreMapRDD = tracksRDD.map(track => (track.trackId, track.genreId))

// Revenue per genre name, highest first.
val totalByGenreRDD = {
  // Small lookup table shipped with the closure instead of scanning `genres` for each row.
  val genreNameById = genres.map(genre => genre.genreId -> genre.name).toMap

  invoiceItemsRDD
    .map(item => (item.trackId, item.unitPrice * item.quantity))
    .join(trackGenreMapRDD)
    .map { case (_, (montant, genreId)) => (genreId, montant) }
    .reduceByKey(_ + _)
    .map { case (genreId, total) => (genreNameById.getOrElse(genreId, "Unknown"), total) }
    .sortBy(-_._2)
}

// Run the pipeline once and reuse the collected rows for both the listing and the grand total.
val collectedTotalsByGenre = totalByGenreRDD.collect()

collectedTotalsByGenre.foreach { case (genre, total) =>
  println(f"$genre: ${total}%.2f€")
}

// Amounts are added as BigDecimal and converted to Double only once, at the end.
val totalRevenueRDD = collectedTotalsByGenre.map(_._2).sum.toDouble
println(f"Chiffre d'affaires total: ${totalRevenueRDD}%.2f€")

Maximum de titres Rock dans une commande: 2
Nombre de commandes ayant ce maximum: 13

1. Lost Wizards in Chains - 58 ventes (Genre: Hip Hop/Rap)
2. Mystic Zombies of the Abyss - 55 ventes (Genre: Blues)
3. Stellar Titans from Mars - 55 ventes (Genre: Metal)
4. Crystal Demons in Chains - 53 ventes (Genre: Jazz)
5. Electric Echo of Tomorrow - 52 ventes (Genre: Opera)


Sci Fi & Fantasy: 769.52€
Jazz: 668.26€
Comedy: 645.22€
Latin: 557.45€
Heavy Metal: 543.08€
Pop: 537.28€
Metal: 535.33€
Alternative & Punk: 502.89€
Science Fiction: 493.88€
Rock And Roll: 463.01€
Bossa Nova: 436.40€
Rock: 434.86€
Easy Listening: 423.00€
Classical: 402.31€
R&B/Soul: 388.36€
Hip Hop/Rap: 386.67€
Opera: 382.33€
World: 381.66€
Blues: 375.27€
Alternative: 324.73€
Reggae: 273.35€
Drama: 262.98€
Electronica/Dance: 224.60€
TV Shows: 217.53€
Soundtrack: 187.62€
Chiffre d'affaires total: 10817.59€


tracksRDD = ParallelCollectionRDD[82] at parallelize at <console>:44
invoiceItemsRDD = ParallelCollectionRDD[83] at parallelize at <console>:45
invoicesRDD = ParallelCollectionRDD[84] at parallelize at <console>:46
filteredTracksRDD = MapPartitionsRDD[85] at filter at <console>:48
trackGenreRDD = MapPartitionsRDD[86] at map at <console>:51
invoiceTrackRDD = MapPartitionsRDD[87] at map at <console>:52
invoiceGenreCountRDD = ShuffledRDD[93] at reduceByKey at <console>:57
maxGenre1CountRDD = 2


invoicesWithMaxGenre1RDD: org.apache.spark...


2

## Etape 2/ Analyse des transformations

In [19]:
// 1. Lineage of the media-type filter.
println("\n1. PIPELINE FILTRAGE MÉDIA:")
println(filteredTracksRDD.toDebugString)

// 2. Lineage of the top-5 pipeline: units per track, keyed by units, sorted in descending order.
val trackSalesAnalysisRDD = invoiceItemsRDD
  .map(line => (line.trackId, line.quantity))
  .reduceByKey((left, right) => left + right)
  .map(_.swap)
  .sortByKey(ascending = false)

println("\n2. PIPELINE TOP 5 TRACKS:")
println(trackSalesAnalysisRDD.toDebugString)

// 3. Lineage of the revenue-per-genre pipeline: line amounts joined with the track genres.
val genreAnalysisRDD = invoiceItemsRDD
  .map(line => (line.trackId, line.unitPrice * line.quantity))
  .join(trackGenreMapRDD)
  .map { case (_, (amount, genreId)) => (genreId, amount) }
  .reduceByKey((left, right) => left + right)

println("\n3. PIPELINE TOTAL PAR GENRE:")
println(genreAnalysisRDD.toDebugString)

// 4. Lineage of the Rock-lines-per-invoice pipeline.
println("\n4. PIPELINE COMPTAGE ROCK:")
println(invoiceGenreCountRDD.toDebugString)



1. PIPELINE FILTRAGE MÉDIA:
(2) MapPartitionsRDD[85] at filter at <console>:48 []
 |  ParallelCollectionRDD[82] at parallelize at <console>:44 []

2. PIPELINE TOP 5 TRACKS:
(2) ShuffledRDD[164] at sortByKey at <console>:37 []
 +-(2) MapPartitionsRDD[161] at map at <console>:36 []
    |  ShuffledRDD[160] at reduceByKey at <console>:35 []
    +-(2) MapPartitionsRDD[159] at map at <console>:34 []
       |  ParallelCollectionRDD[83] at parallelize at <console>:45 []

3. PIPELINE TOTAL PAR GENRE:
(2) ShuffledRDD[170] at reduceByKey at <console>:46 []
 +-(2) MapPartitionsRDD[169] at map at <console>:45 []
    |  MapPartitionsRDD[168] at join at <console>:44 []
    |  MapPartitionsRDD[167] at join at <console>:44 []
    |  CoGroupedRDD[166] at join at <console>:44 []
    +-(2) MapPartitionsRDD[165] at map at <console>:43 []
    |  |  ParallelCollectionRDD[83] at parallelize at <console>:45 []
    +-(2) MapPartitionsRDD[108] at map at <console>:85 []
       |  ParallelCollectionRDD[82] at par

trackSalesAnalysisRDD = ShuffledRDD[164] at sortByKey at <console>:37
genreAnalysisRDD = ShuffledRDD[170] at reduceByKey at <console>:46


ShuffledRDD[170] at reduceByKey at <console>:46

On peut voir, grâce à toDebugString, que les pipelines impliquant un shuffle sont :
- PIPELINE TOP 5 TRACKS
- PIPELINE TOTAL PAR GENRE
- PIPELINE COMPTAGE ROCK

## Etape 3/ Chargement des CSV