# ATELIER 2

In [1]:
import scala.util.Random

/**
 * An employee record.
 *
 * @param employeeId identifier of the employee
 * @param lastName   family name
 * @param firstName  given name
 * @param title      job title
 * @param reportsTo  `employeeId` of the employee's manager; the sample data uses 0
 *                   for the employee who has no manager
 */
case class Employee(
  employeeId: Int,
  lastName: String, 
  firstName: String,
  title: String,
  reportsTo: Int
)

/**
 * A media type that tracks refer to through their `mediaTypeId`.
 *
 * @param mediaTypeId identifier of the media type
 * @param name        display name of the media type
 */
case class MediaType(
  mediaTypeId: Int,
  name: String
)

/**
 * A musical (or video) genre that tracks refer to through their `genreId`.
 *
 * @param genreId identifier of the genre
 * @param name    display name of the genre
 */
case class Genre(
  genreId: Int,
  name: String
)

/**
 * A track of the catalogue.
 *
 * @param trackId     identifier of the track
 * @param name        title of the track
 * @param mediaTypeId identifier of the track's [[MediaType]]
 * @param genreId     identifier of the track's [[Genre]]
 */
case class Track(
  trackId: Int,
  name: String,
  mediaTypeId: Int,
  genreId: Int
)

/**
 * One line of an invoice.
 *
 * @param invoiceLineId identifier of this line
 * @param invoiceId     identifier of the invoice this line belongs to
 * @param trackId       identifier of the purchased track
 * @param unitPrice     price of a single unit
 * @param quantity      number of units purchased
 */
case class InvoiceItem(
  invoiceLineId: Int,
  invoiceId: Int,
  trackId: Int,
  unitPrice: BigDecimal,
  quantity: Int,
)

/**
 * An invoice header.
 *
 * @param invoiceId identifier of the invoice
 * @param items     `invoiceLineId` values of the lines that make up the invoice
 */
case class Invoice(
  invoiceId: Int,
  items: List[Int]
)


// Staff directory, written as raw rows
// (id, last name, first name, title, manager id) and turned into Employee values.
// A manager id of 0 marks the employee at the top of the hierarchy.
val employees = Array(
  (1, "Adams", "Andrew", "General Manager", 0),
  (2, "Edwards", "Nancy", "Sales Manager", 1),
  (3, "Peacock", "Jane", "Sales Support Agent", 2),
  (4, "Park", "Margaret", "Sales Support Agent", 2),
  (5, "Johnson", "Steve", "Sales Support Agent", 2),
  (6, "Mitchell", "Michael", "IT Manager", 1),
  (7, "King", "Robert", "IT Staff", 6),
  (8, "Callahan", "Laura", "IT Staff", 6)
).map { case (id, last, first, jobTitle, managerId) =>
  Employee(id, last, first, jobTitle, managerId)
}

// Available media types; ids are assigned sequentially starting at 1,
// following the order of the names below.
val mediaTypes = Array(
  "MPEG audio file",
  "Protected AAC audio file",
  "Protected MPEG-4 video file",
  "Purchased AAC audio file",
  "AAC audio file"
).zipWithIndex.map { case (label, position) => MediaType(position + 1, label) }

// Available genres; ids are assigned sequentially starting at 1,
// following the order of the names below (so "Rock" is genre 1).
val genres = Array(
  "Rock",
  "Jazz",
  "Metal",
  "Alternative & Punk",
  "Rock And Roll",
  "Blues",
  "Latin",
  "Reggae",
  "Pop",
  "Soundtrack",
  "Bossa Nova",
  "Easy Listening",
  "Heavy Metal",
  "R&B/Soul",
  "Electronica/Dance",
  "World",
  "Hip Hop/Rap",
  "Science Fiction",
  "TV Shows",
  "Sci Fi & Fantasy",
  "Drama",
  "Comedy",
  "Alternative",
  "Classical",
  "Opera"
).zipWithIndex.map { case (label, position) => Genre(position + 1, label) }

// First word of a generated track name (single words, space separated).
val adjectives: Array[String] = (
  "Cosmic Electric Velvet Mystic Atomic Phantom Silent " +
  "Savage Eternal Digital Lost Sacred Wild Urban Golden " +
  "Crystal Liquid Toxic Neon Crimson Stellar Raging " +
  "Arctic Sonic Primal Midnight Screaming Infinite Royal Lunar"
).split(' ')

// Second word of a generated track name.
// Every noun appears exactly once so that a uniform random draw gives each
// of them the same probability ("Rebels" used to be listed twice).
val nouns = Array(
  "Echo", "Giants", "Wolves", "Horizon", "Empire", "Void", "Thunder", "Dragons", 
  "Saints", "Kings", "Ghosts", "Ravens", "Pirates", "Heroes", "Rebels", "Demons", 
  "Machines", "Angels", "Knights", "Lions", "Shadows", "Zombies", "Wizards", 
  "Killers", "Titans", "Outlaws", "Prophets", "Bandits", "Warriors"
)

// Trailing phrase of a generated track name ('|' separated because the
// phrases themselves contain spaces).
val complements: Array[String] = (
  "of Doom|in Chains|from Mars|of the North|of Death|in Disguise|" +
  "of the Night|from Hell|of Tomorrow|in Flames|of the Deep|" +
  "from Beyond|of Destruction|in Shadow|from the Sky|of the Abyss|" +
  "in Exile|from the East|of Eternity|from the Desert|of the Underground"
).split('|')

object Track {
  /**
   * Builds a track with the given id and randomly chosen attributes.
   *
   * The genre and media type are drawn from `genres` and `mediaTypes`; the
   * name is "adjective noun complement", each part drawn from the matching
   * word array.
   *
   * @param trackId identifier to give to the new track
   * @return the generated track
   */
  def generateTrack(trackId: Int): Track = {
    // Draws one element of the array at a random index.
    def pickOne[A](pool: Array[A]): A = pool(Random.nextInt(pool.length))

    val genre = pickOne(genres)
    val mediaType = pickOne(mediaTypes)
    val title = Seq(pickOne(adjectives), pickOne(nouns), pickOne(complements)).mkString(" ")

    Track(trackId, title, mediaType.mediaTypeId, genre.genreId)
  }
}


object Invoice {
  /**
   * Generates an invoice together with the line items it is made of.
   *
   * Between 1 and 5 distinct tracks are drawn from `availableTracks` (fewer
   * if the array holds fewer tracks). Each line gets a quantity of 1 in about
   * 80% of the draws and a value between 1 and 3 otherwise, and a unit price
   * between 0.99 and 2.00 rounded to two decimals.
   *
   * Line ids are `invoiceId * 10 + position` with position starting at 1, so
   * they stay unique as long as an invoice has fewer than 10 lines.
   *
   * @param invoiceId       identifier to give to the new invoice
   * @param availableTracks tracks the lines can be drawn from
   * @return the invoice and its line items, in line order
   */
  def generateInvoiceWithItems(invoiceId: Int, availableTracks: Array[Track]): (Invoice, List[InvoiceItem]) = {
    val itemCount = Random.nextInt(5) + 1
    val selectedTracks = Random.shuffle(availableTracks.toList).take(itemCount)

    val items = selectedTracks.zipWithIndex.map { case (track, index) =>
      val quantity = if (Random.nextDouble() > 0.2) 1 else Random.nextInt(3) + 1
      val unitPrice = BigDecimal(0.99 + Random.nextDouble() * 1.01).setScale(2, BigDecimal.RoundingMode.HALF_UP)

      InvoiceItem(
        invoiceLineId = invoiceId * 10 + index + 1,
        invoiceId = invoiceId,
        trackId = track.trackId,
        unitPrice = unitPrice,
        quantity = quantity,
      )
    }

    (Invoice(invoiceId, items.map(_.invoiceLineId)), items)
  }

  /**
   * Generates an invoice and returns only its header.
   *
   * The line items built along the way are not returned; call
   * [[generateInvoiceWithItems]] to keep them.
   *
   * @param invoiceId       identifier to give to the new invoice
   * @param availableTracks tracks the lines can be drawn from
   * @return the generated invoice, holding the ids of its lines
   */
  def generateInvoice(invoiceId: Int, availableTracks: Array[Track]): Invoice =
    generateInvoiceWithItems(invoiceId, availableTracks)._1
}

// Generate the catalogue: 200 tracks with ids 1 to 200.
val tracks = (1 to 200).map(id => Track.generateTrack(id))
val tracksArray = tracks.toArray
println("Liste des tracks :")
for (generatedTrack <- tracksArray) println(generatedTrack)


// Generate 2000 invoices (ids 1 to 2000) drawing their lines from the catalogue.
val invoices = (1 to 2000).map(id => Invoice.generateInvoice(id, tracksArray))
val invoicesArray = invoices.toArray
println("\nListe des invoices :")
for (generatedInvoice <- invoicesArray) println(generatedInvoice)




Liste des tracks :
Track(1,Cosmic Zombies from the Desert,3,25)
Track(2,Sacred Rebels of Eternity,4,1)
Track(3,Raging Titans of Doom,5,25)
Track(4,Urban Giants from Hell,5,9)
Track(5,Phantom Rebels in Flames,4,4)
Track(6,Atomic Bandits of Doom,3,6)
Track(7,Lunar Titans of the Underground,2,9)
Track(8,Toxic Killers in Flames,1,18)
Track(9,Royal Kings from Beyond,3,2)
Track(10,Neon Thunder from the Desert,5,18)
Track(11,Savage Ghosts of Eternity,4,7)
Track(12,Primal Dragons from Hell,2,7)
Track(13,Electric Outlaws of the Deep,3,15)
Track(14,Infinite Killers of Eternity,3,3)
Track(15,Infinite Empire from the Sky,5,4)
Track(16,Crystal Prophets of the North,2,18)
Track(17,Royal Saints in Exile,2,19)
Track(18,Digital Wolves in Disguise,3,6)
Track(19,Crystal Echo in Exile,3,5)
Track(20,Phantom Thunder of Tomorrow,3,17)
Track(21,Midnight Demons from Mars,1,5)
Track(22,Raging Heroes of the Underground,2,11)
Track(23,Stellar Rebels of the North,3,17)
Track(24,Neon Shadows from the Desert,2,18)
T

defined class Employee
defined class MediaType
defined class Genre
defined class Track
defined class InvoiceItem
defined class Invoice
employees = Array(Employee(1,Adams,Andrew,General Manager,0), Employee(2,Edwards,Nancy,Sales Manager,1), Employee(3,Peacock,Jane,Sales Support Agent,2), Employee(4,Park,Margaret,Sales Support Agent,2), Employee(5,Johnson,Steve,Sales Support Agent,2), Employee(6,Mitchell,Michael,IT Manager,1), Employee(7,King,Robert,IT Staff,6), Employee(8,Callahan,Laura,IT Staff,6))
mediaTypes = Array(MediaType(1,MPEG audio file), MediaType(2,Protected AAC audio file), MediaType(3,Protected MPEG-4 video file), MediaType(4,Purchased AAC audio file), MediaType(5,AAC audio file))
genres = Array(Genre(...


Array(Genre(...

# Atelier 3

## 1/ Filtrage - Titre en fonction d'un média spécifique

In [2]:
// Media type ids to keep.
val mediaFilter = List(1, 3, 5)

// Tracks whose media type is one of the ids listed in mediaFilter.
val filteredTracks = tracksArray.collect {
  case candidate if mediaFilter.contains(candidate.mediaTypeId) => candidate
}

println(s"Nombre de tracks filtrées: ${filteredTracks.length}")

Nombre de tracks filtrées: 129


mediaFilter = List(1, 3, 5)
filteredTracks = Array(Track(1,Cosmic Zombies from the Desert,3,25), Track(3,Raging Titans of Doom,5,25), Track(4,Urban Giants from Hell,5,9), Track(6,Atomic Bandits of Doom,3,6), Track(8,Toxic Killers in Flames,1,18), Track(9,Royal Kings from Beyond,3,2), Track(10,Neon Thunder from the Desert,5,18), Track(13,Electric Outlaws of the Deep,3,15), Track(14,Infinite Killers of Eternity,3,3), Track(15,Infinite Empire from the Sky,5,4), Track(18,Digital Wolves in Disguise,3,6), Track(19,Crystal Echo in Exile,3,5), Track(20,Phantom Thunder of Tomorrow,3,17), Track(21,Midnight Demons from Mars,1,5), Track(23,Stellar Rebels of the North,3,17), Track(27,Digital Warriors of Death,1,21), Track(28,Raging Wolves in Shadow,1,17), Track(29,Crimson Ki...


Array(Track(1,Cosmic Zombies from the Desert,3,25), Track(3,Raging Titans of Doom,5,25), Track(4,Urban Giants from Hell,5,9), Track(6,Atomic Bandits of Doom,3,6), Track(8,Toxic Killers in Flames,1,18), Track(9,Royal Kings from Beyond,3,2), Track(10,Neon Thunder from the Desert,5,18), Track(13,Electric Outlaws of the Deep,3,15), Track(14,Infinite Killers of Eternity,3,3), Track(15,Infinite Empire from the Sky,5,4), Track(18,Digital Wolves in Disguise,3,6), Track(19,Crystal Echo in Exile,3,5), Track(20,Phantom Thunder of Tomorrow,3,17), Track(21,Midnight Demons from Mars,1,5), Track(23,Stellar Rebels of the North,3,17), Track(27,Digital Warriors of Death,1,21), Track(28,Raging Wolves in Shadow,1,17), Track(29,Crimson Ki...

## 1/ Filtrage - Commandes ayant un maximum de titres du genre 1

In [3]:
// Build the invoice line items.
// Each invoice gets exactly one item per line id already stored in
// `invoice.items`, so the items stay consistent with the invoice headers
// (drawing a fresh random item count here would produce line ids that do not
// match the ones recorded on the invoice).
val invoiceItems = invoicesArray.flatMap { invoice =>
  // Distinct random tracks, one per existing line of the invoice.
  val selectedTracks = Random.shuffle(tracksArray.toList).take(invoice.items.length)

  invoice.items.zip(selectedTracks).map { case (lineId, track) =>
    // Quantity is 1 in about 80% of the draws, otherwise between 1 and 3.
    val quantity = if (Random.nextDouble() > 0.2) 1 else Random.nextInt(3) + 1
    // Unit price between 0.99 and 2.00, rounded to two decimals.
    val unitPrice = BigDecimal(0.99 + Random.nextDouble() * 1.01).setScale(2, BigDecimal.RoundingMode.HALF_UP)

    InvoiceItem(
      invoiceLineId = lineId,
      invoiceId = invoice.invoiceId,
      trackId = track.trackId,
      unitPrice = unitPrice,
      quantity = quantity
    )
  }
}

// Number of genre-1 lines per invoice id, computed once.
// Invoices without any genre-1 line are absent from the map.
// Assumes track ids are unique within tracksArray.
val genre1CountByInvoice: Map[Int, Int] = {
  val genre1TrackIds = tracksArray.iterator.filter(_.genreId == 1).map(_.trackId).toSet

  invoiceItems
    .filter(item => genre1TrackIds.contains(item.trackId))
    .groupBy(_.invoiceId)
    .map { case (invoiceId, items) => invoiceId -> items.length }
}

/**
 * Counts the lines of an invoice whose track belongs to genre 1.
 *
 * The answer is read from an index built once, instead of rescanning every
 * invoice item and every track on each call.
 *
 * @param invoiceId identifier of the invoice to inspect
 * @return the number of genre-1 lines, 0 when the invoice has none or is unknown
 */
def countGenre1TracksInInvoice(invoiceId: Int): Int =
  genre1CountByInvoice.getOrElse(invoiceId, 0)

// Genre-1 line count of every invoice, computed once and reused below.
val genre1CountPerInvoice = invoicesArray.map { invoice =>
  invoice -> countGenre1TracksInInvoice(invoice.invoiceId)
}

// Highest genre-1 count over all invoices (0 when there is no invoice at all,
// instead of failing on the empty collection).
val maxGenre1Count =
  if (genre1CountPerInvoice.isEmpty) 0 else genre1CountPerInvoice.map(_._2).max

// Invoices reaching that maximum.
val invoicesWithMaxGenre1 = genre1CountPerInvoice.collect {
  case (invoice, count) if count == maxGenre1Count => invoice
}

println(s"Maximum de titres du genre 1 (Rock) dans une commande: $maxGenre1Count")
println(s"Nombre de commandes ayant ce maximum: ${invoicesWithMaxGenre1.length}")

Maximum de titres du genre 1 (Rock) dans une commande: 2
Nombre de commandes ayant ce maximum: 14


invoiceItems = Array(InvoiceItem(11,1,69,1.16,1), InvoiceItem(12,1,71,1.94,1), InvoiceItem(21,2,190,1.08,1), InvoiceItem(31,3,199,1.20,1), InvoiceItem(32,3,82,1.98,1), InvoiceItem(33,3,86,1.09,1), InvoiceItem(34,3,47,1.97,1), InvoiceItem(41,4,147,1.47,1), InvoiceItem(42,4,143,1.94,1), InvoiceItem(43,4,146,1.62,1), InvoiceItem(44,4,77,1.71,1), InvoiceItem(51,5,159,1.52,1), InvoiceItem(52,5,5,1.71,1), InvoiceItem(53,5,8,1.20,1), InvoiceItem(54,5,162,1.69,1), InvoiceItem(61,6,74,1.34,1), InvoiceItem(62,6,72,1.03,1), InvoiceItem(63,6,122,1.93,1), InvoiceItem(64,6,182,1.05,1), InvoiceItem(65,6,191,1.65,1), InvoiceItem(71,7,33,1.57,1), InvoiceItem(72,7,86,1.08,1), InvoiceItem(81,8,198,1.77,1), InvoiceItem(91,9,83,1.60,1), InvoiceItem(101,10,192,1.59,1), InvoiceItem(102,10,...


Array(InvoiceItem(11,1,69,1.16,1), InvoiceItem(12,1,71,1.94,1), InvoiceItem(21,2,190,1.08,1), InvoiceItem(31,3,199,1.20,1), InvoiceItem(32,3,82,1.98,1), InvoiceItem(33,3,86,1.09,1), InvoiceItem(34,3,47,1.97,1), InvoiceItem(41,4,147,1.47,1), InvoiceItem(42,4,143,1.94,1), InvoiceItem(43,4,146,1.62,1), InvoiceItem(44,4,77,1.71,1), InvoiceItem(51,5,159,1.52,1), InvoiceItem(52,5,5,1.71,1), InvoiceItem(53,5,8,1.20,1), InvoiceItem(54,5,162,1.69,1), InvoiceItem(61,6,74,1.34,1), InvoiceItem(62,6,72,1.03,1), InvoiceItem(63,6,122,1.93,1), InvoiceItem(64,6,182,1.05,1), InvoiceItem(65,6,191,1.65,1), InvoiceItem(71,7,33,1.57,1), InvoiceItem(72,7,86,1.08,1), InvoiceItem(81,8,198,1.77,1), InvoiceItem(91,9,83,1.60,1), InvoiceItem(101,10,192,1.59,1), InvoiceItem(102,10,...

## 2/ Agrégations - Top 5 titres en nombre de ventes

In [4]:
// Total quantity sold per track id.
val trackSales = invoiceItems
  .groupBy(_.trackId)
  .map { case (trackId, items) =>
    trackId -> items.map(_.quantity).sum
  }

// The five best-selling tracks as (track, total quantity, genre name).
// Ties are broken by ascending track id so the ranking is deterministic;
// a sale whose track id is not in the catalogue is skipped instead of failing.
val top5Tracks = {
  val tracksById = tracksArray.map(track => track.trackId -> track).toMap
  val genreNamesById = genres.map(genre => genre.genreId -> genre.name).toMap

  trackSales
    .toSeq
    .sortBy { case (trackId, totalSales) => (-totalSales, trackId) }
    .flatMap { case (trackId, totalSales) =>
      tracksById.get(trackId).map { track =>
        (track, totalSales, genreNamesById.getOrElse(track.genreId, "Unknown"))
      }
    }
    .take(5)
}

println("Top 5 des titres en nombre de ventes:")
top5Tracks.zipWithIndex.foreach { case ((track, sales, genre), index) =>
  println(s"${index + 1}. ${track.name} - $sales ventes (Genre: $genre)")
}


Top 5 des titres en nombre de ventes:
1. Arctic Rebels in Flames - 57 ventes (Genre: Alternative & Punk)
2. Infinite Shadows of Destruction - 56 ventes (Genre: Sci Fi & Fantasy)
3. Stellar Machines from Beyond - 55 ventes (Genre: Alternative & Punk)
4. Screaming Rebels of the Underground - 52 ventes (Genre: Latin)
5. Digital Warriors of Death - 52 ventes (Genre: Drama)


trackSales = Map(69 -> 36, 138 -> 33, 101 -> 47, 88 -> 46, 170 -> 24, 115 -> 29, 5 -> 42, 120 -> 42, 10 -> 46, 56 -> 34, 142 -> 47, 153 -> 34, 174 -> 40, 185 -> 34, 42 -> 30, 24 -> 38, 37 -> 35, 25 -> 40, 52 -> 52, 14 -> 34, 184 -> 33, 110 -> 32, 125 -> 40, 196 -> 40, 157 -> 30, 189 -> 33, 20 -> 37, 46 -> 46, 93 -> 36, 152 -> 40, 57 -> 31, 78 -> 32, 29 -> 40, 164 -> 32, 179 -> 28, 106 -> 46, 121 -> 39, 84 -> 39, 147 -> 34, 61 -> 36, 132 -> 35, 89 -> 43, 133 -> 29, 116 -> 44, 1 -> 27, 74 -> 34, 6 -> 31, 60 -> 35, 117 -> 35, 85 -> 32, 102 -> 36, 28 -> 36, 38 -> 33, 160 -> 44, 70 -> 40, 192 -> 27, 21 -> 44, 137 -> 36, 165 -> 29, 33 -> 29, 92 -> 49, 197 -> 39, 65 -> 42, 97 -> 22, 156 -> 33, 9 -> 39, 188 -> 28, 53 -> 28, 169 -> 38, 141 -> 36, 109 -> 3...


Map(69 -> 36, 138 -> 33, 101 -> 47, 88 -> 46, 170 -> 24, 115 -> 29, 5 -> 42, 120 -> 42, 10 -> 46, 56 -> 34, 142 -> 47, 153 -> 34, 174 -> 40, 185 -> 34, 42 -> 30, 24 -> 38, 37 -> 35, 25 -> 40, 52 -> 52, 14 -> 34, 184 -> 33, 110 -> 32, 125 -> 40, 196 -> 40, 157 -> 30, 189 -> 33, 20 -> 37, 46 -> 46, 93 -> 36, 152 -> 40, 57 -> 31, 78 -> 32, 29 -> 40, 164 -> 32, 179 -> 28, 106 -> 46, 121 -> 39, 84 -> 39, 147 -> 34, 61 -> 36, 132 -> 35, 89 -> 43, 133 -> 29, 116 -> 44, 1 -> 27, 74 -> 34, 6 -> 31, 60 -> 35, 117 -> 35, 85 -> 32, 102 -> 36, 28 -> 36, 38 -> 33, 160 -> 44, 70 -> 40, 192 -> 27, 21 -> 44, 137 -> 36, 165 -> 29, 33 -> 29, 92 -> 49, 197 -> 39, 65 -> 42, 97 -> 22, 156 -> 33, 9 -> 39, 188 -> 28, 53 -> 28, 169 -> 38, 141 -> 36, 109 -> 3...

## 2/ Agrégations - Total commande par genre

In [5]:
// Per genre: (genre name, total amount, total quantity), highest amount first.
// Lookups go through maps built once rather than a linear search per item;
// an item whose track or genre cannot be resolved is reported under "Unknown"
// instead of failing.
val totalByGenre = {
  val genreIdByTrackId = tracksArray.map(track => track.trackId -> track.genreId).toMap
  val genreNameById = genres.map(genre => genre.genreId -> genre.name).toMap

  invoiceItems
    .groupBy { item =>
      genreIdByTrackId.get(item.trackId).flatMap(genreNameById.get).getOrElse("Unknown")
    }
    .map { case (genre, items) =>
      val total = items.map(item => item.unitPrice * item.quantity).sum
      // NOTE(review): this is the sum of quantities (units sold), not a number
      // of distinct orders, although it is printed as "commandes" below.
      val commandes = items.map(_.quantity).sum
      (genre, total, commandes)
    }
    .toSeq
    .sortBy(-_._2)
}

println("Total des commandes par genre:")
totalByGenre.foreach { case (genre, total, commandes) =>
  println(f"$genre: $commandes commandes pour ${total}%.2f€")
}


Total des commandes par genre:
World: 470 commandes pour 701.01€
Pop: 405 commandes pour 615.67€
Science Fiction: 404 commandes pour 604.05€
Blues: 383 commandes pour 574.68€
Opera: 372 commandes pour 556.31€
Alternative & Punk: 367 commandes pour 546.21€
Easy Listening: 352 commandes pour 535.37€
Rock: 338 commandes pour 513.84€
Hip Hop/Rap: 344 commandes pour 511.56€
Jazz: 341 commandes pour 502.65€
Alternative: 301 commandes pour 454.89€
Sci Fi & Fantasy: 301 commandes pour 444.41€
Rock And Roll: 290 commandes pour 442.08€
Soundtrack: 301 commandes pour 440.87€
Drama: 262 commandes pour 387.59€
Latin: 251 commandes pour 385.02€
Heavy Metal: 251 commandes pour 378.57€
Electronica/Dance: 256 commandes pour 375.70€
Comedy: 229 commandes pour 349.51€
Reggae: 228 commandes pour 338.70€
Classical: 220 commandes pour 333.02€
R&B/Soul: 216 commandes pour 324.78€
Metal: 187 commandes pour 284.49€
Bossa Nova: 149 commandes pour 229.44€
TV Shows: 136 commandes pour 196.93€


totalByGenre = List((World,701.01,470), (Pop,615.67,405), (Science Fiction,604.05,404), (Blues,574.68,383), (Opera,556.31,372), (Alternative & Punk,546.21,367), (Easy Listening,535.37,352), (Rock,513.84,338), (Hip Hop/Rap,511.56,344), (Jazz,502.65,341), (Alternative,454.89,301), (Sci Fi & Fantasy,444.41,301), (Rock And Roll,442.08,290), (Soundtrack,440.87,301), (Drama,387.59,262), (Latin,385.02,251), (Heavy Metal,378.57,251), (Electronica/Dance,375.70,256), (Comedy,349.51,229), (Reggae,338.70,228), (Classical,333.02,220), (R&B/Soul,324.78,216), (Metal,284.49,187), (Bossa Nova,229.44,149), (TV Shows,196.93,136))


List((World,701.01,470), (Pop,615.67,405), (Science Fiction,604.05,404), (Blues,574.68,383), (Opera,556.31,372), (Alternative & Punk,546.21,367), (Easy Listening,535.37,352), (Rock,513.84,338), (Hip Hop/Rap,511.56,344), (Jazz,502.65,341), (Alternative,454.89,301), (Sci Fi & Fantasy,444.41,301), (Rock And Roll,442.08,290), (Soundtrack,440.87,301), (Drama,387.59,262), (Latin,385.02,251), (Heavy Metal,378.57,251), (Electronica/Dance,375.70,256), (Comedy,349.51,229), (Reggae,338.70,228), (Classical,333.02,220), (R&B/Soul,324.78,216), (Metal,284.49,187), (Bossa Nova,229.44,149), (TV Shows,196.93,136))

## 3/ Traitement récursif - Fonction récursive pour trouver le plus haut responsable

In [6]:
/**
 * Walks up the reporting chain of an employee and returns the employee at
 * the top, i.e. the first one whose `reportsTo` is 0.
 *
 * @param employeeId identifier of the employee to start from
 * @return the top-most manager (the employee itself when it reports to nobody)
 * @throws IllegalArgumentException if an id met along the chain matches no employee
 * @throws IllegalStateException    if the reporting chain loops back on itself
 */
def findTopManager(employeeId: Int): Employee = {
  // Local helper so the recursion can be checked as tail-recursive;
  // `visited` holds the ids already walked through, to detect cycles.
  @scala.annotation.tailrec
  def climb(currentId: Int, visited: Set[Int]): Employee = {
    if (visited.contains(currentId)) {
      throw new IllegalStateException(s"Cycle detected in reporting chain at employee $currentId")
    }

    val employee = employees.find(_.employeeId == currentId)
      .getOrElse(throw new IllegalArgumentException(s"Employee $currentId not found"))

    if (employee.reportsTo == 0) {
      employee
    } else {
      climb(employee.reportsTo, visited + currentId)
    }
  }

  climb(employeeId, Set.empty)
}

// Exercise findTopManager and buildHierarchyPath on every employee.
println("Tests de la fonction récursive findTopManager:")
println()

for (member <- employees) {
  val boss = findTopManager(member.employeeId)
  val chain = buildHierarchyPath(member.employeeId)

  println(s"${member.firstName} ${member.lastName} (${member.title})")
  println(s"  → Plus haut responsable: ${boss.firstName} ${boss.lastName} (${boss.title})")
  println(s"  → Chemin hiérarchique: $chain")
  println()
}

/**
 * Describes the reporting chain of an employee as
 * "First Last → First Last → ...", from the employee up to the top manager.
 *
 * @param employeeId identifier of the employee to start from
 * @return the names along the chain, joined with " → "
 * @throws IllegalArgumentException if an id met along the chain matches no employee
 * @throws IllegalStateException    if the reporting chain loops back on itself
 */
def buildHierarchyPath(employeeId: Int): String = {
  // Names are prepended (constant time on a List) and the list is reversed
  // once at the end; `visited` holds the ids already walked, to detect cycles.
  @scala.annotation.tailrec
  def buildPath(currentId: Int, visited: Set[Int], reversedPath: List[String]): List[String] = {
    if (visited.contains(currentId)) {
      throw new IllegalStateException(s"Cycle detected in reporting chain at employee $currentId")
    }

    val employee = employees.find(_.employeeId == currentId)
      .getOrElse(throw new IllegalArgumentException(s"Employee $currentId not found"))
    val extendedPath = s"${employee.firstName} ${employee.lastName}" :: reversedPath

    if (employee.reportsTo == 0) {
      extendedPath.reverse
    } else {
      buildPath(employee.reportsTo, visited + currentId, extendedPath)
    }
  }

  buildPath(employeeId, Set.empty, Nil).mkString(" → ")
}


findTopManager: (employeeId: Int)Employee
buildHierarchyPath: (employeeId: Int)String


Tests de la fonction récursive findTopManager:

Andrew Adams (General Manager)
  → Plus haut responsable: Andrew Adams (General Manager)
  → Chemin hiérarchique: Andrew Adams

Nancy Edwards (Sales Manager)
  → Plus haut responsable: Andrew Adams (General Manager)
  → Chemin hiérarchique: Nancy Edwards → Andrew Adams

Jane Peacock (Sales Support Agent)
  → Plus haut responsable: Andrew Adams (General Manager)
  → Chemin hiérarchique: Jane Peacock → Nancy Edwards → Andrew Adams

Margaret Park (Sales Support Agent)
  → Plus haut responsable: Andrew Adams (General Manager)
  → Chemin hiérarchique: Margaret Park → Nancy Edwards → Andrew Adams

Steve Johnson (Sales Support Agent)
  → Plus haut responsable: Andrew Adams (General Manager)
  → Chemin hiérarchique: Steve Johnson → Nancy Edwards → Andrew Adams

Michael Mitchell (IT Manager)
  → Plus haut responsable: Andrew Adams (General Manager)
  → Chemin hiérarchique: Michael Mitchell → Andrew Adams

Robert King (IT Staff)
  → Plus haut respo

# ATELIER 4 - Transposition sur Spark

## At. 4 - Etape 1/ Manipulation des données générées

In [12]:
// Build the RDDs from the in-memory collections created in the previous
// workshops (tracks, invoices, invoiceItems and mediaFilter).
val tracksRDD = sc.parallelize(tracks)
val invoiceItemsRDD = sc.parallelize(invoiceItems)
val invoicesRDD = sc.parallelize(invoices)

// Tracks whose media type is one of the ids listed in mediaFilter.
val filteredTracksRDD = tracksRDD.filter(track => mediaFilter.contains(track.mediaTypeId))

// (trackId, genreId) and (trackId, invoiceId) pairs, both keyed by track id.
val trackGenreRDD = tracksRDD.map(track => (track.trackId, track.genreId))
val invoiceTrackRDD = invoiceItemsRDD.map(item => (item.trackId, item.invoiceId))

// Number of Rock lines per invoice id.
// Only the Rock tracks take part in the join, so non-Rock lines are dropped
// before the shuffle; the result is cached because two actions reuse it below.
val invoiceGenreCountRDD = invoiceTrackRDD
  .join(trackGenreRDD.filter { case (_, genreId) => genreId == 1 }) // 1 = Genre Rock
  .map { case (_, (invoiceId, _)) => (invoiceId, 1) }
  .reduceByKey(_ + _)
  .cache()

// Highest Rock count over all invoices. max() fails on an empty RDD, so 0 is
// reported when no invoice holds any Rock track; in that case the filter below
// matches nothing, since invoices without Rock lines are absent from the RDD.
val maxGenre1CountRDD =
  if (invoiceGenreCountRDD.isEmpty()) 0 else invoiceGenreCountRDD.map(_._2).max()
val invoicesWithMaxGenre1RDD = invoiceGenreCountRDD.filter(_._2 == maxGenre1CountRDD)

// Print the RDD results
println(s"Maximum de titres Rock dans une commande: $maxGenre1CountRDD")
println(s"Nombre de commandes ayant ce maximum: ${invoicesWithMaxGenre1RDD.count()}")

Maximum de titres Rock dans une commande: 2
Nombre de commandes ayant ce maximum: 14


tracksRDD = ParallelCollectionRDD[7] at parallelize at <console>:33
invoiceItemsRDD = ParallelCollectionRDD[8] at parallelize at <console>:34
invoicesRDD = ParallelCollectionRDD[9] at parallelize at <console>:35
filteredTracksRDD = MapPartitionsRDD[10] at filter at <console>:37
trackGenreRDD = MapPartitionsRDD[11] at map at <console>:39
invoiceTrackRDD = MapPartitionsRDD[12] at map at <console>:40
invoiceGenreCountRDD = ShuffledRDD[18] at reduceByKey at <console>:45
maxGenre1CountRDD = 2


invoicesWithMaxGenre1RDD: org.apache.spark.rd...


2

## Etape 2/ Analyse des transformations

## Etape 3/ Chargement des CSV