### Import Libraries

In [13]:
import scala.io.Source

[32mimport [39m[36mscala.io.Source
[39m

### Import Files

In [14]:
// Absolute location of the raw songs CSV that the rest of the notebook reads
val filePath: String =
  "C:/Users/ilyas/Documents/Data_Engineering/Projects/Scala_ETL_Projects/Data/Raw_File/songs_2000_2020_50k.csv"


[36mfilePath[39m: [32mString[39m = [32m"C:/Users/ilyas/Documents/Data_Engineering/Projects/Scala_ETL_Projects/Data/Raw_File/songs_2000_2020_50k.csv"[39m

### Data Cleaning - Part 1 // Load the file and clean column names

In [15]:
// Read every line of the CSV into memory and always release the file handle,
// even if reading fails part-way through.
val lines: List[String] = {
  val source = Source.fromFile(filePath)
  try source.getLines().toList
  finally source.close()
}

// Split the file into the header (trimmed column names) and the data rows.
// headOption / drop(1) keep an empty file from throwing NoSuchElementException:
// it simply yields no columns and no rows.
val header: Array[String] = lines.headOption
  .map(_.split(",").map(_.trim))
  .getOrElse(Array.empty[String])

// Blank lines carry no record, so they are skipped instead of becoming empty rows.
val dataRows: List[String] = lines.drop(1).filter(_.trim.nonEmpty)

// Pair each row's values with the column names, producing one Map per record.
// NOTE(review): a plain split on "," does not understand quoted fields that
// contain commas; confirm the input never uses them.
val data: List[Map[String, String]] = dataRows.map { row =>
  // limit = -1 keeps trailing empty fields ("a,b," -> "a", "b", ""), so a record
  // whose last columns are blank still gets an entry for every column.
  val columns = row.split(",", -1).map(_.trim)
  header.zip(columns).toMap
}

[36mlines[39m: [32mList[39m[[32mString[39m] = [33mList[39m(
  [32m"Title,Artist,Album,Genre,Release Date,Duration,Popularity"[39m,
  [32m"Include name this.,Patrick Anderson,Care.,R&B,2008-01-09,262,71"[39m,
  [32m"Manage west energy.,Eric Miller,Raise get.,Jazz,2011-08-20,187,37"[39m,
  [32m"Evening court painting.,Richard Curry,Sport.,Electronic,2010-05-30,212,58"[39m,
  [32m"Section turn hour.,James Smith,Full.,Hip-Hop,2014-10-12,272,59"[39m,
  [32m"Five agreement teach.,Amy Rodriguez,Eat.,Blues,2005-06-09,131,34"[39m,
  [32m"Turn child.,Jessica Martin,Cold according.,R&B,2006-09-16,207,58"[39m,
  [32m"Old.,Cheyenne Powell,Oil.,Country,2010-04-23,163,72"[39m,
  [32m"Clear fly over.,Aaron Coleman,Strategy development.,Classical,2010-02-06,183,73"[39m,
  [32m"Agency employee present.,Brandon Henderson,Might live.,Country,2020-02-18,243,69"[39m,
  [32m"Face become we.,Raymond White,Probably camera.,Blues,2011-11-07,177,55"[39m,
  [32m"Couple bank.,Paul St

In [16]:
// Column names on a single line, separated by " | "
println(header.mkString(" | "))

// Horizontal rule between the header and the records
println("-" * 50)

// Show the first 5 records with values in header order; a column that is
// missing from a record is printed as an empty string
for (record <- data.take(5)) {
  val cells = header.map(column => record.getOrElse(column, ""))
  println(cells.mkString(" | "))
}


Title | Artist | Album | Genre | Release Date | Duration | Popularity
--------------------------------------------------
Include name this. | Patrick Anderson | Care. | R&B | 2008-01-09 | 262 | 71
Manage west energy. | Eric Miller | Raise get. | Jazz | 2011-08-20 | 187 | 37
Evening court painting. | Richard Curry | Sport. | Electronic | 2010-05-30 | 212 | 58
Section turn hour. | James Smith | Full. | Hip-Hop | 2014-10-12 | 272 | 59
Five agreement teach. | Amy Rodriguez | Eat. | Blues | 2005-06-09 | 131 | 34


### Data Cleaning - Part 2 // Fill Missing Values

In [None]:
import java.time.LocalDate
import scala.util.Try

// Clean and normalise every record: fill in missing text fields and coerce the
// numeric and date columns, falling back to a default when a value is missing
// or cannot be parsed. The result keeps the same String -> String shape as `data`.
val cleanedData = data.map { row =>
  // A field counts as present only if the key exists AND the value is non-blank.
  // A blank CSV field is stored as "" (the key is there), so a plain
  // row.getOrElse(column, default) would never apply the default to it.
  def present(column: String): Option[String] =
    row.get(column).map(_.trim).filter(_.nonEmpty)

  // Text columns: substitute a placeholder for missing/blank values
  val title = present("Title").getOrElse("Unknown Title")
  val artist = present("Artist").getOrElse("Unknown Artist")
  val album = present("Album").getOrElse("Unknown Album")
  val genre = present("Genre").getOrElse("Unknown Genre")

  // Duration and Popularity: parse as Int, defaulting to 0 when missing or not a number
  val duration = present("Duration").flatMap(v => Try(v.toInt).toOption).getOrElse(0)
  val popularity = present("Popularity").flatMap(v => Try(v.toInt).toOption).getOrElse(0)

  // Release Date: parse as an ISO-8601 date (yyyy-MM-dd), defaulting to 2000-01-01
  // when missing or malformed
  val releaseDate = present("Release Date")
    .flatMap(v => Try(LocalDate.parse(v)).toOption)
    .getOrElse(LocalDate.of(2000, 1, 1))

  // Return the normalised record as a new Map, with every value rendered back to String
  Map(
    "Title" -> title,
    "Artist" -> artist,
    "Album" -> album,
    "Genre" -> genre,
    "Duration" -> duration.toString,
    "Popularity" -> popularity.toString,
    "Release Date" -> releaseDate.toString
  )
}

// Preview the cleaned data: header line, a 50-dash rule, then the first 5 records
println(header.mkString(" | "))
println("-" * 50)
for (record <- cleanedData.take(5)) {
  // Values are looked up in header order; an absent column prints as an empty string
  val cells = header.map(column => record.getOrElse(column, ""))
  println(cells.mkString(" | "))
}
