In [3]:
import os, sys
from pyspark.sql import SparkSession
from pyspark.ml import PipelineModel
import pandas as pd

In [4]:
# Ensure PySpark uses the same Python interpreter for driver and workers
os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable
os.environ["PYSPARK_PYTHON"] = sys.executable

In [5]:
spark = SparkSession.builder \
    .appName("GenrePrediction") \
    .config("spark.sql.shuffle.partitions", "1") \
    .getOrCreate()
print(f"Running on Spark v{spark.version}")

Running on Spark v3.5.5


In [6]:
model = PipelineModel.load(f"model_stage4_merged_Trans_way_new")
label_indexer = next(
    s for s in model.stages
    if s.__class__.__name__ == "StringIndexerModel"
)
idx2label = label_indexer.labels
print("Genre labels:", idx2label)

Genre labels: ['pop', 'country', 'blues', 'rock', 'jazz', 'reggae', 'hip hop', 'screamo']


In [92]:
import pandas as pd

# 1. Load the merged dataset
merged = pd.read_csv(
    "data/Merged_dataset.csv",
    engine="python",            # fallback parser that handles uneven rows better
    on_bad_lines="skip",        # drop any line that doesn’t parse into 5 fields
    dtype=str                   # read everything as string to avoid type issues
)
# 2. Filter to only the rows where the genre is exactly "hip hop"
#    (use .str.lower() in case there’s capitalization variance)
hiphop_df1 = merged[ merged["genre"].str.lower() == "screamo" ].copy()


In [93]:
hiphop_df = hiphop_df1[:300]

In [94]:
hiphop_df.shape

(126, 5)

In [95]:
# 3. Prepare sample lyrics DataFrame
sample_sdf = spark.createDataFrame(hiphop_df)
sample_sdf.show(truncate=60)

+-------------------------------+--------------------------------------+------------+-------+------------------------------------------------------------+
|                    artist_name|                            track_name|release_date|  genre|                                                      lyrics|
+-------------------------------+--------------------------------------+------------+-------+------------------------------------------------------------+
|Fear, and Loathing in Las Vegas|                        Return to Zero|        2017|screamo|I’m not one of them, no I ain’t I’m not one of them, no I...|
|                         Saosin|                  3rd Measurement in C|        2003|screamo|Taking back, overdone Free and safely Souvenirs, out of s...|
|                        Siamese|                             Chemistry|        2024|screamo|Maybe this chemistry is playing its tricks inside of me A...|
|                   Fame on Fire|                      Spiral (Justice

In [96]:
# 4. Run model to predict genres
predictions = model.transform(sample_sdf)
results = predictions.select(
    "artist_name", "track_name", "lyrics", "probability", "prediction"
)
for row in results.collect():
    artist = row['artist_name']
    title = row['track_name']
    probs = row['probability'].toArray().tolist()
    pred = idx2label[int(row['prediction'])]
    print(f"{artist} - {title}")
    print(f"  → Predicted genre: {pred}")
    print(f"  Probabilities: {dict(zip(idx2label, [round(p,3) for p in probs]))}\n")

Fear, and Loathing in Las Vegas - Return to Zero
  → Predicted genre: pop
  Probabilities: {'pop': 0.259, 'country': 0.18, 'blues': 0.173, 'rock': 0.13, 'jazz': 0.143, 'reggae': 0.078, 'hip hop': 0.015, 'screamo': 0.021}

Saosin - 3rd Measurement in C
  → Predicted genre: screamo
  Probabilities: {'pop': 0.055, 'country': 0.045, 'blues': 0.04, 'rock': 0.03, 'jazz': 0.03, 'reggae': 0.018, 'hip hop': 0.003, 'screamo': 0.78}

Siamese - Chemistry
  → Predicted genre: screamo
  Probabilities: {'pop': 0.032, 'country': 0.022, 'blues': 0.023, 'rock': 0.024, 'jazz': 0.02, 'reggae': 0.011, 'hip hop': 0.002, 'screamo': 0.866}

Fame on Fire - Spiral (Justice)
  → Predicted genre: screamo
  Probabilities: {'pop': 0.001, 'country': 0.001, 'blues': 0.001, 'rock': 0.002, 'jazz': 0.001, 'reggae': 0.0, 'hip hop': 0.0, 'screamo': 0.994}

The Early November - I Want To Hear You Sad
  → Predicted genre: screamo
  Probabilities: {'pop': 0.033, 'country': 0.03, 'blues': 0.026, 'rock': 0.02, 'jazz': 0.022, '

# Songs for video

In [119]:
import pandas as pd

In [120]:
merged = pd.read_csv(
    "data/Merged_dataset.csv",
    engine="python",            # fallback parser that handles uneven rows better
    on_bad_lines="skip",        # drop any line that doesn’t parse into 5 fields
    dtype=str                   # read everything as string to avoid type issues
)

In [121]:
POP = merged[ merged["track_name"].str.lower() == "mohabbat bhi jhoothi" ].copy()

In [130]:
POP

Unnamed: 0,artist_name,track_name,release_date,genre,lyrics
0,mukesh,mohabbat bhi jhoothi,1950,pop,hold time feel break feel untrue convince spea...


In [123]:
BLUES = merged[ merged["track_name"].str.lower() == "a woman is a sometime thing" ].copy()

In [124]:
BLUES

Unnamed: 0,artist_name,track_name,release_date,genre,lyrics
12732,ella fitzgerald,a woman is a sometime thing,1958,blues,lissen daddy warn fore start atravelling woman...


In [125]:
REGGEA = merged[ merged["track_name"].str.lower() == "only jah jah know" ].copy()

In [126]:
REGGEA

Unnamed: 0,artist_name,track_name,release_date,genre,lyrics
21169,tetrack,only jah jah know,1978,reggae,irie souljah come tell enemie friend learn gro...


In [127]:
ROCK = merged[ merged["track_name"].str.lower() == "here, there and everywhere" ].copy()

In [128]:
ROCK

Unnamed: 0,artist_name,track_name,release_date,genre,lyrics
1998,emmylou harris,"here, there and everywhere",1975,pop,make year change life wave hand run hand hair ...
18861,george benson,"here, there and everywhere",1989,jazz,lead better life need make year change life wa...
23659,the beatles,"here, there and everywhere",1966,rock,lead better life need make year change life wa...


In [116]:
screamo = merged[ merged["track_name"] == "Chemistry" ].copy()

In [117]:
screamo

Unnamed: 0,artist_name,track_name,release_date,genre,lyrics
28275,Siamese,Chemistry,2024,screamo,Maybe this chemistry is playing its tricks ins...


In [None]:

here, there and everywhere	

In [None]:
only jah jah know	

In [None]:
a woman is a sometime thing	

In [None]:
mohabbat bhi jhoothi	