In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
import random

In [10]:
def load_and_preprocess_data(path):
    """
    Load the song dataset from a CSV file and keep the columns used downstream.

    Column names are lower-cased first so the column selection does not depend
    on the capitalisation used in the CSV header.

    Args:
        path: Location of the CSV file (or any object ``pandas.read_csv``
            accepts, such as an open file handle).

    Returns:
        A new DataFrame (an independent copy) holding the columns
        'artist', 'song', 'emotion', 'genre', 'release date' and 'explicit'.

    Raises:
        KeyError: If any of those columns is missing after lower-casing.
    """
    # Read from the caller-supplied location; previously the argument was
    # ignored in favour of a hard-coded path.
    df = pd.read_csv(path)
    df.columns = df.columns.str.lower()

    # Select features & target
    df_processed = df[['artist', 'song', 'emotion', 'genre', 'release date', 'explicit']].copy()
    return df_processed


In [11]:
def prepare_data_for_training(df):
    """
    Label-encode the 'emotion' feature and 'genre' target, then split them.

    Each column gets its own LabelEncoder, fitted on the full column. The
    encoded emotion is reshaped into a single-column 2-D array. The split
    holds out 20% of the rows, is stratified on the encoded genre and uses a
    fixed random_state of 42.

    Returns:
        (X_train, X_test, y_train, y_test, le_emotion, le_genre)
    """
    le_emotion, le_genre = LabelEncoder(), LabelEncoder()

    # One feature only, so reshape the 1-D codes into an (n_rows, 1) matrix.
    features = le_emotion.fit_transform(df['emotion']).reshape(-1, 1)
    targets = le_genre.fit_transform(df['genre'])

    split = train_test_split(
        features,
        targets,
        test_size=0.2,
        random_state=42,
        stratify=targets,
    )
    X_train, X_test, y_train, y_test = split
    return X_train, X_test, y_train, y_test, le_emotion, le_genre

In [12]:
def train_model(X_train, y_train):
    """Fit a DecisionTreeClassifier (random_state=42) on the training data and return it."""
    # fit() returns the estimator itself, so the fitted model can be returned directly.
    return DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

In [13]:
def evaluate_model(model, X_test, y_test, le_genre):
    """
    Print accuracy and a per-genre classification report for the test split.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Feature matrix to predict on.
        y_test: True labels; assumed to be the integer codes produced by
            ``le_genre`` (0 .. number of classes - 1).
        le_genre: Fitted label encoder whose ``classes_`` supplies the
            human-readable genre names for the report rows.

    Returns:
        None. All results are written to stdout.
    """
    print("\n--- Model Evaluation ---")
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    print(f"Accuracy: {accuracy:.2f}\n")
    print("Classification Report:")

    # Pin the label set to every encoded class so the rows always line up with
    # target_names, even if a class is absent from both y_test and predictions.
    label_ids = list(range(len(le_genre.classes_)))
    report = classification_report(
        y_test,
        predictions,
        labels=label_ids,
        target_names=le_genre.classes_,
        # Classes that are never predicted have undefined precision; report
        # them as 0 explicitly instead of emitting UndefinedMetricWarning.
        zero_division=0,
    )
    print(report)

In [14]:
def save_model(model, le_emotion, le_genre, path):
    """
    Persist the trained model together with both label encoders.

    The three objects are stored as a single ``(model, le_emotion, le_genre)``
    tuple so they can be restored together in one load.

    Args:
        model: Trained estimator to persist.
        le_emotion: Encoder used for the emotion feature.
        le_genre: Encoder used for the genre target.
        path: Destination file for the serialized bundle.
    """
    # Write to the requested path; previously a fixed file name was used, so
    # the confirmation message could name a file that was never created.
    joblib.dump((model, le_emotion, le_genre), path)
    print(f"\nModel saved at {path}")

def load_model(path):
    """
    Restore the object stored at ``path`` with joblib.

    A file written by ``save_model`` holds a ``(model, le_emotion, le_genre)``
    tuple, which is returned unchanged.
    """
    # NOTE: joblib files are pickle-based; only load files from a trusted source.
    bundle = joblib.load(path)
    return bundle

In [23]:
def recommend_songs(emotion, df, model, le_emotion, le_genre, n=5, genre=None, random_state=None):
    """
    Recommend up to ``n`` songs matching an emotion and, optionally, a genre.

    Matching is a case-insensitive equality test against the 'emotion' and
    'genre' columns of ``df``. The returned songs are a random sample of the
    matching rows.

    Args:
        emotion: Emotion label to look for.
        df: DataFrame with string-valued 'emotion' and 'genre' columns.
        model: Unused by the lookup; kept for interface compatibility.
        le_emotion: Unused by the lookup; kept for interface compatibility.
        le_genre: Unused by the lookup; kept for interface compatibility.
        n: Maximum number of songs to return. Values <= 0 yield an empty list.
        genre: Optional genre filter. Falsy values (None, "") disable it.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            selection can be reproduced. The default (None) keeps the pick
            random on every call.

    Returns:
        A list of dicts, one per selected row, keyed by the column names of
        ``df``. Empty when nothing matches or ``n`` is not positive.
    """
    # DataFrame.sample rejects negative sizes; treat "no songs requested" as
    # an empty recommendation list rather than an error.
    if n <= 0:
        return []

    # Filter dataset by emotion
    filtered_df = df[df['emotion'].str.lower() == emotion.lower()]

    # If genre is provided, filter further
    if genre:
        filtered_df = filtered_df[filtered_df['genre'].str.lower() == genre.lower()]

    # If nothing matches, return empty
    if filtered_df.empty:
        return []

    # Randomly pick n songs (or all of them when fewer than n match)
    sample_size = min(n, len(filtered_df))
    recommendations = filtered_df.sample(sample_size, random_state=random_state)

    return recommendations.to_dict(orient="records")

In [24]:
if __name__ == "__main__":
    # --- Configuration: adjust these two paths for your environment ---
    DATA_PATH = r"/content/song.csv"
    MODEL_PATH = r"vibetube.pkl"

    # 1) Read the dataset and keep only the columns the pipeline needs.
    full_df = load_and_preprocess_data(DATA_PATH)

    # 2) Encode the categorical columns and produce the train/test split.
    (
        X_train,
        X_test,
        y_train,
        y_test,
        le_emotion,
        le_genre,
    ) = prepare_data_for_training(full_df)

    # 3) Fit the classifier, 4) report its metrics, 5) persist it with the encoders.
    trained_model = train_model(X_train, y_train)
    evaluate_model(trained_model, X_test, y_test, le_genre)
    save_model(trained_model, le_emotion, le_genre, MODEL_PATH)

    # 6) Ask the user what they feel like listening to (input is normalised
    #    to trimmed lower case before the lookup).
    user_emotion = input("Enter your emotion (e.g., joy, sad, anger): ").strip().lower()
    user_genre = input("Enter your preferred genre (e.g., pop, rock, jazz): ").strip().lower()

    recommendations = recommend_songs(
        user_emotion,
        full_df,
        trained_model,
        le_emotion,
        le_genre,
        n=5,
        genre=user_genre,
    )

    # Report the outcome: a fallback message first, otherwise one line per song.
    if not recommendations:
        print("\nNo matching songs found.")
    else:
        print("\nHere are your song recommendations:")
        for rec in recommendations:
            print(f"- '{rec['song']}' by {rec['artist']}")


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



--- Model Evaluation ---
Accuracy: 0.31

Classification Report:
              precision    recall  f1-score   support

 alternative       0.00      0.00      0.00      3809
      comedy       0.00      0.00      0.00       206
  electronic       0.00      0.00      0.00      1029
        folk       0.00      0.00      0.00      2124
     hip-hop       0.00      0.00      0.00      3463
        jazz       0.00      0.00      0.00       861
       metal       0.00      0.00      0.00      2098
         pop       0.30      0.90      0.45      9377
 progressive       0.00      0.00      0.00       605
         rap       0.38      0.37      0.37      6640
        rock       0.00      0.00      0.00      2591
        soul       0.00      0.00      0.00      1748

    accuracy                           0.31     34551
   macro avg       0.06      0.11      0.07     34551
weighted avg       0.15      0.31      0.19     34551


Model saved at vibetube.pkl
Enter your emotion (e.g., joy, sad, ang