In [4]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import tree
import joblib

# Load data
# 'genre' is the label column; every remaining column of the CSV is used as a feature.
music_data = pd.read_csv('../data/music.csv')
X = music_data.drop(columns=['genre'])
y = music_data['genre']

# Split train/test and train model
# 20% of the rows are held out for evaluation. The split AND the tree are seeded:
# DecisionTreeClassifier shuffles candidate features internally, so without a
# random_state the fitted tree (and the exported graph) can differ between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate model
# Accuracy is measured on the held-out rows only.
predictions = model.predict(X_test)
score = accuracy_score(y_test, predictions)
print(f'Accuracy: {score * 100:.2f}%')

# Save model
joblib.dump(model, '../models/music-recommender.joblib')

# Load model and predict new examples
# Reloading from disk verifies that the persisted artifact is usable on its own.
# NOTE: joblib.load unpickles arbitrary objects; only load files you produced yourself.
loaded_model = joblib.load('../models/music-recommender.joblib')
new_data = pd.DataFrame([[21, 1], [22, 0]], columns=['age', 'gender'])
new_predictions = loaded_model.predict(new_data)
print(f'Predictions for new examples: {new_predictions}')

# Export tree for visualization
# Feature and class names are taken from what the model was actually fitted on:
#  - feature_names follows the training frame's column order, so it cannot drift
#    from the columns the tree was trained with;
#  - class_names comes from model.classes_ (sorted labels seen during fit). Using
#    the labels of the full dataset instead would mislabel nodes whenever a genre
#    is absent from the training split, because names are looked up by position.
tree.export_graphviz(
    model,
    out_file='../outputs/music-recommender.dot',
    feature_names=list(X_train.columns),
    class_names=[str(label) for label in model.classes_],
    label='all',
    rounded=True,
    filled=True
)

Accuracy: 100.00%
Predictions for new examples: ['HipHop' 'Dance']
