In [16]:
# simple example for training model and predicting

import pandas as pd
from sklearn.tree import DecisionTreeClassifier

music_data = pd.read_csv('music.csv')
# gender is encoded as 1 = male and 0 = female
# separate the input features (X) from the output labels (y)

# X keeps every column except 'genre'; y is the 'genre' column alone
# (presumably X ends up with just 'age' and 'gender' -- confirm against music.csv)
X = music_data.drop(columns=['genre'])
y = music_data['genre']

# create the model, then fit it on the input and output sets
model = DecisionTreeClassifier()
model.fit(X, y)

# we predict for a 21 yo male and a 22 yo female, which are not in the data.
# The model was fitted on a DataFrame, so the new samples are wrapped in a
# DataFrame carrying the same column labels; passing a bare nested list makes
# scikit-learn warn that X does not have valid feature names.
new_listeners = pd.DataFrame([[21, 1], [22, 0]], columns=X.columns)
predictions = model.predict(new_listeners)
predictions



array(['HipHop', 'Dance'], dtype=object)

In [50]:
# more advanced model with splitting data to get accuracy
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

music_data = pd.read_csv('music.csv')

# X keeps every column except 'genre'; y is the 'genre' column alone
X = music_data.drop(columns=['genre'])
y = music_data['genre']

# Seed shared by the split and the tree so that re-running this cell
# reproduces the same train/test partition and therefore the same score.
RANDOM_STATE = 42

# hold out 20% of the rows for testing, train on the remaining 80%
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

model = DecisionTreeClassifier(random_state=RANDOM_STATE)
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# fraction of held-out rows whose predicted genre matches the true genre
score = accuracy_score(y_test, predictions)
score

0.75

In [55]:
# model persistence to save the trained model
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
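# older tutorials use `from sklearn.externals import joblib`; recent scikit-learn
# releases no longer ship that module, so joblib is imported directly instead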
import joblib

# read the data set from disk
music_data = pd.read_csv('music.csv')

# labels come from the 'genre' column; features are whatever is left once it is dropped
y = music_data['genre']
X = music_data.drop('genre', axis=1)

# X.values hands the classifier the underlying array rather than the DataFrame;
# the loading cell later predicts from a plain nested list
model = DecisionTreeClassifier().fit(X.values, y)

# write the fitted model to disk so it can be reused without retraining
joblib.dump(model, 'music-recommender.joblib')

# the fitted model could also be queried right here, e.g. model.predict([[21, 1]])

['music-recommender.joblib']

In [57]:
# loading the trained model
# joblib is imported in this cell (like every other cell imports what it uses)
# so the cell also runs on a fresh kernel without re-running the persistence cell
import joblib

# NOTE: joblib.load is pickle-based and can execute arbitrary code while
# loading -- only load model files that come from a trusted source
model = joblib.load('music-recommender.joblib')

# one sample, presumably in [age, gender] order (21 yo, gender 1) -- confirm against music.csv
predictions = model.predict([[21, 1]])
predictions

array(['HipHop'], dtype=object)

In [54]:
# visualizing model
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

music_data = pd.read_csv('music.csv')

# X keeps every column except 'genre'; y is the 'genre' column alone
X = music_data.drop(columns=['genre'])
y = music_data['genre']

model = DecisionTreeClassifier()
model.fit(X, y)

# Export the fitted tree to a Graphviz .dot file.
# Feature names are taken from the training columns and class names from the
# fitted model, so the labels in the graph always match what the tree was
# actually trained on instead of relying on a hard-coded list.
tree.export_graphviz(model, out_file='music-recommender.dot',
                    feature_names=list(X.columns),
                    class_names=[str(label) for label in model.classes_],
                    label='all',
                    rounded=True,
                    filled=True)

# the result is saved into a .dot file, which can be opened in PyCharm once dot support is installed