Importing the dataset

In [1]:
#Load the music dataset from its CSV file into a pandas DataFrame
import pandas as pd
music_data = pd.read_csv(filepath_or_buffer='music.csv')
#Leaving the frame as the last expression makes the notebook render it
music_data

Unnamed: 0,age,gender,genre
0,20,1,HipHop
1,23,1,HipHop
2,25,1,HipHop
3,26,1,Jazz
4,29,1,Jazz
5,30,1,Jazz
6,31,1,Classical
7,33,1,Classical
8,37,1,Classical
9,20,0,Dance


Cleaning the dataset and making the input dataset

In [2]:
#Build the input (feature) set
#Everything except the genre column is an input; genre itself is what we want to predict
X = music_data.drop('genre', axis='columns')
X

Unnamed: 0,age,gender
0,20,1
1,23,1
2,25,1
3,26,1
4,29,1
5,30,1
6,31,1
7,33,1
8,37,1
9,20,0


Extracting the output (target) column from the dataset

In [3]:
#Build the output (target) set: the genre column on its own
y = music_data.loc[:, 'genre']
y

0        HipHop
1        HipHop
2        HipHop
3          Jazz
4          Jazz
5          Jazz
6     Classical
7     Classical
8     Classical
9         Dance
10        Dance
11        Dance
12     Acoustic
13     Acoustic
14     Acoustic
15    Classical
16    Classical
17    Classical
Name: genre, dtype: object

Building the model using the Decision Tree algorithm, because the model needs to learn decision rules from the data and then make predictions on its own

In [4]:
#Building our model using the decision tree algorithm from the sklearn library
from sklearn.tree import DecisionTreeClassifier
#random_state is fixed so that ties between equally good splits are broken the same way on every run
model = DecisionTreeClassifier(random_state=42)
model.fit(X,y)
#Making predictions for two new rows, each given as [age, gender]
#The rows are wrapped in a DataFrame with the same column names as X: the model was fitted on a
#DataFrame, and predicting on a bare list makes newer sklearn versions warn about missing feature names
new_listeners = pd.DataFrame([ [21,1], [22,0] ], columns=X.columns)
predictions = model.predict(new_listeners)
predictions



array(['HipHop', 'Dance'], dtype=object)

Calculating the accuracy of the model

In [5]:
from sklearn.model_selection import train_test_split
#Hold out 20% of the rows for testing
#random_state is fixed so that the same rows land in the train and test sets on every run
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)
#Note: this refits the existing model object on the training rows only,
#replacing the earlier fit that used the full dataset
model.fit(X_train,y_train)
#Predict the genre for the held-out rows so they can be compared against y_test
predictions = model.predict(X_test)

In [6]:
from sklearn.metrics import accuracy_score
#Compare the predicted genres against the true genres of the held-out test rows
score = accuracy_score(y_true=y_test, y_pred=predictions)
score
#NOTE(review): this value depends on which rows fall into the test split (see train_test_split above);
#a deeper tree does not by itself guarantee a better score - confirm before relying on that

1.0

Persisting Models

In [10]:
#Save the trained model to disk so it can be reused later without retraining
import joblib
joblib.dump(value=model, filename='music-recommender.joblib')

['music-recommender.joblib']

In [12]:
#Now we load the already saved trained model to test if it's functioning properly
#NOTE: joblib.load unpickles the file, which can execute arbitrary code - only load files you trust
model = joblib.load('music-recommender.joblib')
#Each row is [age, gender]; the rows are wrapped in a DataFrame with named columns because
#predicting on a bare list makes newer sklearn versions warn about missing feature names
new_listeners = pd.DataFrame([ [21,1], [22,0] ], columns=['age', 'gender'])
predictions = model.predict(new_listeners)
predictions



array(['HipHop', 'Dance'], dtype=object)

Visualizing Decision Trees

In [13]:
#This is to have a look at what the decision tree looks like and how it predicts
from sklearn import tree
#class_names must line up with the classes the fitted model actually knows about.
#model.classes_ holds exactly those labels, whereas sorted(y.unique()) is taken from the full
#target column and would include a genre the model never saw if the training split left one out
#NOTE(review): the output file name is spelled 'recomender' (single m), unlike
#'music-recommender.joblib' - kept as-is so existing references to the .dot file keep working
tree.export_graphviz(
    model,
    out_file='music-recomender.dot',
    feature_names=['age', 'gender'],
    class_names=list(model.classes_),
    label='all',
    rounded=True,
    filled=True,
)