# Building a Machine Learning App

In [18]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Read the listener data into a DataFrame, then summarise its numeric columns
# (count, mean, std, min, quartiles, max).
music_csv = 'music.csv'
df = pd.read_csv(music_csv)
df.describe()

Unnamed: 0,age,gender
count,18.0,18.0
mean,27.944444,0.5
std,5.12746,0.514496
min,20.0,0.0
25%,25.0,0.0
50%,28.0,0.5
75%,31.0,1.0
max,37.0,1.0


## Splitting the Data Set

In [8]:
# Separate the features (model input) from the label (model output).
# drop() returns a new DataFrame, so df itself is left untouched.
input_set = df.drop(columns=['genre'])  # every column except the genre
output_set = df['genre']                # the genre column: what we want to predict

# A notebook cell renders only its LAST expression, so a bare `input_set`
# line placed before `output_set` would be evaluated and silently discarded.
# Show the labels here; to inspect the features, end a separate cell with
# `input_set` (or `input_set.head()`).
output_set

0        HipHop
1        HipHop
2        HipHop
3          Jazz
4          Jazz
5          Jazz
6     Classical
7     Classical
8     Classical
9         Dance
10        Dance
11        Dance
12     Acoustic
13     Acoustic
14     Acoustic
15    Classical
16    Classical
17    Classical
Name: genre, dtype: object

## Making Predictions

In [12]:
# Create an untrained decision-tree classifier.
# random_state is fixed because scikit-learn permutes the features at each
# split; without a seed, ties between equally good splits may be broken
# differently from one run to the next.
model = DecisionTreeClassifier(random_state=42)

In [13]:
# Train the model on the complete data set
model.fit(input_set, output_set)

# Predict the preferred genre of a 21 year old male and a 22 year old female.
# The model was fitted on a DataFrame, so the query is also built as a
# DataFrame with the same columns, in the same order, as input_set. Passing a
# bare nested list would drop the feature names and make scikit-learn warn
# that X does not have valid feature names.
new_listeners = pd.DataFrame([[21, 1], [22, 0]], columns=input_set.columns)
predictions = model.predict(new_listeners)

In [14]:
# Bare last expression: the notebook renders the array of predicted genres
predictions

array(['HipHop', 'Dance'], dtype=object)

In [16]:
# Report each prediction next to the person it was made for; the order of the
# descriptions matches the order of the rows passed to model.predict().
descriptions = ("A 21-year old male prefers", "A 22-year old female prefers")
for position, description in enumerate(descriptions):
    print(description, predictions[position])

A 21-year old male prefers HipHop
A 22-year old female prefers Dance


## Testing Accuracy

In [22]:
# Hold out 20% of the rows for testing; the remaining 80% are used for training.
# random_state pins the shuffle so that "Restart & Run All" reproduces the same
# split, and therefore the same score, on every run.
input_train, input_test, output_train, output_test = train_test_split(
    input_set, output_set, test_size=0.2, random_state=42
)

# Fit a fresh model on the training portion only, then predict the held-out rows.
# The classifier is seeded as well so that fitting is deterministic.
model2 = DecisionTreeClassifier(random_state=42)
model2.fit(input_train, output_train)
predictions2 = model2.predict(input_test)

# Accuracy = fraction of held-out rows whose predicted genre equals the true genre.
# NOTE: the data set has only 18 rows, so the test set holds just a handful of
# them and the score moves in large steps; a different seed can give a
# noticeably different value.
score = accuracy_score(output_test, predictions2)
score

1.0