In [1]:
import turicreate as tc
from os.path import basename

In [2]:
# Display the installed Turi Create version so readers know which release
# produced the outputs recorded in this notebook.
tc.__version__

'5.6'

In [3]:
# Read the audio clips found under the ESC-50 audio directory, then load the
# dataset's metadata CSV (read_csv infers the column types on its own).
data = tc.load_audio(path='./ESC-50-master/audio/')
meta_data = tc.SFrame.read_csv(url='./ESC-50-master/meta/esc50.csv')

------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,int,int,str,str,int,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


In [4]:
# Give every clip a 'filename' column (the last component of its path) and
# then merge the clips with the metadata table.
data['filename'] = data['path'].apply(basename)
data = data.join(meta_data)

In [5]:
# Restrict the data to the ESC-10 subset: keep only the rows whose 'esc10'
# column holds the string 'True'.
data = data.filter_by(values='True', column_name='esc10')

In [6]:
# Split by fold: fold 1 is held out as the test set and every other fold is
# used for training.
test_set = data.filter_by(values=1, column_name='fold')
train_set = data.filter_by(values=1, column_name='fold', exclude=True)

In [7]:
# Train a sound classifier that predicts the 'category' label from the
# 'audio' column of the training rows.
model = tc.sound_classifier.create(
    train_set,
    target='category',
    feature='audio',
)

Downloading https://docs-assets.developer.apple.com/turicreate/models/VGGishFeatureEmbedding-v1.mlmodel
Download completed: /var/folders/4h/q_d_80nj6_bgdtsbx5_d851r0000gn/T/model_cache/VGGishFeatureEmbedding-v1.mlmodel
Creating a validation set from 5 percent of training data. This may take a while.
	You can set ``validation_set=None`` to disable validation tracking.

Preprocessing audio data -
Preprocessed 168 of 304 examples
Preprocessed 304 of 304 examples

Preparing validataion set

Training a custom neural network -
+-------------------------+-------------------------+-------------------------+-------------------------+
| Iteration               | Training Accuracy       | Validation Accuracy (%) | Elapsed Time            |
+-------------------------+-------------------------+-------------------------+-------------------------+
| 1                       | 0.374                   | 0.188                   | 61.508                  |
+-------------------------+----------------------

In [8]:
# Run the trained model over the held-out test rows and keep the resulting
# SArray of predictions.
predictions = model.predict(dataset=test_set)

In [9]:
# Score the model against the held-out test rows and print the returned
# metrics.
metrics = model.evaluate(dataset=test_set)
print(metrics)

{'accuracy': 0.925, 'auc': 0.999479166666665, 'precision': 0.9416161616161615, 'recall': 0.925, 'f1_score': 0.9231877582341979, 'log_loss': 0.44230643533920083, 'confusion_matrix': Columns:
	target_label	str
	predicted_label	str
	count	int

Rows: 14

Data:
+----------------+-----------------+-------+
|  target_label  | predicted_label | count |
+----------------+-----------------+-------+
|    rooster     |     sneezing    |   1   |
|      dog       |       dog       |   6   |
|    sneezing    |     sneezing    |   8   |
|   helicopter   |       rain      |   1   |
|      rain      |       rain      |   8   |
| crackling_fire |  crackling_fire |   8   |
|      dog       |     sneezing    |   2   |
|    rooster     |     rooster     |   7   |
|  crying_baby   |   crying_baby   |   8   |
|   helicopter   |     chainsaw    |   2   |
+----------------+-----------------+-------+
[14 rows x 3 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_column

In [10]:
# Persist the trained model so it can be used again from Turi Create.
model.save(location='EnvSceneClassification.model')

In [11]:
# Write the trained model out in Core ML format.
model.export_coreml(filename='EnvSceneClassification.mlmodel')