# Performance to Tiny Performance Format

The purpose of this script is to break down all the metatone touch-data into a long Tiny Touchscreen Performance array. Each performance is split per player, cleaned into `[x,y,moving,time,dt]` columns and then concatenated into one very long table. The `x`, `y` and `dt` columns of that table are stored as a `float32` array in `h5` format for later use in training ANNs.

In [1]:
from __future__ import print_function
import os
import time
from datetime import timedelta
import pandas as pd
import numpy as np
import random
import h5py

### Load up data

- Loads metatone logs.
- Note: no output directory is checked or created in this notebook; the final `h5` file is written to the current working directory.

In [2]:
# Collect every metatone touch log (files ending in "-touches.csv") from the
# data directory. The listing is sorted because os.listdir() returns entries
# in arbitrary order, which would otherwise make the order of log_frames (and
# of anything built from it) differ between runs and machines.
data_directory = "../data"
log_files = [
    os.path.join(data_directory, local_file)
    for local_file in sorted(os.listdir(data_directory))
    if local_file.endswith("-touches.csv")
]

print("Loading all the frames.")
# pd.DataFrame.from_csv is deprecated and was removed in pandas 1.0.
# read_csv with index_col=0 and parse_dates=True reproduces its defaults:
# the first column becomes the index and is parsed as datetimes.
log_frames = []
for log in log_files:
    log_frames.append(pd.read_csv(log, index_col=0, parse_dates=True, header=0))
print("Done Loading", len(log_frames), "logs.")

Loading all the frames.
Done Loading 163 logs.


In [35]:
# Length of every performance in seconds: the span between the first and the
# last index entry of each log.
lengths = [
    timedelta.total_seconds(frame.index[-1] - frame.index[0])
    for frame in log_frames
]

corpus_hours = np.sum(lengths) / 3600
median_minutes = np.median(lengths) / 60

print("Total length of corpus:")
print(corpus_hours, "hours")
print("Median performance length")
print(median_minutes, "minutes")

# Summary statistics of the performance lengths, expressed in minutes.
df = pd.DataFrame({"length": lengths})
df["length"] = df["length"] / 60.
df.describe()


Total length of corpus:
20.7342317189 hours
Median performance length
7.10905663333 minutes


Unnamed: 0,length
count,163.0
mean,7.632233
std,5.060826
min,3.5e-05
25%,4.399071
50%,7.109057
75%,9.419563
max,26.003427


# Process touch logs

- Divide each touch log by performer
- Convert each log to tiny performance format (time as float, x, y, moving)
- Save each log individually (currently commented)
- Concatenate all logs into one
- Save as one big file in float32 format.
- In h5 format the 4.3M touches end up around 52MB.

### Problems

- A small number of logs are in y,x format (rotated), so some x,y values end up less than 0 or more than 1; these values are clipped to [0,1] before saving.
- Velocity is dropped in the interest of simplicity.
- Some pauses are greater than 5 seconds and should really start again as zero; for now `dt` is only clipped to a maximum of 5 seconds.
- The last two problems are kicked down the road to the next processing script.


In [3]:
def clean_sound_object(frame):
    """Cleans up sound object frames by removing unneeded 
        columns and changing times to differences."""
    first_time = frame.index[0].to_pydatetime()
    output = frame[['x_pos','y_pos','velocity']]
    output['time'] = output.index
    output.velocity = output.velocity/output.velocity
    output.velocity = output.velocity.fillna(0)
    output.x_pos = output.x_pos/1024.0
    output.y_pos = output.y_pos/768.0
    output.time = (output.time - first_time).apply(timedelta.total_seconds)
    output['dt'] = output.time.diff() # time diff.
    output.time = output.time.fillna(0)
    output.dt = output.dt.fillna(0)
    output = output.rename(columns={'x_pos': 'x', 'y_pos': 'y', 'velocity': 'moving', 'time': 'time', 'dt': 'dt'})
    return output

# Running totals reported once all logs have been processed.
total_touches = 0
total_performances = 0
total_performers = 0
individual_tiny_perfs = []

print("Dividing all performances by player and converting to tiny performance format.")

for log in log_frames:
    total_performances += 1
    # Each distinct device_id in a log is treated as one performer.
    for device_id in log['device_id'].unique():
        total_performers += 1
        player_log = log[log['device_id'] == device_id]
        # Title used only by the (disabled) per-performer CSV export below.
        individual_log_title = player_log.index[0].to_pydatetime().strftime("%Y-%m-%d-%H-%M-%S")
        individual_log_title += "-" + device_id
        player_log = clean_sound_object(player_log)
        player_log = player_log.set_index('time')
        total_touches += player_log['x'].count()  # Add to total touches processed
        # Per-performer export is disabled; output_directory and
        # output_fileending are not defined in the visible notebook cells.
        #player_log.to_csv(output_directory + individual_log_title + output_fileending)
        individual_tiny_perfs.append(player_log)

print()
print("Processed", total_performances, "performances.")
print("There were", total_performers, "performers in total.")
print("Total touches recorded was:", total_touches)
print("Now saving a big file with all performances concatenated.")
total_perf_df = pd.concat(individual_tiny_perfs)
# The corpus is no longer saved as CSV; only the h5 file below is written.

# Clip x and y to [0, 1] and dt to at most 5 seconds.
# This is done by value with Series.clip rather than by index label: the
# concatenated frame is indexed by per-performer elapsed time, which repeats
# (every performer starts at 0.0), so label-based assignment would also
# overwrite in-range rows that share a label with an out-of-range one.
# DataFrame.set_value, used previously, was removed in pandas 1.0.
total_perf_df['x'] = total_perf_df['x'].clip(lower=0.0, upper=1.0)
total_perf_df['y'] = total_perf_df['y'].clip(lower=0.0, upper=1.0)
total_perf_df['dt'] = total_perf_df['dt'].clip(upper=5.0)

# make into one huge array (only x, y and dt are kept)
total_perf_array = np.array(total_perf_df[['x', 'y', 'dt']])

# save huge array in h5 format
data_file_name = "MetatoneTinyPerformanceRecords.h5"
with h5py.File(data_file_name, 'w') as data_file:
    data_file.create_dataset('total_performances', data=total_perf_array, dtype='float32')

# done.
print("Done.")

Dividing all performances by player and converting to tiny performance format.

Processed 163 performances.
There were 548 performers in total.
Total touches recorded was: 4298418
Now saving a big file with all performances concatenated.
Done.
