In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import os
import glob
import matplotlib as mpl
import re

# Calibration Problems and Drop trials

In [3]:
# Base directory of the data set. The trailing slash matters: paths in this
# notebook are built by plain string formatting, not by os.path.join.
rootPath = "../data/"
# Suffix of the input CSV files; it is swapped for "_drop.csv" when exporting.
file_suffix = '_full.csv'

In [4]:
# Load the drop list stored at the root of the data directory and label every
# row it contains with TYPE "checking", so these rows can be told apart from
# the ones added by hand later in the notebook.
droplist_path = rootPath + "trials_droplist.csv"
trials_to_drop = pd.read_csv(droplist_path).assign(TYPE="checking")

In [5]:
# Show the drop list as loaded from disk (only "checking" rows at this point).
trials_to_drop

Unnamed: 0,PART_FILE,TRIALS,TYPE
0,../data/part_1/part_1_full.csv,"[None, None, None]",checking
1,../data/part_2/part_2_full.csv,"[None, None, None]",checking
2,../data/part_3/part_3_full.csv,"[None, None, None]",checking
3,../data/part_709/part_709_full.csv,"[None, array([ 2, 3, 8, 9, 11, 12, 18]), None]",checking
4,../data/part_710/part_710_full.csv,"[array([ 4, 11, 12, 14, 15, 17]), array([ 2, ...",checking
5,../data/part_711/part_711_full.csv,"[array([ 4, 11, 12, 14, 15, 17]), array([ 2, ...",checking
6,../data/part_712/part_712_full.csv,"[array([ 1, 3, 4, 11, 12, 14, 17]), array([ ...",checking
7,../data/part_713/part_713_full.csv,"[None, array([ 2, 3, 5, 7, 9, 10, 14, 16, ...",checking
8,../data/part_714/part_714_full.csv,"[None, None, None]",checking
9,../data/part_715/part_715_full.csv,"[None, array([ 4, 7, 8, 12, 16]), None]",checking


In [6]:
# Trials with problems
# Hand-listed trials tagged TYPE "other"; any TYPE other than "calibration" is
# removed from the exported data by the cleaning loop further down.
# `DataFrame.append` was removed in pandas 2.0, so the rows are added with
# `pd.concat` instead. `ignore_index=True` avoids duplicate index labels.
other_problem_rows = pd.DataFrame([
    {"PART_FILE": "../data/part_2/part_2_full.csv", "TRIALS": [8], "TYPE": "other"},
    {"PART_FILE": "../data/part_3/part_3_full.csv", "TRIALS": [15], "TYPE": "other"},
    {"PART_FILE": "../data/part_710/part_710_full.csv", "TRIALS": [15], "TYPE": "other"},
    {"PART_FILE": "../data/part_711/part_711_full.csv", "TRIALS": [16], "TYPE": "other"},
    {"PART_FILE": "../data/part_712/part_712_full.csv", "TRIALS": [10], "TYPE": "other"},
    {"PART_FILE": "../data/part_715/part_715_full.csv", "TRIALS": [8], "TYPE": "other"},
    {"PART_FILE": "../data/part_716/part_716_full.csv", "TRIALS": [12, 13], "TYPE": "other"}
])
trials_to_drop = pd.concat([trials_to_drop, other_problem_rows], ignore_index=True)

In [7]:
# Trials with calibration problems
# Hand-listed trials tagged TYPE "calibration"; the cleaning loop further down
# keeps these trials and only marks them in the CALIBRATION_PROBLEM column.
# `DataFrame.append` was removed in pandas 2.0, so the rows are added with
# `pd.concat` instead. `ignore_index=True` avoids duplicate index labels.
calibration_problem_rows = pd.DataFrame([
    {"PART_FILE": "../data/part_4/part_4_full.csv", "TRIALS": [1], "TYPE": "calibration"},
    {"PART_FILE": "../data/part_710/part_710_full.csv", "TRIALS": [1, 5], "TYPE": "calibration"},
    {"PART_FILE": "../data/part_711/part_711_full.csv", "TRIALS": [1], "TYPE": "calibration"},
    {"PART_FILE": "../data/part_713/part_713_full.csv", "TRIALS": [1], "TYPE": "calibration"},
    {"PART_FILE": "../data/part_714/part_714_full.csv", "TRIALS": [1], "TYPE": "calibration"},
    {"PART_FILE": "../data/part_716/part_716_full.csv", "TRIALS": [1], "TYPE": "calibration"}
])
trials_to_drop = pd.concat([trials_to_drop, calibration_problem_rows], ignore_index=True)

In [8]:
# Renumber the rows 0..n-1 (rows added after loading may carry duplicate labels).
trials_to_drop = trials_to_drop.reset_index(drop=True)

# Suffix of the cleaned files written next to each `*_full.csv` input.
drop_suffix = "_drop.csv"


def _parse_trials(raw_trials):
    """Flatten one ``TRIALS`` cell into a list of integer trial indices.

    Parameters
    ----------
    raw_trials : str, sequence, scalar or None
        * ``str``: the textual form produced when the drop list goes through
          CSV, e.g. ``"[None, array([ 2,  3]), None]"``. Every integer found
          in the text is returned.
        * sequence: e.g. ``[8]`` or ``[None, array([2, 3]), None]``. ``None``
          entries are skipped and arrays are flattened.
        * ``None`` / NaN: treated as "no trials".

    Returns
    -------
    list of int

    Notes
    -----
    NOTE(review): the previous code called ``np.hstack`` directly on the cell.
    For string cells that yields single characters, which never equal an
    integer ``TRIAL_INDEX``, so rows loaded from the CSV dropped nothing.
    Parsing the integers makes those rows effective. Please verify the
    resulting files against expectations.
    """
    if raw_trials is None:
        return []
    if isinstance(raw_trials, float) and np.isnan(raw_trials):
        return []
    if isinstance(raw_trials, str):
        if "..." in raw_trials:
            # A truncated repr cannot be recovered; make it visible.
            print("WARNING: TRIALS entry looks truncated: {}".format(raw_trials))
        # Array reprs may end with e.g. "dtype=int64"; its digits are not trials.
        without_dtype = re.sub(r"dtype=\w+", "", raw_trials)
        return [int(token) for token in re.findall(r"\d+", without_dtype)]
    if isinstance(raw_trials, (int, float, np.integer, np.floating)):
        return [int(raw_trials)]

    trial_indices = []
    for entry in raw_trials:
        if entry is None:
            continue
        trial_indices.extend(int(value) for value in np.atleast_1d(entry).ravel())
    return trial_indices


# We create initial `_drop.csv` files with `CALIBRATION_PROBLEM` default value.
# They are rebuilt from the `_full.csv` inputs on every run, so re-running
# this cell starts from a clean state.
files = glob.glob("{0}part_*/part_*{1}".format(rootPath, file_suffix))
for filename in files:
    df = pd.read_csv(filename)
    df["CALIBRATION_PROBLEM"] = 'No'
    df.to_csv(filename.replace(file_suffix, drop_suffix), index=False)

# Drop trials
# Each drop-list row is applied to the `_drop.csv` of its file. Several rows
# may target the same file; their effects accumulate because the file is
# re-read and re-written for every row.
for _, row in trials_to_drop.iterrows():
    filename = row["PART_FILE"]

    if not os.path.exists(filename):
        print("Skipping {}. File does not exist.".format(filename))
        continue

    print("Cleaning {}".format(filename))
    new_filename = filename.replace(file_suffix, drop_suffix)
    df = pd.read_csv(new_filename)

    in_listed_trials = df["TRIAL_INDEX"].isin(_parse_trials(row["TRIALS"]))
    if row["TYPE"] == "calibration":
        # Calibration problem: keep the trial and only flag it. Trials that no
        # longer exist (already dropped by an earlier row) match nothing.
        df.loc[in_listed_trials, "CALIBRATION_PROBLEM"] = 'Yes'
    else:
        # Any other type: remove the listed trials.
        df = df.loc[~in_listed_trials]

    # Export
    print("-- Exporting to {}".format(new_filename))
    df.to_csv(new_filename, index=False)
    df = None  # release the frame before the next file is loaded
print("Done")

Cleaning ../data/part_1/part_1_full.csv
-- Exporting to ../data/part_1/part_1_drop.csv
Cleaning ../data/part_2/part_2_full.csv
-- Exporting to ../data/part_2/part_2_drop.csv
Cleaning ../data/part_3/part_3_full.csv
-- Exporting to ../data/part_3/part_3_drop.csv
Cleaning ../data/part_709/part_709_full.csv
-- Exporting to ../data/part_709/part_709_drop.csv
Cleaning ../data/part_710/part_710_full.csv
-- Exporting to ../data/part_710/part_710_drop.csv
Cleaning ../data/part_711/part_711_full.csv
-- Exporting to ../data/part_711/part_711_drop.csv
Cleaning ../data/part_712/part_712_full.csv
-- Exporting to ../data/part_712/part_712_drop.csv
Cleaning ../data/part_713/part_713_full.csv
-- Exporting to ../data/part_713/part_713_drop.csv
Cleaning ../data/part_714/part_714_full.csv
-- Exporting to ../data/part_714/part_714_drop.csv
Cleaning ../data/part_715/part_715_full.csv
-- Exporting to ../data/part_715/part_715_drop.csv
Cleaning ../data/part_716/part_716_full.csv
-- Exporting to ../data/part_7