In [3]:
import pandas as pd
import os


# Directory that holds the per-metric CSV files read by the cells below.
path = 'metrics/'

## Crear un archivo con la información de los datos originales y otro con la de los datos resampleados, además de uno que contenga la información general de todos los sujetos

In [31]:
# Store the general information shared by all subjects.

# Any of the metric files can serve as the source. Only the columns that are
# repeated in every file are kept; the rest are dropped.
df_info = (
	pd.read_csv(os.path.join(path, 'dfa.csv'), sep=';', index_col=0)
	.drop(columns=['No. Samples', 'Detrented Fluctuation Analysis', 'Sample Rate'])
)

df_info.to_csv('data/info.csv', sep=';')
df_info

Unnamed: 0,Actor,Sex,Emotion,Intensity,Statement,Reps,File,Duration
0,Actor_01,Man,Neutral,Normal,Kids are talking by the door,1,03-01-01-01-01-01-01.wav,3.303311
1,Actor_01,Man,Neutral,Normal,Kids are talking by the door,2,03-01-01-01-01-02-01.wav,3.336689
2,Actor_01,Man,Neutral,Normal,Dogs are sitting by the door,1,03-01-01-01-02-01-01.wav,3.269932
3,Actor_01,Man,Neutral,Normal,Dogs are sitting by the door,2,03-01-01-01-02-02-01.wav,3.169841
4,Actor_01,Man,Calm,Normal,Kids are talking by the door,1,03-01-02-01-01-01-01.wav,3.536871
...,...,...,...,...,...,...,...,...
1435,Actor_24,Woman,Surprised,Normal,Dogs are sitting by the door,2,03-01-08-01-02-02-24.wav,3.403401
1436,Actor_24,Woman,Surprised,Strong,Kids are talking by the door,1,03-01-08-02-01-01-24.wav,3.937279
1437,Actor_24,Woman,Surprised,Strong,Kids are talking by the door,2,03-01-08-02-01-02-24.wav,3.970658
1438,Actor_24,Woman,Surprised,Strong,Dogs are sitting by the door,1,03-01-08-02-02-01-24.wav,3.670340


In [34]:
# Read any one of the metric files computed from the resampled data, then
# discard the metric column together with the general subject columns.
df_resample_info = (
	pd.read_csv(os.path.join(path, 'dfa1.csv'), sep=';', index_col=0)
	.drop(columns=[
		'Detrented Fluctuation Analysis',
		'Actor', 'Sex', 'Emotion', 'Intensity',
		'Statement', 'Duration', 'Reps',
	])
)

# Save the remaining columns to a CSV file.
df_resample_info.to_csv('data/resample_info.csv', sep=';')
df_resample_info

Unnamed: 0,File,No. Samples,Sample Rate
0,03-01-01-01-01-01-01.wav,72838,9999.9680
1,03-01-01-01-01-02-01.wav,73574,10000.0320
2,03-01-01-01-02-01-01.wav,72102,9999.9020
3,03-01-01-01-02-02-01.wav,69895,9999.8700
4,03-01-02-01-01-01-01.wav,77988,9999.8000
...,...,...,...
1435,03-01-08-01-02-02-24.wav,75045,9999.9960
1436,03-01-08-02-01-01-24.wav,86817,9999.8000
1437,03-01-08-02-01-02-24.wav,87553,9999.8545
1438,03-01-08-02-02-01-24.wav,80931,9999.8910


In [35]:
# Read any one of the metric files computed from the original data, then
# discard the metric column together with the general subject columns.
df_original_info = (
	pd.read_csv(os.path.join(path, 'dfa.csv'), sep=';', index_col=0)
	.drop(columns=[
		'Detrented Fluctuation Analysis',
		'Actor', 'Sex', 'Emotion', 'Intensity',
		'Statement', 'Duration', 'Reps',
	])
)

# Save the remaining columns to a CSV file.
df_original_info.to_csv('data/original_info.csv', sep=';')
df_original_info

Unnamed: 0,File,No. Samples,Sample Rate
0,03-01-01-01-01-01-01.wav,72838,22050.0
1,03-01-01-01-01-02-01.wav,73574,22050.0
2,03-01-01-01-02-01-01.wav,72102,22050.0
3,03-01-01-01-02-02-01.wav,69895,22050.0
4,03-01-02-01-01-01-01.wav,77988,22050.0
...,...,...,...
1435,03-01-08-01-02-02-24.wav,75045,22050.0
1436,03-01-08-02-01-01-24.wav,86817,22050.0
1437,03-01-08-02-01-02-24.wav,87553,22050.0
1438,03-01-08-02-02-01-24.wav,80931,22050.0


# Agrupar en un archivo las métricas de las pistas de audio resampleadas y en otro las de las pistas originales

In [59]:
# os.listdir returns entries in arbitrary order; sorting keeps the column
# order of the generated CSV files reproducible between runs.
files = sorted(os.listdir(path))

data_columns = {  # file stem: column(s) of that file holding the metric values
	'dfa': 'Detrented Fluctuation Analysis',
	'hurst': 'Hurst Exponent',
	'sampen': 'Sample Entropy',
	'lyap_e': [
		'1st Lyapunov Exponent',
		'2nd Lyapunov Exponent',
		'3rd Lyapunov Exponent',
		'4th Lyapunov Exponent'
	],
	'lyap_r': 'Lyapunov Exponent',
	'corr_dim': 'Correlation Dimension'
}

column_label = {  # file stem: column name(s) used in the merged output
	'dfa': 'dfa',
	'hurst': 'hurst',
	'sampen': 'sampen',
	'lyap_r': 'lyap_r',
	'corr_dim': 'corr_dim',
	'lyap_e': ['lyap_e1', 'lyap_e2', 'lyap_e3', 'lyap_e4']
}

# Both merged tables start from the `File` column of the general info file.
df_info = pd.read_csv('data/info.csv', delimiter=';', index_col=0)
df_resampled = pd.DataFrame(df_info['File'])
df_original = pd.DataFrame(df_info['File'])

for file in files:
	filename, ext = os.path.splitext(file)
	if ext != '.csv':
		continue

	# Files for the resampled data carry a trailing `1`. Only the suffix is
	# tested, so a `1` elsewhere in a name cannot trigger the stripping below.
	is_resampled = filename.endswith('1')
	if is_resampled:
		filename = filename[:-1]  # Remove the `1` suffix

	# Ignore CSV files that are not one of the known metric files instead of
	# failing with a KeyError halfway through the merge.
	if filename not in data_columns:
		print(f'Skipping {file}: not a known metric file')
		continue

	df_temp = pd.read_csv(os.path.join(path, file), delimiter=';', index_col=0)
	df_target = df_resampled if is_resampled else df_original

	# Normalise single-column entries to lists so every metric is copied the
	# same way, one (source column, output column) pair at a time.
	source_columns = data_columns[filename]
	target_columns = column_label[filename]
	if isinstance(source_columns, str):
		source_columns = [source_columns]
		target_columns = [target_columns]

	# NOTE(review): the assignment aligns values on the row index, so it
	# assumes every metric file indexes the recordings the same way as
	# data/info.csv -- confirm.
	for source_column, target_column in zip(source_columns, target_columns):
		df_target[target_column] = df_temp[source_column]

df_original.to_csv('data/original.csv', sep=';')
df_resampled.to_csv('data/resampled.csv', sep=';')