# Generate CSV-format data for the Spindles dataset from the original data

The description of the original data can be found here:
https://web.archive.org/web/20180629162000/http://www.tcts.fpms.ac.be/~devuyst/Databases/DatabaseSpindles/

***How to add a Conda environment to Jupyter Notebook***

In a terminal, type:

```shell
activate your_env_name
python -m ipykernel install --user --name your_env_name --display-name "name_to_show_in_jupyter"
```

In [1]:
import numpy as np
import pandas as pd

## Original data file names and settings

Change these constants to match the excerpt you want to convert.

In [2]:
# Number of the excerpt to convert. Changing this single value keeps the
# input paths, the sampling frequency and the output file name in sync.
EXCERPT_ID = 1

# Sampling frequency in Hz of each excerpt (described in the data description)
SAMPLE_FREQ_BY_EXCERPT = {
    1: 100,
    2: 200,
    3: 50,
    4: 200,
    5: 200,
    6: 200,
    7: 200,
    8: 200,
}

# path to the Excerpt file
EXCERPT = 'DatabaseSpindles/excerpt%d.txt' % EXCERPT_ID

# path to the Hypnogram file
HYPNO = 'DatabaseSpindles/Hypnogram_excerpt%d.txt' % EXCERPT_ID

# sampling frequency of the selected excerpt
# (a KeyError here means EXCERPT_ID has no entry in the table above)
SAMPLE_FREQ = SAMPLE_FREQ_BY_EXCERPT[EXCERPT_ID]

# name of the file to save the generated data
SAVE_DATA_FILE_NAME = 'data_excerpt%d.csv' % EXCERPT_ID

## Function to load and generate data

In [3]:
def generate_data(excerpt, hypnogram, sample_freq, epoch_seconds=5):
    """Cut the signal into one row per hypnogram entry and attach the labels.

    Parameters
    ----------
    excerpt : pandas.DataFrame
        Recorded signal, one sample per row. Only the first column is used.
    hypnogram : pandas.DataFrame
        Single-column frame holding one value per epoch. It is left
        unmodified; a renamed copy is joined to the result.
    sample_freq : int
        Sampling frequency of the signal, in samples per second.
    epoch_seconds : int, optional
        Seconds of signal covered by each hypnogram value. Defaults to 5,
        the constant specified in the data description.

    Returns
    -------
    pandas.DataFrame
        One row per hypnogram entry. Columns ``0 .. sample_freq *
        epoch_seconds - 1`` hold the samples of that epoch and the last
        column, ``'Label'``, holds the hypnogram value. If the excerpt is
        shorter than ``len(hypnogram)`` epochs, the missing samples are NaN.

    Raises
    ------
    ValueError
        If ``sample_freq * epoch_seconds`` is not a positive whole number.
    """
    samples_per_epoch = int(sample_freq * epoch_seconds)
    if samples_per_epoch <= 0 or samples_per_epoch != sample_freq * epoch_seconds:
        raise ValueError(
            "sample_freq * epoch_seconds must be a positive whole number, got %r"
            % (sample_freq * epoch_seconds,)
        )

    n_epochs = len(hypnogram)
    n_samples = n_epochs * samples_per_epoch

    # Keep only the samples covered by the hypnogram, positionally indexed.
    signal = excerpt.iloc[:n_samples, 0].reset_index(drop=True)
    if len(signal) < n_samples:
        # Excerpt is shorter than the hypnogram implies: pad the tail with NaN
        # so that every epoch still gets a full-width row.
        signal = signal.reindex(range(n_samples))

    data = pd.DataFrame(signal.values.reshape(n_epochs, samples_per_epoch))

    # Rename on a copy so the caller's hypnogram frame is not mutated.
    labels = hypnogram.copy()
    labels.columns = ['Label']
    return data.join(labels)

## Generate and save CSV data

In [4]:
# read the Excerpt file
excerpt_data = pd.read_csv(EXCERPT, header=0)
# read the Hypnogram file
hypnogram_data = pd.read_csv(HYPNO, header=0)

# generated csv data
generated_data = generate_data(excerpt_data, hypnogram_data, SAMPLE_FREQ)

# save the data
generated_data.to_csv(SAVE_DATA_FILE_NAME, header=True)

## Take a look at the generated data

### First 10 rows of the generated data

In [5]:
# Display the first 10 rows of the generated table
generated_data.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,491,492,493,494,495,496,497,498,499,Label
0,-0.9562,-0.8962,-3.1877,-4.1783,-5.319,-6.1896,-7.8006,-6.9701,-0.7061,2.3058,...,6.1483,6.0882,1.1451,-2.5673,0.7148,6.9588,4.8775,-2.9575,-7.4704,5
1,-8.8313,-8.5511,-6.5198,-4.1983,2.2958,6.1183,5.2477,3.8268,2.6561,2.7161,...,3.6667,-1.2364,-6.6799,-4.0282,-1.8968,-4.3584,-3.758,-0.8362,-6.4598,5
2,-12.7638,-7.7906,-5.7993,-9.7518,-5.4891,-3.3878,-5.4791,-3.9381,-4.6186,-5.0589,...,-6.8,-6.3097,-4.8687,-2.7074,-1.0463,-2.3972,0.6148,5.4378,4.3371,5
3,0.5647,1.2752,2.1357,0.8049,3.6467,2.2358,-2.217,1.7355,4.7074,3.8268,...,2.9562,7.6292,1.3252,-3.0876,3.0463,-1.8468,-1.8168,-2.177,-6.6098,5
4,-3.9882,-5.1989,-10.8325,-11.8732,-8.7412,-7.9107,-4.7687,-4.1183,-7.0001,-5.249,...,2.3258,2.3459,-1.1364,-4.6386,-4.6486,-0.556,-0.7661,-3.8881,-4.3184,5
5,-6.4698,-8.9013,-8.0708,-3.5679,-4.1683,-7.6505,-5.6793,-2.2471,1.2051,-6.81,...,0.7749,2.3058,7.8594,3.7568,-4.2383,-2.6573,4.6873,3.2764,-0.7961,5
6,-2.3271,-6.3097,-6.7699,-4.6586,-2.6573,1.005,3.3165,4.7174,5.8581,5.5579,...,-2.0169,1.3152,0.5847,-2.6373,-5.8994,-4.0982,-4.9288,-10.1921,-11.4629,5
7,-9.7318,-7.2903,-3.3878,1.8656,2.9562,4.3872,12.3822,15.8545,13.8732,14.3035,...,10.5511,11.992,10.8112,8.6999,6.8787,2.4059,1.4653,2.7261,0.7949,5
8,2.2758,7.7793,12.022,9.9707,4.5072,0.4747,-3.3378,-6.5798,-8.481,-7.9307,...,-0.3359,4.0569,2.9562,-3.3578,-1.9369,-2.9275,-4.6486,-2.7674,-1.1764,5
9,-7.0801,-4.2884,1.2852,-2.187,-0.0257,2.596,2.9462,-1.2064,-3.0176,-0.0457,...,11.922,9.6505,6.9088,13.2828,13.0827,12.1021,11.942,9.2402,10.7712,5


### Size:
- 360 rows (samples)
- 501 columns (500 features + 1 class_label)

In [6]:
# (number of rows, number of columns) of the generated table
generated_data.shape

(360, 501)

### The last column contains the class labels:

In [7]:
# Count the distinct values in the last column (NaN, if present, counts as one)
print("There are %d classes." % generated_data.iloc[:, -1].nunique(dropna=False))
print("")
print("Label  Num_sample")
# Per-label row counts, left unsorted by frequency
generated_data.iloc[:, -1].value_counts(sort=False)

There are 5 classes.

Label  Num_sample


0      4
1     68
2    220
3     12
5     56
Name: Label, dtype: int64