# Temporal Features - RNN Test

In [1]:
# Import dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

import sqlalchemy
from sqlalchemy import create_engine, inspect

import math
import numpy as np
import pandas as pd
import tensorflow as tf
import keras_tuner as kt
from pprint import pprint

import os
import sys
import time
from datetime import datetime
from contextlib import redirect_stdout

%run functions.ipynb

In [2]:
# Time the run
start_time = time.time()

## Import datasets

In [4]:
# Import the data
engine = create_engine("sqlite:///voice.sqlite")

# View all of the classes
inspector = inspect(engine)
table_names = inspector.get_table_names()
table_names

['aval',
 'bval',
 'chroma1',
 'chroma10',
 'chroma11',
 'chroma12',
 'chroma2',
 'chroma3',
 'chroma4',
 'chroma5',
 'chroma6',
 'chroma7',
 'chroma8',
 'chroma9',
 'chromastd',
 'delta',
 'demographic',
 'diagnosis',
 'energy',
 'energyentropy',
 'gval',
 'habits',
 'mfcc1',
 'mfcc10',
 'mfcc11',
 'mfcc12',
 'mfcc13',
 'mfcc2',
 'mfcc3',
 'mfcc4',
 'mfcc5',
 'mfcc6',
 'mfcc7',
 'mfcc8',
 'mfcc9',
 'rval',
 'spectralcentroid',
 'spectralentropy',
 'spectralflux',
 'spectralrolloff',
 'spectralspread',
 'zcr']

In [5]:
# Initialise a dictionary to hold dataframes
dataframes = dict()

# Loop through each table
for table in table_names:
    
    # Dataframe name
    df_name = f'{table}_df'
    
    # Create dataframe
    dataframes[df_name] = pd.read_sql(
        f'SELECT * FROM {table}',
        engine
    )

## Preprocessing

### Define the target variable

In [8]:
# Isolate the diagnosis column
y = dataframes['diagnosis_df']['diagnosis'].copy()

# Encode the target variable, ignore subtype
y = y.apply(encode_binary)
y

0      0
1      0
2      1
3      1
4      1
      ..
199    0
200    1
201    1
202    0
203    0
Name: diagnosis, Length: 204, dtype: int64

### Recombine the feature variables

In [29]:
# Define non-temporal features
non_temporal = [
    'rval_df', 'gval_df', 'bval_df', 'aval_df',
    'demographic_df', 'diagnosis_df', 'habits_df'
]

# Define subset of only temporal features
temporal_tables = list(set(dataframes.keys()).difference(set(non_temporal)))

# Initialise a dictionary
feat_dict = dict()
# Loop through each temporal feature
for table in sorted(temporal_tables)[:3]:
    
    df = dataframes[table]
    
    # Use the 'id' column as the index
    # df.set_index('id', inplace=True)
    # print(df)
    # print(table, df.values)
    
    feat_arrays = df.values
    for feat in feat_arrays:
        print(table, feat)
    
    # feat_dict[table] = df.values

# Convert to a dataframe
# feat_df = pd.DataFrame(feat_dict)
# feat_df.head()

chroma10_df [8.81294063e-36 8.81294063e-36 8.81294063e-36 1.27916146e-02
 5.38368905e-02 5.59548557e-02 5.90065697e-02 7.46642830e-02
 7.18954640e-02 6.31388079e-02 5.83483008e-02 1.65024329e-02
 1.92725463e-03 1.91020499e-03 9.13758944e-04 1.46018513e-03
 2.50424622e-03 3.97787932e-03 5.77548303e-03 8.57680295e-03
 9.18590840e-03 3.84067618e-03 3.93833091e-03 6.95767282e-03
 7.02472614e-03 6.29865354e-03 3.89542829e-03 3.48828735e-03
 5.27865781e-03 4.30426263e-03 2.86912252e-03 3.41795938e-03
 3.93965287e-03 6.25775475e-03 6.98562613e-03 2.62168220e-03
 4.32004625e-03 5.16997387e-03 5.18627449e-03 6.00505727e-03
 7.95304379e-03 5.73782231e-03 3.68788906e-03 5.03694889e-03
 5.11165964e-03 2.76667951e-03 4.19228561e-03 5.75656654e-03
 4.63291256e-03 3.78941860e-03 1.82962260e-03 1.78564427e-03
 4.78492299e-03 4.33430809e-03 3.22489349e-03 2.03988776e-03
 3.23523891e-03 2.70786970e-03 2.08247422e-03 1.30831476e-03
 3.37692176e-03 4.15767717e-03 5.27487521e-03 3.40052116e-03
 3.71454723e