In [7]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [8]:
# Read the feature list: one row per feature with its index and its name.
# The separator is a raw string so that `\s` is not parsed as a (invalid) string escape.
features = pd.read_csv("/Users/kwonheejin/Documents/DL/dataset/HAR/UCI HAR Dataset/features.txt", sep=r'\s+', header=None, names=['column_index', 'column_name'])

# For every row, count how many earlier rows carry the same feature name
# (0 for the first occurrence, 1 for the second, ...), as a one-column frame.
features_cc = features.groupby('column_name').cumcount().to_frame('cumcount')

In [9]:
# The separator is a raw string so that `\s` is not parsed as a (invalid) string escape.
features_df = pd.read_csv("/Users/kwonheejin/Documents/DL/dataset/HAR/UCI HAR Dataset/features.txt", sep=r'\s+', header=None, names=['column_index', 'column_name'])

# Build a frame that holds, per feature row, the running count of earlier rows
# with the same feature name (groupby + cumcount).
features_cc = features_df.groupby('column_name').cumcount()  # Series, one entry per feature row
features_cc = pd.DataFrame(features_cc)  # convert to a one-column DataFrame
features_cc.columns = ['cumcount']  # name that column
features_cc = features_cc.reset_index()  # adds an 'index' column -> 2 columns
features_df = features_df.reset_index()  # adds an 'index' column -> 3 columns

# Outer-join both frames on the 'index' column produced by reset_index().
# The result has the columns index, cumcount, column_index and column_name.
new_df = pd.merge(features_cc, features_df, on='index', how='outer')

# The join key is no longer needed.
new_df = new_df.drop(['index'], axis=1)

# Fold cumcount into the name: repeated names get a "_1", "_2", ... suffix,
# first occurrences keep their name unchanged.
# Row values are read by column label; positional access such as row[0] on a
# label-indexed row is deprecated in pandas.
new_df['column_name'] = new_df[['column_name', 'cumcount']].apply(
    lambda row: row['column_name'] + '_' + str(row['cumcount'])
    if row['cumcount'] > 0 else row['column_name'],
    axis=1)

# cumcount has served its purpose; only column_index and column_name remain.
new_df = new_df.drop(['cumcount'], axis=1)

# Sanity check: after the renaming no feature name may occur more than once.
# Prints False for every name that is still duplicated (silent when all is well).
h = new_df['column_name'].value_counts()

for count in h:
    if count > 1:
        print(False)

In [10]:
# Root folder of the extracted UCI HAR dataset; every split file lives below it.
HAR_DIR = "/Users/kwonheejin/Documents/DL/dataset/HAR/UCI HAR Dataset"

# De-duplicated feature names, used as column headers for both feature tables.
feature_names = new_df['column_name'].values

# All files are whitespace-separated and header-less. The separator is a raw
# string so that `\s` is not parsed as a (invalid) string escape.
X_train = pd.read_csv(f"{HAR_DIR}/train/X_train.txt", header=None, sep=r"\s+", names=feature_names)
y_train = pd.read_csv(f"{HAR_DIR}/train/y_train.txt", header=None, sep=r"\s+", names=['activity'])
X_test = pd.read_csv(f"{HAR_DIR}/test/X_test.txt", header=None, sep=r"\s+", names=feature_names)
y_test = pd.read_csv(f"{HAR_DIR}/test/y_test.txt", header=None, sep=r"\s+", names=['activity'])

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((7352, 561), (2947, 561), (7352, 1), (2947, 1))

In [11]:
from sklearn.preprocessing import StandardScaler

# Standardise the features. The scaler is fitted on the training split ONLY and
# then re-used for the test split: re-fitting on the test data would scale both
# splits with different statistics and leak test information into preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Wrap the scaled arrays in DataFrames again and attach the activity label so
# that rows can be filtered by label later on.
scaled_X_train = pd.DataFrame(data=X_train, columns=new_df['column_name'].values)
scaled_X_test = pd.DataFrame(data=X_test, columns=new_df['column_name'].values)
# y_* are single-column frames; flatten them to 1-D before assigning a column.
scaled_X_train['label'] = y_train.values.ravel()
scaled_X_test['label'] = y_test.values.ravel()

scaled_X_train.shape, scaled_X_test.shape

((7352, 562), (2947, 562))

In [12]:
# Partition each split by label: rows whose label is above 3 go to the "static"
# subset, rows whose label is 3 or below go to the "dynamic" subset.
X_train_static = scaled_X_train.loc[lambda frame: frame['label'] > 3]
X_train_dynamic = scaled_X_train.loc[lambda frame: frame['label'] <= 3]
X_test_static = scaled_X_test.loc[lambda frame: frame['label'] > 3]
X_test_dynamic = scaled_X_test.loc[lambda frame: frame['label'] <= 3]

# The "combined" variants are the unfiltered frames (same objects, not copies).
X_train_combined, X_test_combined = scaled_X_train, scaled_X_test

(
    X_train_static.shape,
    X_train_dynamic.shape,
    X_test_static.shape,
    X_test_dynamic.shape,
    X_train_combined.shape,
    X_test_combined.shape,
)

((4067, 562), (3285, 562), (1560, 562), (1387, 562), (7352, 562), (2947, 562))

In [13]:
# For every subset: first pull the 'label' column out as the target, then
# replace the feature frame with a copy that no longer contains that column.
# drop() returns a new frame, so the scaled source frames keep their labels.

# --- training subsets ---
y_train_static = X_train_static.loc[:, 'label']
X_train_static = X_train_static.drop(columns='label')

y_train_dynamic = X_train_dynamic.loc[:, 'label']
X_train_dynamic = X_train_dynamic.drop(columns='label')

y_train_combined = X_train_combined.loc[:, 'label']
X_train_combined = X_train_combined.drop(columns='label')

# --- test subsets ---
y_test_static = X_test_static.loc[:, 'label']
X_test_static = X_test_static.drop(columns='label')

y_test_dynamic = X_test_dynamic.loc[:, 'label']
X_test_dynamic = X_test_dynamic.drop(columns='label')

y_test_combined = X_test_combined.loc[:, 'label']
X_test_combined = X_test_combined.drop(columns='label')

In [15]:
# Display the shapes of all six feature frames (train/test for the static, dynamic and combined subsets).
X_train_static.shape, X_test_static.shape, X_train_dynamic.shape, X_test_dynamic.shape, X_train_combined.shape, X_test_combined.shape

((4067, 561), (1560, 561), (3285, 561), (1387, 561), (7352, 561), (2947, 561))

In [16]:
from scipy import stats

def create_dataset(X, y, time_steps=1, step=1):
    """Cut a feature table into fixed-length windows labelled by majority vote.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature rows; sliced positionally with ``.iloc``.
    y : pandas.Series
        One label per row of ``X``; sliced positionally with ``.iloc``.
    time_steps : int, default 1
        Number of consecutive rows in every window.
    step : int, default 1
        Offset between the start rows of two consecutive windows.

    Returns
    -------
    tuple of numpy.ndarray
        ``(Xs, ys)`` where ``Xs`` stacks the windows (one window per entry along
        the first axis) and ``ys`` has shape ``(n_windows, 1)`` and holds the
        most frequent label of each window. On a tie the smallest label wins.
    """
    Xs, ys = [], []
    # NOTE(review): the stop value `len(X) - time_steps` leaves out the window
    # that ends on the very last row. Kept as is so the number of produced
    # windows does not change; confirm whether that window should be included.
    for i in range(0, len(X) - time_steps, step):
        v = X.iloc[i:(i + time_steps)].values
        labels = y.iloc[i: i + time_steps]
        # Majority vote via np.unique instead of `stats.mode(labels)[0][0]`:
        # the shape of SciPy's mode result differs between SciPy versions, so
        # double indexing warns on some releases and fails on others.
        # np.unique returns sorted values and argmax picks the first maximum,
        # i.e. the smallest of the most frequent labels.
        values, counts = np.unique(np.asarray(labels), return_counts=True)
        Xs.append(v)
        ys.append(values[np.argmax(counts)])
    return np.array(Xs), np.array(ys).reshape(-1, 1)

# Window length (rows per sample) and stride used for all subsets.
TIME_STEPS = 10
STEP = 1

In [17]:
# Turn every (features, labels) pair into windowed arrays using the shared
# TIME_STEPS / STEP settings. The pairs are processed in the listed order and
# the results are bound back to the same names.
(
    (X_train_static, y_train_static),
    (X_train_dynamic, y_train_dynamic),
    (X_train_combined, y_train_combined),
    (X_test_static, y_test_static),
    (X_test_dynamic, y_test_dynamic),
    (X_test_combined, y_test_combined),
) = [
    create_dataset(features, labels, TIME_STEPS, STEP)
    for features, labels in [
        (X_train_static, y_train_static),
        (X_train_dynamic, y_train_dynamic),
        (X_train_combined, y_train_combined),
        (X_test_static, y_test_static),
        (X_test_dynamic, y_test_dynamic),
        (X_test_combined, y_test_combined),
    ]
]

# Show the resulting shapes for the static training subset.
(X_train_static.shape, y_train_static.shape)

  ys.append(stats.mode(labels)[0][0])


((4057, 10, 561), (4057, 1))

In [18]:
from sklearn.preprocessing import OneHotEncoder

def _fit_label_encoder(labels):
    """Return a dense-output OneHotEncoder fitted on ``labels``.

    The keyword that requests dense output was renamed from ``sparse`` to
    ``sparse_output`` in scikit-learn and the old spelling was later removed,
    so the new keyword is tried first and the old one is used as a fallback.
    Categories not seen during fitting are ignored at transform time
    (``handle_unknown='ignore'``).
    """
    try:
        encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    except TypeError:
        # Older scikit-learn releases only know the `sparse` keyword.
        encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
    return encoder.fit(labels)


# One encoder per subset, each fitted on that subset's TRAINING labels only.
enc_static = _fit_label_encoder(y_train_static)
enc_dynamic = _fit_label_encoder(y_train_dynamic)
enc_combined = _fit_label_encoder(y_train_combined)

# Replace the label columns by their one-hot encodings; the test labels are
# encoded with the encoder fitted on the matching training labels.
y_train_static = enc_static.transform(y_train_static)
y_test_static = enc_static.transform(y_test_static)

y_train_dynamic = enc_dynamic.transform(y_train_dynamic)
y_test_dynamic = enc_dynamic.transform(y_test_dynamic)

y_train_combined = enc_combined.transform(y_train_combined)
y_test_combined = enc_combined.transform(y_test_combined)



In [19]:
# Display the shape of the one-hot encoded training labels of the dynamic subset.
y_train_dynamic.shape

(3275, 3)

In [23]:
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout

def model_CNN(X_train_mod, y_train_mod):
    """Assemble an (uncompiled) 1-D convolutional classifier.

    The input shape is taken from axes 1 and 2 of ``X_train_mod`` and the width
    of the softmax output layer from axis 1 of ``y_train_mod``; the arrays are
    only inspected for their shapes, no training happens here.

    Layer stack: Conv1D(32 filters, kernel 3, ReLU) -> MaxPooling1D(2) ->
    Flatten -> Dense(128, ReLU) -> Dropout(0.5) -> Dense(softmax).

    Returns the Sequential model.
    """
    sample_shape = (X_train_mod.shape[1], X_train_mod.shape[2])
    n_outputs = y_train_mod.shape[1]

    model = Sequential()
    # Feature extraction.
    model.add(Conv1D(32, kernel_size=3, activation='relu', input_shape=sample_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    # Classification head.
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(n_outputs, activation='softmax'))

    return model

In [27]:
# Build the classifier for the static subset, print its layer overview and
# configure it for training.
model_static = model_CNN(X_train_static, y_train_static)
model_static.summary()

model_static.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['acc'],
)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_2 (Conv1D)           (None, 8, 32)             53888     
                                                                 
 max_pooling1d_2 (MaxPoolin  (None, 4, 32)             0         
 g1D)                                                            
                                                                 
 flatten_2 (Flatten)         (None, 128)               0         
                                                                 
 dense_4 (Dense)             (None, 128)               16512     
                                                                 
 dropout_2 (Dropout)         (None, 128)               0         
                                                                 
 dense_5 (Dense)             (None, 3)                 387       
                                                      

In [29]:
# Build the classifier for the dynamic subset, print its layer overview and
# configure it for training.
model_dynamic = model_CNN(X_train_dynamic, y_train_dynamic)
model_dynamic.summary()

model_dynamic.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['acc'],
)

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_4 (Conv1D)           (None, 8, 32)             53888     
                                                                 
 max_pooling1d_4 (MaxPoolin  (None, 4, 32)             0         
 g1D)                                                            
                                                                 
 flatten_4 (Flatten)         (None, 128)               0         
                                                                 
 dense_8 (Dense)             (None, 128)               16512     
                                                                 
 dropout_4 (Dropout)         (None, 128)               0         
                                                                 
 dense_9 (Dense)             (None, 3)                 387       
                                                      

In [30]:
# Build the classifier for the combined (unfiltered) data, print its layer
# overview and configure it for training.
model_combined = model_CNN(X_train_combined, y_train_combined)
model_combined.summary()

model_combined.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['acc'],
)

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_5 (Conv1D)           (None, 8, 32)             53888     
                                                                 
 max_pooling1d_5 (MaxPoolin  (None, 4, 32)             0         
 g1D)                                                            
                                                                 
 flatten_5 (Flatten)         (None, 128)               0         
                                                                 
 dense_10 (Dense)            (None, 128)               16512     
                                                                 
 dropout_5 (Dropout)         (None, 128)               0         
                                                                 
 dense_11 (Dense)            (None, 6)                 774       
                                                      

In [34]:
# Train the static-subset model; 30% of the training windows are held out for
# validation and the per-epoch metrics are kept in `history_static`.
history_static = model_static.fit(
    X_train_static,
    y_train_static,
    epochs=100,
    batch_size=32,
    validation_split=0.3,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100


Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [38]:
# Train the dynamic-subset model; 30% of the training windows are held out for
# validation and the per-epoch metrics are kept in `history_dynamic`.
history_dynamic = model_dynamic.fit(
    X_train_dynamic,
    y_train_dynamic,
    epochs=100,
    batch_size=32,
    validation_split=0.3,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100


Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [41]:
# Train the combined model; 30% of the training windows are held out for
# validation and the per-epoch metrics are kept in `history_combined`.
history_combined = model_combined.fit(
    X_train_combined,
    y_train_combined,
    epochs=100,
    batch_size=32,
    validation_split=0.3,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100


Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
