In [128]:
import os

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file (e.g. pd.read_csv)

In [87]:
df = pd.read_csv(r"C:\Users\furka\Desktop\ain4311\project\real-time-motion-detector\data\geophone-sensor-data.csv")

In [88]:
df.head()

Unnamed: 0,timestamp,mean,top_3_mean,min,max,std_dev,median,q1,q3,skewness,dominant_freq,energy,activity,name
0,01:51:43,2046.726,2057.666667,2035,2060,3.452186,2047.0,2043.0,2049.0,-0.152574,601.0,1988682000.0,walking,Yusuf
1,01:51:46,2046.619333,2053.0,2040,2053,3.580469,2047.0,2044.0,2050.0,-0.150615,300.0,1988028000.0,walking,Yusuf
2,01:51:50,2046.598,2056.0,2030,2059,3.592826,2047.0,2044.0,2049.0,-0.370125,300.0,1987897000.0,walking,Yusuf
3,01:51:53,2046.56,2054.0,2039,2055,3.41483,2047.0,2044.0,2049.0,-0.321296,300.0,1987662000.0,walking,Yusuf
4,01:51:56,2046.656667,2055.0,2034,2057,3.356802,2048.0,2044.0,2049.0,-0.379134,300.0,1988255000.0,walking,Yusuf


In [89]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1800 entries, 0 to 1799
Data columns (total 14 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   timestamp      1800 non-null   object 
 1   mean           1800 non-null   float64
 2   top_3_mean     1800 non-null   float64
 3   min            1800 non-null   int64  
 4   max            1800 non-null   int64  
 5   std_dev        1800 non-null   float64
 6   median         1800 non-null   float64
 7   q1             1800 non-null   float64
 8   q3             1800 non-null   float64
 9   skewness       1800 non-null   float64
 10  dominant_freq  1800 non-null   float64
 11  energy         1800 non-null   float64
 12  activity       1800 non-null   object 
 13  name           1800 non-null   object 
dtypes: float64(9), int64(2), object(3)
memory usage: 197.0+ KB


In [100]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.callbacks import EarlyStopping

le = LabelEncoder()
scaler = MinMaxScaler()


In [91]:
# Split the data into time steps
def create_sequences(data, labels, time_steps=10):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data[i:i+time_steps])
        y.append(labels[i+time_steps])
    return np.array(X), np.array(y)

In [92]:
def preprocess_data(df, target, time_steps):
    
    # Normalization the numerical features
    numeric_columns = df[['mean', 'top_3_mean', 'min', 'max', 'std_dev', 'median',
                      'q1', 'q3', 'skewness', 'dominant_freq', 'energy']]

    # Label encode for categorical columns
    selected_columns = ['activity', 'name']
    df[selected_columns] = df[selected_columns].apply(lambda col: le.fit_transform(col))
    
    features_scaled = scaler.fit_transform(numeric_columns)
    
    # Select target and support feature
    if target == 'activity':
        support_feature = 'name'
    elif target == 'name':
        support_feature = 'activity'
    else:
        raise ValueError('Enter a valid target name (activity or name).')
    
    # Features and labels
    labels = df[target]
    
    # Create sequences
    X, y = create_sequences(features_scaled, labels, time_steps)
    return X, y

In [125]:
def train_lstm_model_and_save(df, target, timesteps):
    
    model_dir = "../models/"
    
    # Preprocess data
    X, y = preprocess_data(df, target, timesteps)
    
    # Split into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    from tensorflow.keras.callbacks import EarlyStopping
    early_stopping = EarlyStopping(monitor='val_loss', patience=10)
    
    # LSTM model
    lstm = Sequential()
    lstm.add(LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=False))
    lstm.add(Dense(32, activation='relu'))
    lstm.add(Dense(len(np.unique(y)), activation='softmax'))  # Sınıf sayısı kadar nöron ve softmax aktivasyonu

    # Compile model
    lstm.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Train model
    history = lstm.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])
        
    test_loss, test_accuracy = lstm.evaluate(X_test, y_test)
    y_pred = np.argmax(lstm.predict(X_test), axis=1)
    
    lstm.summary()
    
    # Mevcut dizindeki model versiyonlarını kontrol et
    existing_versions = [int(i) for i in os.listdir(model_dir) if i.isdigit()]

    # Son versiyonu bul, yeni versiyon numarasını belirle
    model_version = max(existing_versions, default=0) + 1

    # Modeli kaydet
    lstm.save(f"{model_dir}/{model_version}/LSTM_model_for_geophone_analysis.h5")
    
    return {
        'model': lstm,
        'classification_report': classification_report(y_test, y_pred),
        'confusion_matrix': confusion_matrix(y_test, y_pred),
        'test_acc': test_accuracy
    }

In [126]:
def main():
    # Train and evaluate model
    results = train_lstm_model_and_save(df, 'activity', 3)
    
    # Print results
    print("Model Performance:")
    print("-------------------")
    print("LSTM Classification Report:")
    print(results['classification_report'])
    print(results['test_acc'])

In [129]:
if __name__ == "__main__":
    main()

Epoch 1/50


  super().__init__(**kwargs)


[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.3688 - loss: 1.1001 - val_accuracy: 0.5278 - val_loss: 1.0805
Epoch 2/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5309 - loss: 1.0687 - val_accuracy: 0.4944 - val_loss: 1.0620
Epoch 3/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5574 - loss: 1.0285 - val_accuracy: 0.5250 - val_loss: 1.0152
Epoch 4/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5205 - loss: 0.9764 - val_accuracy: 0.5694 - val_loss: 0.9582
Epoch 5/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6735 - loss: 0.8667 - val_accuracy: 0.6611 - val_loss: 0.8460
Epoch 6/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6595 - loss: 0.7571 - val_accuracy: 0.6250 - val_loss: 0.7647
Epoch 7/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step




Model Performance:
-------------------
LSTM Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.81      0.84       105
           1       0.88      0.95      0.92       129
           2       0.90      0.88      0.89       126

    accuracy                           0.89       360
   macro avg       0.89      0.88      0.88       360
weighted avg       0.89      0.89      0.89       360

0.8861111402511597
