In [None]:
# Mount Google Drive at /content/drive so files under that path can be read and written.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Input recordings: one HDF5 file per activity and A/B variant.
# Every path must be listed exactly once -- a duplicated entry would load the
# same recording twice and leave another class without any data.
file_paths = [
    "/content/drive/MyDrive/IITM/IITM_fiber_DATAset/BikeThrottle_A.h5",
    "/content/drive/MyDrive/IITM/IITM_fiber_DATAset/BikeThrottle_B.h5",
    "/content/drive/MyDrive/IITM/IITM_fiber_DATAset/Jackhammer_A.h5",
    "/content/drive/MyDrive/IITM/IITM_fiber_DATAset/Jackhammer_B.h5",
    "/content/drive/MyDrive/IITM/IITM_fiber_DATAset/Jumping_A.h5",
    "/content/drive/MyDrive/IITM/IITM_fiber_DATAset/Jumping_B.h5",
    "/content/drive/MyDrive/IITM/IITM_fiber_DATAset/Walking_A.h5",
    "/content/drive/MyDrive/IITM/IITM_fiber_DATAset/Walking_B.h5"
]

In [None]:
import h5py
import numpy as np

def process_h5_files(file_paths, output_dir="/content/drive/MyDrive", save=True):
    """
    Read several H5 files, label their contents by file name, and combine them.

    A file's label is looked up from its base name: the extension is dropped,
    underscores are removed and the result is lower-cased
    (e.g. ``BikeThrottle_A.h5`` -> ``bikethrottlea`` -> label 0). Files whose
    normalized name is not in the mapping are skipped with a warning.

    Parameters:
        file_paths (list): List of H5 file paths.
        output_dir (str): Directory that receives ``final_labeled_dataset.npy``
            and ``final_labels.npy``. Defaults to the original Drive location.
        save (bool): When False, the ``.npy`` files are not written and the
            arrays are only returned.

    Returns:
        final_data (numpy.ndarray): Data of all processed files concatenated along axis 0.
        final_labels (numpy.ndarray): One label per entry along axis 0 of ``final_data``.

    Raises:
        ValueError: If no file could be processed, or if a matched file
            contains no top-level dataset.
    """
    import os  # function-scope import so the cell needs no extra top-level import

    # Define category-to-label mapping based on filenames
    label_mapping = {
        "bikethrottlea": 0,
        "bikethrottleb": 1,
        "jackhammera": 2,
        "jackhammerb": 3,
        "jumpinga": 4,
        "jumpingb": 5,
        "walkinga": 6,
        "walkingb": 7
    }

    data_list = []
    labels_list = []

    for file_path in file_paths:
        # Normalize the base name. splitext removes only the trailing extension,
        # unlike a plain str.replace(".h5", "") which would also hit the middle of a name.
        stem = os.path.splitext(os.path.basename(file_path))[0]
        file_name = stem.replace("_", "").lower()

        # Check if the filename exists in the mapping
        if file_name not in label_mapping:
            print(f"Warning: File '{file_path}' does not match any known category labels. Skipping.")
            continue

        label = label_mapping[file_name]
        print(f"Processing File: {file_path}, Assigned Label: {label}")

        # Load data from H5 file
        with h5py.File(file_path, "r") as h5f:
            # Take the first top-level entry that is an actual dataset; the first
            # key of a file may be a group, which cannot be converted to an array.
            dataset_name = next(
                (key for key in h5f.keys() if isinstance(h5f[key], h5py.Dataset)),
                None,
            )
            if dataset_name is None:
                raise ValueError(f"File '{file_path}' contains no top-level dataset.")
            data = np.array(h5f[dataset_name])

        # One label per entry along the first axis of this file's data
        labels = np.full((data.shape[0],), label)

        data_list.append(data)
        labels_list.append(labels)

    # Ensure valid data was collected (both lists always grow together)
    if not data_list:
        raise ValueError("No valid files were processed. Please check the file names and mapping.")

    # Combine into single numpy arrays
    final_data = np.concatenate(data_list, axis=0)
    final_labels = np.concatenate(labels_list, axis=0)

    # Save dataset
    if save:
        np.save(os.path.join(output_dir, "final_labeled_dataset.npy"), final_data)
        np.save(os.path.join(output_dir, "final_labels.npy"), final_labels)
        print("Final dataset and labels saved successfully!")

    return final_data, final_labels


# ** Example Usage with Your 8 Files **
# The eight recordings to combine. Each file's label is derived from its
# name by process_h5_files; the list order only fixes the concatenation order.
file_paths = list((
    "/content/drive/MyDrive/IITM/IITM_fiber_DATAset/BikeThrottle_A.h5",
    "/content/drive/MyDrive/IITM/IITM_fiber_DATAset/BikeThrottle_B.h5",
    "/content/drive/MyDrive/IITM/IITM_fiber_DATAset/Jackhammer_A.h5",
    "/content/drive/MyDrive/IITM/IITM_fiber_DATAset/Jackhammer_B.h5",
    "/content/drive/MyDrive/IITM/IITM_fiber_DATAset/Jumping_A.h5",
    "/content/drive/MyDrive/IITM/IITM_fiber_DATAset/Jumping_B.h5",
    "/content/drive/MyDrive/IITM/IITM_fiber_DATAset/Walking_A.h5",
    "/content/drive/MyDrive/IITM/IITM_fiber_DATAset/Walking_B.h5",
))

# Build the combined dataset and its label vector.
data, labels = process_h5_files(file_paths=file_paths)


Processing File: /content/drive/MyDrive/IITM/IITM_fiber_DATAset/BikeThrottle_A.h5, Assigned Label: 0
Processing File: /content/drive/MyDrive/IITM/IITM_fiber_DATAset/BikeThrottle_B.h5, Assigned Label: 1
Processing File: /content/drive/MyDrive/IITM/IITM_fiber_DATAset/Jackhammer_A.h5, Assigned Label: 2
Processing File: /content/drive/MyDrive/IITM/IITM_fiber_DATAset/Jackhammer_B.h5, Assigned Label: 3
Processing File: /content/drive/MyDrive/IITM/IITM_fiber_DATAset/Jumping_A.h5, Assigned Label: 4
Processing File: /content/drive/MyDrive/IITM/IITM_fiber_DATAset/Jumping_B.h5, Assigned Label: 5
Processing File: /content/drive/MyDrive/IITM/IITM_fiber_DATAset/Walking_A.h5, Assigned Label: 6
Processing File: /content/drive/MyDrive/IITM/IITM_fiber_DATAset/Walking_B.h5, Assigned Label: 7
Final dataset and labels saved successfully!


# Checking dataset and label shapes

In [None]:
import numpy as np

# Load datasets
# Only the shapes are inspected in this cell, so the feature matrix is
# memory-mapped (mmap_mode="r") instead of being read into RAM in full.
X = np.load("/content/drive/MyDrive/Z-Score.npy", mmap_mode="r")
y = np.load("/content/drive/MyDrive/final_labels_fixed.npy")

# Check shapes
print(f"Shape of X (features): {X.shape}")  # Expected: (num_samples, timesteps)
print(f"Shape of y (labels): {y.shape}")  # Expected: (num_samples,)

# Check if the number of samples matches (first axis of both arrays)
if X.shape[0] != y.shape[0]:
    print(f"❌ ERROR: X has {X.shape[0]} samples, but y has {y.shape[0]} labels.")
else:
    print("✅ Shapes are correct!")


Shape of X (features): (100, 20000000)
Shape of y (labels): (100,)
✅ Shapes are correct!


In [None]:
# Build a label vector with exactly one entry per row of X.
# NOTE(review): these values are placeholders (0/1 alternating by row index),
# not real class labels -- replace with the actual labeling strategy.
num_samples = X.shape[0]
new_y = np.arange(num_samples, dtype=int) % 2

# Persist the label vector
np.save("/content/drive/MyDrive/final_labels_fixed.npy", new_y)

print(f"Fixed labels: Now y has {new_y.shape[0]} samples")


Fixed labels: Now y has 100 samples


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

# ** Load the Correct Datasets **
X = np.load("/content/drive/MyDrive/Z-Score.npy")  # Use Z-score normalized dataset
y = np.load("/content/drive/MyDrive/final_labels_fixed.npy")  # Labels

# ** Reshape Data for CNN **
# Conv1D consumes (samples, timesteps, channels); append a single channel axis.
X = X.reshape(X.shape[0], X.shape[1], 1)

# ** Convert labels to categorical format **
# Size the one-hot encoding from the largest label id. Counting distinct
# labels would be too small whenever the ids are not contiguous from 0,
# and to_categorical would then fail on the out-of-range id.
num_classes = int(y.max()) + 1
y = keras.utils.to_categorical(y, num_classes)

# ** Split Data into Training & Testing **
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ** Define CNN Model **
# The input shape is declared with an explicit Input object; passing
# `input_shape=` to the first layer is what triggers the Keras UserWarning.
# NOTE(review): with 20,000,000 timesteps per sample (the shape printed by the
# shape check), Flatten hands roughly 6.4e8 features to Dense(128), i.e. about
# 8e10 weights -- this will not fit in memory. Downsample or window the signal,
# or replace Flatten with a global pooling layer, before training for real.
model = keras.Sequential([
    keras.Input(shape=(X.shape[1], 1)),
    layers.Conv1D(filters=32, kernel_size=3, activation="relu"),
    layers.MaxPooling1D(pool_size=2),
    layers.Conv1D(filters=64, kernel_size=3, activation="relu"),
    layers.MaxPooling1D(pool_size=2),
    layers.Conv1D(filters=128, kernel_size=3, activation="relu"),
    layers.Flatten(),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation="softmax")  # Output Layer
])

# ** Compile Model **
# categorical_crossentropy matches the one-hot labels produced above.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# ** Train Model **
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# ** Save the Model **
model.save("/content/drive/MyDrive/cnn_fiber_optic_model.h5")

print("Model training complete and saved successfully!")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# Model

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from sklearn.model_selection import train_test_split

# Step 1: Load Data from Google Drive
# Assuming final labeled dataset is available in /content/drive/MyDrive
X = np.load("/content/drive/MyDrive/final_labeled_dataset.npy")
y = np.load("/content/drive/MyDrive/final_labels_fixed.npy")

# Step 2: Check data shapes
# Done before normalization: normalizing does not change the shape, and a
# mismatch should abort the cell before any expensive full-array arithmetic.
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

# Ensure y has the correct number of labels
if X.shape[0] != y.shape[0]:
    raise ValueError("The number of samples in X and y does not match!")

# Step 3: Z-score normalization along axis 0 (mean 0, std 1)
feature_mean = np.mean(X, axis=0)
feature_std = np.std(X, axis=0)
# A constant feature has std 0; divide by 1 instead so it becomes 0 rather than NaN/inf.
feature_std = np.where(feature_std == 0, 1.0, feature_std)
X = (X - feature_mean) / feature_std

# Step 4: Split data into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Data Generator (for batch loading)
class DataGenerator(tf.keras.utils.Sequence):
    """Serve (X, y) to Keras in mini-batches, reshuffling the order after every epoch.

    Parameters:
        X: Indexable feature container; ``X[index_array]`` must return a batch.
        y: Indexable label container aligned with ``X`` along the first axis.
        batch_size (int): Maximum number of samples per batch (must be positive).
        **kwargs: Forwarded to the Keras base-class constructor.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """

    def __init__(self, X, y, batch_size=32, **kwargs):
        # The Keras base class expects its constructor to be called by subclasses.
        super().__init__(**kwargs)
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        self.X = X
        self.y = y
        self.batch_size = batch_size
        self.indices = np.arange(len(self.X))

    def __len__(self):
        # Number of batches per epoch. Rounded up so the final, shorter batch is
        # still served; rounding down would drop the tail samples and yield zero
        # batches whenever there are fewer samples than batch_size.
        return int(np.ceil(len(self.X) / self.batch_size))

    def __getitem__(self, idx):
        # Slice the (possibly shuffled) index array; the last batch may be shorter.
        batch_indices = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_X = self.X[batch_indices]
        batch_y = self.y[batch_indices]
        return batch_X, batch_y

    def on_epoch_end(self):
        # Shuffle the sample order in place so the next epoch sees different batches.
        np.random.shuffle(self.indices)

# Step 6: Create data generators for training and testing
train_generator = DataGenerator(X_train, y_train, batch_size=32)
test_generator = DataGenerator(X_test, y_test, batch_size=32)

# Step 7: Define CNN Model
# The input shape is declared with an explicit Input object instead of the
# deprecated `input_shape=` keyword on the first layer.
# NOTE(review): Conv2D needs every sample to be 3-D (height, width, channels),
# so X_train.shape[1:] must have three entries -- confirm the stored data is
# reshaped accordingly before this cell runs.
model = Sequential([
    tf.keras.Input(shape=X_train.shape[1:]),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(8, activation='softmax')  # Assuming 8 classes
])

# Compile the model
# The labels are loaded as integer class ids and are never one-hot encoded in
# this cell, so the sparse variant of the loss is required; plain
# categorical_crossentropy expects one-hot targets.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Step 8: Train the model using the generator
history = model.fit(train_generator, epochs=20, validation_data=test_generator)

# Step 9: Evaluate the model
loss, accuracy = model.evaluate(test_generator)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

# Step 10: Save the trained model to a file
model.save("/content/drive/MyDrive/final_trained_model.h5")
print("Model saved successfully to '/content/drive/MyDrive/final_trained_model.h5'")


X shape: (2000000000,)
y shape: (100,)


ValueError: The number of samples in X and y does not match!