In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the two accelerometer recordings from disk
idle_data = pd.read_csv('C:\\Orange\\Anomaly Detection\\Nano sense ble\\train1.csv')
nominal_data = pd.read_csv('C:\\Orange\\Anomaly Detection\\Nano sense ble\\train2.csv')

# Preview the first rows of each recording to check the column layout
print("Idle Data:")
print(idle_data.head())
print("\nNominal Data:")
print(nominal_data.head())

# One panel per recording, each showing the three accelerometer axes over time
fig, axes = plt.subplots(2, 1, figsize=(14, 6))
recordings = (
    ('Idle Data Accelerometer Readings', idle_data),
    ('Nominal Data Accelerometer Readings', nominal_data),
)
axis_columns = (('accX', 'X-axis'), ('accY', 'Y-axis'), ('accZ', 'Z-axis'))

for ax, (panel_title, recording) in zip(axes, recordings):
    for column, legend_label in axis_columns:
        ax.plot(recording['timestamp'], recording[column], label=legend_label)
    ax.set_title(panel_title)
    ax.set_xlabel('Timestamp')
    ax.set_ylabel('Acceleration')
    ax.legend()

fig.tight_layout()
plt.show()


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.fft import fft, fftfreq
from scipy.stats import entropy, skew, kurtosis
import warnings


def segment_and_extract_features(df, sampling_rate, window_size=256, overlap=0.5):
    """Slice ``df`` into overlapping windows and extract one feature row per window.

    Parameters
    ----------
    df : pandas.DataFrame
        Samples in acquisition order. Must contain a ``'label'`` column plus the
        columns read by ``extract_features_from_window``.
    sampling_rate : float
        Sampling rate forwarded unchanged to ``extract_features_from_window``.
    window_size : int, default 256
        Number of consecutive rows per window.
    overlap : float, default 0.5
        Fraction of ``window_size`` shared by consecutive windows; must leave a
        step of at least one row.

    Returns
    -------
    pandas.DataFrame
        One row per retained window: the extracted features plus a ``'label'``
        column. Empty when ``df`` has fewer than ``window_size`` rows.

    Raises
    ------
    ValueError
        If ``window_size`` and ``overlap`` produce a step smaller than one row.
    """
    step = int(window_size * (1 - overlap))
    if step < 1:
        raise ValueError(
            f"window_size={window_size} with overlap={overlap} gives a step of "
            f"{step} rows; the step must be at least 1"
        )

    windows = []
    for start in range(0, len(df) - window_size + 1, step):
        window = df.iloc[start:start + window_size]
        window_labels = window['label']

        # A window that straddles two differently-labelled recordings mixes both
        # classes; tagging it with the first row's label would mislabel part of
        # it, so such windows are dropped instead.
        if window_labels.nunique(dropna=False) > 1:
            continue

        features = extract_features_from_window(window, sampling_rate)
        features['label'] = window_labels.iloc[0]
        windows.append(features)

    return pd.DataFrame(windows)

def _finite_or_zero(stat_func, signal):
    """Return ``stat_func(signal)`` as a float, or 0.0 if it fails or is not finite."""
    with warnings.catch_warnings():
        # Numerical warnings emitted while computing the statistic are not
        # actionable here; the fallback below handles the degenerate result.
        warnings.simplefilter('ignore')
        try:
            value = float(stat_func(signal))
        except Exception:
            # Best-effort feature: a failed statistic falls back to 0.
            return 0.0
    return value if np.isfinite(value) else 0.0


def extract_features_from_window(window, sampling_rate, max_freq=50, band_width=5):
    """Compute time- and frequency-domain features for one accelerometer window.

    Parameters
    ----------
    window : pandas.DataFrame
        Window of samples with ``'accX'``, ``'accY'`` and ``'accZ'`` columns.
    sampling_rate : float
        Sampling rate used to map FFT bins to frequencies.
    max_freq : int, default 50
        Upper limit (exclusive) for the start of the frequency bands.
    band_width : int, default 5
        Width of each frequency band; bands are ``[start, start + band_width)``.

    Returns
    -------
    dict
        Maps feature name to value. For each axis: ``_mean``, ``_std``, ``_max``,
        ``_min``, ``_range``, ``_skewness``, ``_kurtosis``,
        ``_zero_crossing_rate`` and one ``_energy_<start>_<end>Hz`` entry per
        band. Skewness and kurtosis are 0.0 when they cannot be computed or are
        not finite (e.g. NaN), so no NaN reaches downstream consumers from them.
    """
    axes = ('accX', 'accY', 'accZ')
    features = {}

    # Time-domain features
    for axis in axes:
        signal = window[axis].values
        signal_max = np.max(signal)
        signal_min = np.min(signal)

        features[f'{axis}_mean'] = np.mean(signal)
        features[f'{axis}_std'] = np.std(signal)
        features[f'{axis}_max'] = signal_max
        features[f'{axis}_min'] = signal_min
        features[f'{axis}_range'] = signal_max - signal_min
        features[f'{axis}_skewness'] = _finite_or_zero(skew, signal)
        features[f'{axis}_kurtosis'] = _finite_or_zero(kurtosis, signal)

        # Zero-crossing rate: number of sign-bit changes between neighbouring
        # samples, divided by the window length.
        sign_changes = np.count_nonzero(np.diff(np.signbit(signal)))
        features[f'{axis}_zero_crossing_rate'] = sign_changes / len(signal)

    # Frequency-domain features (kept in a second pass so the key order of the
    # returned dict stays: all time-domain features first, then band energies)
    for axis in axes:
        signal = window[axis].values
        spectrum = fft(signal)
        freqs = fftfreq(len(signal), 1 / sampling_rate)

        # Keep strictly positive frequencies only (drops the zero-frequency bin
        # and the negative half of the spectrum).
        positive = freqs > 0
        freqs = freqs[positive]
        power = np.abs(spectrum[positive]) ** 2

        # Sum the squared magnitudes falling inside each band.
        for start_freq in range(0, max_freq, band_width):
            end_freq = start_freq + band_width
            in_band = (freqs >= start_freq) & (freqs < end_freq)
            features[f'{axis}_energy_{start_freq}_{end_freq}Hz'] = np.sum(power[in_band])

    return features


def compute_fft(data, sampling_rate):
    """Return the first half of the FFT of ``data`` as (frequencies, amplitudes).

    Parameters
    ----------
    data : sequence of numbers
        Samples to transform.
    sampling_rate : float
        Sampling rate; its reciprocal is used as the sample spacing.

    Returns
    -------
    tuple of numpy.ndarray
        The first ``len(data) // 2`` FFT bin frequencies and the matching
        magnitudes scaled by ``2 / len(data)``.
    """
    n_samples = len(data)
    half = n_samples // 2
    sample_spacing = 1.0 / sampling_rate

    spectrum = np.fft.fft(data)
    frequencies = np.fft.fftfreq(n_samples, sample_spacing)

    amplitudes = 2.0 / n_samples * np.abs(spectrum[:half])
    return frequencies[:half], amplitudes


correct_sampling_rate = 100  # Replace with your actual sampling rate

# Both recordings use the configured rate above; nothing is measured from the
# timestamps here.
idle_sampling_rate = correct_sampling_rate
nominal_sampling_rate = correct_sampling_rate

# Spectrum of every axis of the idle recording
idle_fft_x, idle_amplitude_x = compute_fft(idle_data['accX'], idle_sampling_rate)
idle_fft_y, idle_amplitude_y = compute_fft(idle_data['accY'], idle_sampling_rate)
idle_fft_z, idle_amplitude_z = compute_fft(idle_data['accZ'], idle_sampling_rate)

# Spectrum of every axis of the nominal recording
nominal_fft_x, nominal_amplitude_x = compute_fft(nominal_data['accX'], nominal_sampling_rate)
nominal_fft_y, nominal_amplitude_y = compute_fft(nominal_data['accY'], nominal_sampling_rate)
nominal_fft_z, nominal_amplitude_z = compute_fft(nominal_data['accZ'], nominal_sampling_rate)

# 3x2 grid: idle spectra in the left column (positions 1, 3, 5), nominal
# spectra in the right column (positions 2, 4, 6); one row per axis.
fft_panels = [
    (1, idle_fft_x, idle_amplitude_x, 'Idle Data FFT - X-axis'),
    (3, idle_fft_y, idle_amplitude_y, 'Idle Data FFT - Y-axis'),
    (5, idle_fft_z, idle_amplitude_z, 'Idle Data FFT - Z-axis'),
    (2, nominal_fft_x, nominal_amplitude_x, 'Nominal Data FFT - X-axis'),
    (4, nominal_fft_y, nominal_amplitude_y, 'Nominal Data FFT - Y-axis'),
    (6, nominal_fft_z, nominal_amplitude_z, 'Nominal Data FFT - Z-axis'),
]

plt.figure(figsize=(14, 12))
for position, frequencies, amplitudes, panel_title in fft_panels:
    plt.subplot(3, 2, position)
    plt.plot(frequencies, amplitudes)
    plt.title(panel_title)
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Amplitude')

plt.tight_layout()
plt.show()

# Report the sampling rates used for the spectra above
print(f"\nCalculated sampling rate for idle data: {idle_sampling_rate:.2f} Hz")
print(f"Calculated sampling rate for nominal data: {nominal_sampling_rate:.2f} Hz")



# Tag every sample with the recording it came from (adds a 'label' column to
# both frames in place)
for recording, tag in ((idle_data, 'idle'), (nominal_data, 'nominal')):
    recording['label'] = tag

# Stack both recordings into one frame with a fresh 0..n-1 index
all_data = pd.concat((idle_data, nominal_data), ignore_index=True)

# One feature row per sliding window
features_df = segment_and_extract_features(all_data, sampling_rate=correct_sampling_rate)

# Summarise the resulting feature table
print("Shape of features_df:", features_df.shape)
print("\nFirst few rows of features_df:", features_df.head(), sep="\n")
print("\nColumns in features_df:", features_df.columns, sep="\n")

In [None]:
# Separate the feature matrix from the class labels and encode the labels
from sklearn.preprocessing import LabelEncoder

# Every column except 'label' is a model input
X = features_df.drop(columns='label')
y = features_df['label']

# Map the string class names to integer codes; `le` is kept so the codes can be
# mapped back to names later
le = LabelEncoder()
y_encoded = le.fit(y).transform(y)


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest on all windows to rank features by importance
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X, y_encoded)

importances = model.feature_importances_
feature_names = X.columns

# Importance table, most important feature first
feature_importance_df = (
    pd.DataFrame({'feature': feature_names, 'importance': importances})
    .sort_values(by='importance', ascending=False)
)

# NOTE(review): despite its name, this holds the 15 highest-ranked features
top_10_features = feature_importance_df.iloc[:15]
print(top_10_features)


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap, BoundaryNorm

# Pairwise correlation between every feature column (the label is excluded)
feature_columns = features_df.columns.drop('label')
corr_matrix = features_df[feature_columns].corr()

# Six discrete colour bins, symmetric around zero: blue for |r| < 0.25,
# green for 0.25 <= |r| < 0.5, red for |r| >= 0.5
bounds = [-1.0, -0.5, -0.25, 0.0, 0.25, 0.5, 1.0]
colors = ['red', 'green', 'blue', 'blue', 'green', 'red']
custom_cmap = ListedColormap(colors)
norm = BoundaryNorm(boundaries=bounds, ncolors=len(colors))

# Draw the matrix as an image, one cell per feature pair
fig, ax = plt.subplots(figsize=(15, 12))
cax = ax.imshow(corr_matrix, interpolation='nearest', cmap=custom_cmap, norm=norm)

# Colour bar with a tick at every bin edge
cbar = fig.colorbar(cax, ticks=bounds)
cbar.set_ticklabels([format(edge, 'g') for edge in bounds])

# Label every row and column with its feature name
tick_positions = np.arange(len(feature_columns))
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(feature_columns, rotation=90)
ax.set_yticklabels(feature_columns)

# Title, then tighten the layout so the long tick labels are not cut off
plt.title('Feature Correlation Matrix')
plt.tight_layout()
plt.show()
