In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

print(f"NumPy version: {np.__version__}")
# Load the pickle file
# NOTE(review): unpickling can execute arbitrary code, so this should only be
# pointed at a file from a trusted source.
data = pd.read_pickle('two_devices_data.pkl')


# Inspect the keys of the dictionary
print(data.keys())

# Assuming the dictionary has keys: 'Probe', 'Backscat', 'Date', 'Label'
# Convert the dictionary to a DataFrame
# (later cells rebind the name `data` to this DataFrame)
df = pd.DataFrame(data)


# Display the first few rows of the dataframe
print(df.head())


# Display the structure and summary of the dataset
df.info()




In [None]:
# Labels may be stored as NumPy arrays; only the first entry is inspected to
# decide whether the whole column gets converted to its string form.
first_label = df['Label'].iloc[0]
if isinstance(first_label, np.ndarray):
    df['Label'] = df['Label'].apply(str)

# Show the outcome of the conversion
print(df['Label'].head())
print(df.dtypes)


In [None]:
import plotly.express as px
import pandas as pd
import numpy as np

# Remove any leading/trailing '[', ']' and single-quote characters from each
# label (presumably left over from stringifying array-valued labels — verify).
df['Label'] = df['Label'].str.strip("[]'")

# List the distinct labels that remain after cleaning
print(df['Label'].unique())



def plot_signals_with_plotly(data, sample_label, num_signals=2):
    """Plot one recording from each of the first rows carrying a given label.

    Args:
        data: DataFrame with 'Label', 'Backscat' and 'Probe' columns.
        sample_label: Label value used to select rows.
        num_signals: Number of matching rows to plot. If fewer rows match,
            a message is printed and nothing is plotted.

    Returns:
        None. Figures are displayed with ``fig.show()``.

    Rows whose 'Backscat' entry is not a 2-D NumPy array are reported and
    skipped; for 2-D arrays only the first row of the array is plotted.
    """
    matching = data[data['Label'] == sample_label]
    available = len(matching)

    if available < num_signals:
        print(f"Not enough data to plot for label {sample_label}. Available samples: {available}")
        return

    for _, row in matching.head(num_signals).iterrows():
        backscat = row['Backscat']

        # Guard clauses: anything that is not a 2-D ndarray is skipped
        if not isinstance(backscat, np.ndarray):
            print(f"Unexpected data type in Backscat: {type(backscat)}")
            continue
        if backscat.ndim != 2:
            print(f"Backscat array has unexpected number of dimensions: {backscat.ndim}")
            continue

        # Only the first row of the 2-D array is drawn
        backscat_data = backscat[0, :]
        print(f"Using one recording of length {len(backscat_data)} for plotting.")

        sample_index = np.arange(len(backscat_data))
        fig = px.line(
            x=sample_index,
            y=backscat_data,
            title=f'Signal for {sample_label} - Probe: {row["Probe"]}',
            labels={'x': 'Index', 'y': 'Backscatter Intensity'}
        )
        fig.show()

# Assuming 'df' is your DataFrame and it's been properly formatted
# Plot one recording from each of the first 2 rows labelled 'C2'.
plot_signals_with_plotly(df, 'C2', 2)

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Define a function to extract features from Backscat
def extract_features(backscat_array):
    """Compute per-row summary statistics of a 2-D array.

    Args:
        backscat_array: 2-D array; every statistic is reduced along axis 1,
            so each row of the input yields one row of features.

    Returns:
        pandas.DataFrame with columns 'mean', 'std', 'max', 'min', 'median',
        '25_percentile', '50_percentile' and '75_percentile' (in that order).
    """
    reducers = (
        ('mean', np.mean),
        ('std', np.std),
        ('max', np.max),
        ('min', np.min),
        ('median', np.median),
    )
    columns = {name: reduce(backscat_array, axis=1) for name, reduce in reducers}

    # Percentile columns are appended after the plain reducers to keep the column order
    for q in (25, 50, 75):
        columns[f'{q}_percentile'] = np.percentile(backscat_array, q, axis=1)

    return pd.DataFrame(columns)

# Build one feature table per DataFrame row and tag each of its rows with
# that row's label.
df_features_list = [
    extract_features(backscat).assign(Label=label)
    for backscat, label in zip(df['Backscat'], df['Label'])
]

# Stack the per-row tables into a single frame with a fresh index
df_features = pd.concat(df_features_list, ignore_index=True)

# Separate the predictors from the target
y = df_features['Label']
X = df_features.drop(columns=['Label'])

# Hold out 20% of the rows for testing (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize using statistics estimated on the training part only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the Random Forest classifier
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
rf_clf.fit(X_train, y_train)

# Predict the held-out rows
y_pred = rf_clf.predict(X_test)

# Report accuracy and per-class metrics
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print(classification_report(y_test, y_pred))



In [None]:
import pandas as pd
import numpy as np
from scipy.signal import welch
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from scipy.linalg import sqrtm
import pickle

# `data` is another name for the same DataFrame object as `df` (no copy is made),
# so columns assigned to `data` below also appear on `df`.
data=df

# Function to extract frequency domain features using Welch's method
def extract_features(signal):
    """Summarize the Welch power spectral density of a signal.

    Note: this definition replaces the earlier `extract_features` in this
    notebook once the cell has been run.

    Args:
        signal: Array passed straight to ``scipy.signal.welch`` with its
            default settings.

    Returns:
        numpy.ndarray of three values: mean, maximum and standard deviation
        taken over all PSD values.
    """
    _, spectrum = welch(signal)
    summary = [spectrum.mean(), np.max(spectrum), np.std(spectrum)]
    return np.array(summary)

# Apply feature extraction to each 'Backscat' entry
data['Features'] = data['Backscat'].apply(lambda x: extract_features(np.array(x)))

# Prepare data
features = np.vstack(data['Features'].values)

# Fit ONE encoder on every label in the dataset and reuse it for both probes.
# Fitting a separate encoder per probe assigns integer codes independently, so
# the same integer could stand for different classes in y_probe1 and y_probe2
# whenever the two probes do not contain exactly the same set of labels.
label_encoder = LabelEncoder().fit(data['Label'])
labels = label_encoder.transform(data['Label'])

# Split the data by Probe
probe1_data = data[data['Probe'] == 'OZ02']
probe2_data = data[data['Probe'] == 'OZ03']

X_probe1 = np.vstack(probe1_data['Features'].values)
X_probe2 = np.vstack(probe2_data['Features'].values)
y_probe1 = label_encoder.transform(probe1_data['Label'])
y_probe2 = label_encoder.transform(probe2_data['Label'])

# Normalize features: statistics come from Probe1 and are applied to Probe2
scaler = StandardScaler()
X_probe1_scaled = scaler.fit_transform(X_probe1)
X_probe2_scaled = scaler.transform(X_probe2)

# Function to align data using CORAL
def coral(source, target):
    """Align source features to the target's covariance (CORAL-style).

    The source matrix is multiplied by the inverse square root of its own
    regularized covariance and then by the square root of the target's
    regularized covariance. Neither input is mean-centered here.

    Args:
        source: 2-D array (samples x features) to be transformed.
        target: 2-D array (samples x features) providing the reference
            covariance; must have the same number of features as `source`.

    Returns:
        Real-valued array with the same shape as `source`.
    """
    # A small ridge keeps both covariance matrices invertible
    source_cov = np.cov(source.T) + 1e-5 * np.eye(source.shape[1])
    target_cov = np.cov(target.T) + 1e-5 * np.eye(target.shape[1])

    source_whiten = sqrtm(source_cov)
    target_color = sqrtm(target_cov)

    # sqrtm can return a complex matrix (round-off on ill-conditioned input).
    # The covariances are real and symmetric, so any imaginary part is
    # numerical noise; drop it so downstream estimators receive real features.
    if np.iscomplexobj(source_whiten):
        source_whiten = source_whiten.real
    if np.iscomplexobj(target_color):
        target_color = target_color.real

    source_aligned = source @ np.linalg.inv(source_whiten) @ target_color
    return source_aligned

# Transform the scaled Probe1 features towards Probe2's covariance
X_probe1_aligned = coral(X_probe1_scaled, X_probe2_scaled)

# Random Forest trained on aligned Probe1, applied to scaled Probe2
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_probe1_aligned, y_probe1)
y_pred = model.predict(X_probe2_scaled)

# Cross-probe evaluation
probe2_accuracy = accuracy_score(y_probe2, y_pred)
print("Accuracy on Probe2 data:", probe2_accuracy)
probe2_report = classification_report(y_probe2, y_pred)
print("Classification Report:\n", probe2_report)


In [None]:
import numpy as np
import pandas as pd
import pywt
from scipy.signal import welch
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from scipy.linalg import sqrtm
import pickle

# `data` is another name for the same DataFrame object as `df` (no copy is made),
# so columns assigned to `data` below also appear on `df`.
data=df

# Function to extract features using Wavelet Transform
def extract_wavelet_features(signal):
    """Summarize a level-3 'db4' wavelet decomposition of a signal.

    Args:
        signal: Array passed to ``pywt.wavedec``.

    Returns:
        Flat list holding mean, standard deviation and maximum of each
        coefficient array, in the order ``pywt.wavedec`` returns them.
    """
    bands = pywt.wavedec(signal, wavelet='db4', level=3)
    return [stat(band) for band in bands for stat in (np.mean, np.std, np.max)]

# Apply feature extraction to each 'Backscat' entry
data['Wavelet_Features'] = data['Backscat'].apply(lambda x: extract_wavelet_features(np.array(x)))

# Prepare features and labels
features = np.vstack(data['Wavelet_Features'])

# Fit ONE encoder on every label in the dataset and reuse it for both probes.
# Fitting a separate encoder per probe assigns integer codes independently, so
# the same integer could stand for different classes in y_probe1 and y_probe2
# whenever the two probes do not contain exactly the same set of labels.
label_encoder = LabelEncoder().fit(data['Label'])
labels = label_encoder.transform(data['Label'])

# Split the data by Probe
probe1_data = data[data['Probe'] == 'OZ02']
probe2_data = data[data['Probe'] == 'OZ03']

X_probe1 = np.vstack(probe1_data['Wavelet_Features'])
X_probe2 = np.vstack(probe2_data['Wavelet_Features'])
y_probe1 = label_encoder.transform(probe1_data['Label'])
y_probe2 = label_encoder.transform(probe2_data['Label'])

# Normalize features: statistics come from Probe1 and are applied to Probe2
scaler = StandardScaler()
X_probe1_scaled = scaler.fit_transform(X_probe1)
X_probe2_scaled = scaler.transform(X_probe2)

# Function to align data using CORAL
def coral(source, target):
    """Align source features to the target's covariance (CORAL-style).

    The source matrix is multiplied by the inverse square root of its own
    regularized covariance and then by the square root of the target's
    regularized covariance. Neither input is mean-centered here.

    Args:
        source: 2-D array (samples x features) to be transformed.
        target: 2-D array (samples x features) providing the reference
            covariance; must have the same number of features as `source`.

    Returns:
        Real-valued array with the same shape as `source`.
    """
    # A small ridge keeps both covariance matrices invertible
    source_cov = np.cov(source.T) + 1e-5 * np.eye(source.shape[1])
    target_cov = np.cov(target.T) + 1e-5 * np.eye(target.shape[1])

    source_whiten = sqrtm(source_cov)
    target_color = sqrtm(target_cov)

    # sqrtm can return a complex matrix (round-off on ill-conditioned input).
    # The covariances are real and symmetric, so any imaginary part is
    # numerical noise; drop it so downstream estimators receive real features.
    if np.iscomplexobj(source_whiten):
        source_whiten = source_whiten.real
    if np.iscomplexobj(target_color):
        target_color = target_color.real

    source_aligned = source @ np.linalg.inv(source_whiten) @ target_color
    return source_aligned

# Align Probe1 data to Probe2
X_probe1_aligned = coral(X_probe1_scaled, X_probe2_scaled)

# Classification with Random Forest
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_probe1_aligned, y_probe1)
y_pred = model.predict(X_probe2_scaled)

# Evaluation
print("Accuracy on Probe2 data:", accuracy_score(y_probe2, y_pred))

# Pass `labels` explicitly alongside `target_names`: without it,
# classification_report infers the classes from y_probe2/y_pred and fails (or
# mislabels rows) when those do not cover every name in `class_names`.
# NOTE(review): this assumes the integer codes index the sorted unique labels
# of data['Label'] — confirm against how y_probe1/y_probe2 were encoded.
class_names = np.unique(data['Label'])
print("Classification Report:\n", classification_report(
    y_probe2,
    y_pred,
    labels=np.arange(len(class_names)),
    target_names=class_names,
    zero_division=0,
))


In [None]:
import numpy as np
import pandas as pd
import pywt
from scipy.signal import welch
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import pickle

# `data` is another name for the same DataFrame object as `df` (no copy is made),
# so columns assigned to `data` below also appear on `df`.
data=df

# Feature extraction functions
def extract_time_features(signal):
    """Return [mean, std, min, max] computed over all values of `signal`."""
    return [stat(signal) for stat in (np.mean, np.std, np.min, np.max)]

def extract_frequency_features(signal):
    """Return [mean, max, std] of the Welch PSD of `signal` (default settings)."""
    _, spectrum = welch(signal)
    return [stat(spectrum) for stat in (np.mean, np.max, np.std)]

def extract_wavelet_features(signal):
    """Return mean, std and max of every level-3 'db4' coefficient array.

    The values are concatenated into one flat list, following the order in
    which ``pywt.wavedec`` returns the coefficient arrays.
    """
    features = []
    for band in pywt.wavedec(signal, 'db4', level=3):
        features += [np.mean(band), np.std(band), np.max(band)]
    return features

# Applying feature extraction
data['Time_Features'] = data['Backscat'].apply(lambda x: extract_time_features(np.array(x)))
data['Frequency_Features'] = data['Backscat'].apply(lambda x: extract_frequency_features(np.array(x)))
data['Wavelet_Features'] = data['Backscat'].apply(lambda x: extract_wavelet_features(np.array(x)))

# Split data by Probe
probe1_data = data[data['Probe'] == 'OZ02']
probe2_data = data[data['Probe'] == 'OZ03']

# Scale features: each scaler is fitted on Probe1 and then applied to Probe2
scaler_time = StandardScaler()
scaler_freq = StandardScaler()
scaler_wave = StandardScaler()

X_time1 = scaler_time.fit_transform(np.vstack(probe1_data['Time_Features']))
X_freq1 = scaler_freq.fit_transform(np.vstack(probe1_data['Frequency_Features']))
X_wave1 = scaler_wave.fit_transform(np.vstack(probe1_data['Wavelet_Features']))

X_time2 = scaler_time.transform(np.vstack(probe2_data['Time_Features']))
X_freq2 = scaler_freq.transform(np.vstack(probe2_data['Frequency_Features']))
X_wave2 = scaler_wave.transform(np.vstack(probe2_data['Wavelet_Features']))

# Fit ONE encoder on every label in the dataset and reuse it for both probes.
# Fitting a separate encoder per probe assigns integer codes independently, so
# the same integer could stand for different classes in y_probe1 and y_probe2
# whenever the two probes do not contain exactly the same set of labels.
label_encoder = LabelEncoder().fit(data['Label'])
y_probe1 = label_encoder.transform(probe1_data['Label'])
y_probe2 = label_encoder.transform(probe2_data['Label'])

# Each classifier is trained on Probe1 and applied to Probe2 using its own
# feature family: RF -> time, GB -> frequency, SVC -> wavelet.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_time1, y_probe1)
y_pred_rf = rf.predict(X_time2)

gb = GradientBoostingClassifier(n_estimators=100, random_state=42)
gb.fit(X_freq1, y_probe1)
y_pred_gb = gb.predict(X_freq2)

svc = SVC(probability=True, kernel='rbf')
svc.fit(X_wave1, y_probe1)
y_pred_svc = svc.predict(X_wave2)

# Per-model accuracy and report on Probe2
rf_accuracy = accuracy_score(y_probe2, y_pred_rf)
print("Random Forest Accuracy:", rf_accuracy)
rf_report = classification_report(y_probe2, y_pred_rf, zero_division=0)
print("Random Forest Classification Report:\n", rf_report)

gb_accuracy = accuracy_score(y_probe2, y_pred_gb)
print("Gradient Boosting Accuracy:", gb_accuracy)
gb_report = classification_report(y_probe2, y_pred_gb, zero_division=0)
print("Gradient Boosting Classification Report:\n", gb_report)

svc_accuracy = accuracy_score(y_probe2, y_pred_svc)
print("SVC Accuracy:", svc_accuracy)
svc_report = classification_report(y_probe2, y_pred_svc, zero_division=0)
print("SVC Classification Report:\n", svc_report)

# Calculate agreement and disagreement
def calculate_agreement(y_pred1, y_pred2, y_pred3):
    """Measure how often three prediction vectors agree or disagree.

    Args:
        y_pred1, y_pred2, y_pred3: Equal-length 1-D sequences of predicted
            labels (NumPy arrays or plain Python sequences).

    Returns:
        Tuple of five fractions of the sample count:
        (all three agree, 1 vs 2 differ, 1 vs 3 differ, 2 vs 3 differ,
        all three pairwise different).

    Raises:
        ValueError: If the three inputs do not have the same shape.
    """
    # Coerce to arrays so comparisons are element-wise; with plain lists
    # `a == b` yields a single bool and every rate would be silently wrong.
    y_pred1 = np.asarray(y_pred1)
    y_pred2 = np.asarray(y_pred2)
    y_pred3 = np.asarray(y_pred3)
    if not (y_pred1.shape == y_pred2.shape == y_pred3.shape):
        raise ValueError(
            "Prediction arrays must have the same shape, got "
            f"{y_pred1.shape}, {y_pred2.shape} and {y_pred3.shape}"
        )

    total_samples = len(y_pred1)

    differ_1_2 = y_pred1 != y_pred2
    differ_1_3 = y_pred1 != y_pred3
    differ_2_3 = y_pred2 != y_pred3

    all_agree = np.sum((y_pred1 == y_pred2) & (y_pred2 == y_pred3))
    all_disagree = np.sum(differ_1_2 & differ_1_3 & differ_2_3)

    agreement_rate = all_agree / total_samples
    disagreement_rate_1_2 = np.sum(differ_1_2) / total_samples
    disagreement_rate_1_3 = np.sum(differ_1_3) / total_samples
    disagreement_rate_2_3 = np.sum(differ_2_3) / total_samples
    all_disagreement_rate = all_disagree / total_samples

    return agreement_rate, disagreement_rate_1_2, disagreement_rate_1_3, disagreement_rate_2_3, all_disagreement_rate

# Compare the three Probe2 prediction vectors (RF, GB, SVC)
rates = calculate_agreement(y_pred_rf, y_pred_gb, y_pred_svc)
(
    agreement_rate,
    disagreement_rate_1_2,
    disagreement_rate_1_3,
    disagreement_rate_2_3,
    all_disagreement_rate,
) = rates

print("Agreement Rate:", agreement_rate)
print("Disagreement Rate between RF and GB:", disagreement_rate_1_2)
print("Disagreement Rate between RF and SVC:", disagreement_rate_1_3)
print("Disagreement Rate between GB and SVC:", disagreement_rate_2_3)
print("Disagreement Rate among all three classifiers:", all_disagreement_rate)


In [None]:
import numpy as np
import pandas as pd
import pywt
from scipy.signal import welch
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from scipy.linalg import sqrtm


# `data` is another name for the same DataFrame object as `df` (no copy is made),
# so columns assigned to `data` below also appear on `df`.
data=df

# Feature extraction functions
def extract_time_features(signal):
    """Return [mean, std, min, max] computed over all values of `signal`."""
    average = np.mean(signal)
    spread = np.std(signal)
    lowest = np.min(signal)
    highest = np.max(signal)
    return [average, spread, lowest, highest]

def extract_frequency_features(signal):
    """Return [mean, max, std] of the Welch PSD of `signal` (default settings)."""
    spectrum = welch(signal)[1]  # second element is the PSD; frequencies are unused
    mean_power = np.mean(spectrum)
    peak_power = np.max(spectrum)
    power_spread = np.std(spectrum)
    return [mean_power, peak_power, power_spread]

def extract_wavelet_features(signal):
    """Return mean, std and max of every level-3 'db4' coefficient array.

    The values are concatenated into one flat list, following the order in
    which ``pywt.wavedec`` returns the coefficient arrays.
    """
    summaries = (np.mean, np.std, np.max)
    decomposition = pywt.wavedec(signal, 'db4', level=3)
    return [summarize(band) for band in decomposition for summarize in summaries]

# Derive one feature column per extractor from the raw 'Backscat' entries
for column, extractor in (
    ('Time_Features', extract_time_features),
    ('Frequency_Features', extract_frequency_features),
    ('Wavelet_Features', extract_wavelet_features),
):
    # Bind the extractor as a default argument so each lambda keeps its own function
    data[column] = data['Backscat'].apply(lambda x, fn=extractor: fn(np.array(x)))

# Integer-encode the labels of the full dataset
labels = LabelEncoder().fit_transform(data['Label'])

# One frame per probe
is_probe1 = data['Probe'] == 'OZ02'
is_probe2 = data['Probe'] == 'OZ03'
probe1_data = data[is_probe1]
probe2_data = data[is_probe2]

# Separate and scale features
def scale_features(features):
    """Stack per-sample feature vectors and standardize them.

    Args:
        features: Sequence of equal-length feature vectors (stacked with
            ``np.vstack``).

    Returns:
        Tuple ``(scaled, scaler)``: the standardized 2-D array and the
        ``StandardScaler`` fitted on it, for reuse on other data.
    """
    stacked = np.vstack(features)
    fitted_scaler = StandardScaler().fit(stacked)
    return fitted_scaler.transform(stacked), fitted_scaler

# Fit one scaler per feature family on Probe1 ...
X_time1, scaler_time = scale_features(probe1_data['Time_Features'])
X_freq1, scaler_freq = scale_features(probe1_data['Frequency_Features'])
X_wave1, scaler_wave = scale_features(probe1_data['Wavelet_Features'])

# ... and apply the Probe1 statistics to Probe2
X_time2 = scaler_time.transform(np.vstack(probe2_data['Time_Features']))
X_freq2 = scaler_freq.transform(np.vstack(probe2_data['Frequency_Features']))
X_wave2 = scaler_wave.transform(np.vstack(probe2_data['Wavelet_Features']))

# Prepare labels.
# Fit ONE encoder on every label in the dataset and reuse it for both probes.
# Fitting a separate encoder per probe assigns integer codes independently, so
# the same integer could stand for different classes in y_probe1 and y_probe2
# whenever the two probes do not contain exactly the same set of labels.
label_encoder = LabelEncoder().fit(data['Label'])
y_probe1 = label_encoder.transform(probe1_data['Label'])
y_probe2 = label_encoder.transform(probe2_data['Label'])

# Setup classifiers
rf = RandomForestClassifier(n_estimators=100, random_state=42)
gb = GradientBoostingClassifier(n_estimators=100, random_state=42)
# probability=True makes SVC estimate class probabilities with an internal
# cross-validation that shuffles the data; seed it like the other two models
# so the probabilities (used by soft voting) are reproducible across runs.
svc = SVC(probability=True, kernel='rbf', random_state=42)

# Fit each model on a different Probe1 feature set
rf.fit(X_time1, y_probe1)
gb.fit(X_freq1, y_probe1)
svc.fit(X_wave1, y_probe1)

# Create a VotingClassifier for ensemble learning.
# VotingClassifier.fit trains fresh clones of the listed estimators on the
# matrix it is given, so the ensemble works on the combined feature set.
ensemble = VotingClassifier(estimators=[
    ('rf', rf),
    ('gb', gb),
    ('svc', svc)
], voting='soft')

# Train on the combined Probe1 features and evaluate on Probe2.
# Fitting on (X_combined2, y_probe2) and then predicting the same rows would
# report training accuracy on the test probe, not cross-probe performance.
X_combined1 = np.hstack([X_time1, X_freq1, X_wave1])
X_combined2 = np.hstack([X_time2, X_freq2, X_wave2])
ensemble.fit(X_combined1, y_probe1)

# Prediction and Evaluation
y_pred = ensemble.predict(X_combined2)
accuracy = accuracy_score(y_probe2, y_pred)
print("Enhanced Accuracy on Probe2 data:", accuracy)
print("Enhanced Classification Report:\n", classification_report(y_probe2, y_pred))
