<a href="https://colab.research.google.com/github/aymaw/all/blob/main/train_and_test_acc_ml.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# Install necessary packages
!pip install pycaret
from pycaret.classification import *
import pandas as pd
import numpy as np
import plotly.express as px
from google.colab import files
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

# Upload training data
uploaded = files.upload()
df_train = pd.read_csv(list(uploaded.keys())[0])
print("Data successfully uploaded and read!")

# Display basic statistics
print(df_train.describe())

# Visualize raw acceleration data
fig = px.line(
    df_train,
    x='Time (s)',
    y=['Acceleration x (m/s^2)', 'Acceleration y (m/s^2)', 'Acceleration z (m/s^2)'],
    title="Acceleration Over Time (X, Y, Z Axes)"
)
fig.update_yaxes(title='Acceleration (m/s^2)')
fig.update_xaxes(title='Time (s)')
fig.show()

# Visualize absolute acceleration over time
fig = px.line(
    df_train,
    x='Time (s)',
    y=['Absolute acceleration (m/s^2)'],
    title="Absolute Acceleration Over Time"
)
fig.update_yaxes(title='Absolute Acceleration (m/s^2)')
fig.update_xaxes(title='Time (s)')
fig.show()

# Annotate activities
Activity_1 = "Walking"
Activity_1_Start_Time = 10
Activity_1_Finish_Time = 50

Activity_2 = "Running"
Activity_2_Start_Time = 60
Activity_2_Finish_Time = 100

Activity_3 = "Standing"
Activity_3_Start_Time = 110
Activity_3_Finish_Time = 150

# Add activity labels
df_train['class'] = 'NAN'
df_train['class'] = np.where(
    df_train['Time (s)'].between(Activity_1_Start_Time, Activity_1_Finish_Time),
    Activity_1, df_train['class']
)
df_train['class'] = np.where(
    df_train['Time (s)'].between(Activity_2_Start_Time, Activity_2_Finish_Time),
    Activity_2, df_train['class']
)
df_train['class'] = np.where(
    df_train['Time (s)'].between(Activity_3_Start_Time, Activity_3_Finish_Time),
    Activity_3, df_train['class']
)
df_train = df_train[df_train['class'] != 'NAN']

# Plot annotated data
color_map = {Activity_1: "#F1948A", Activity_2: "#48C9B0", Activity_3: "#85C1E9"}
fig = px.scatter(
    df_train,
    x='Time (s)',
    y='class',
    color='class',
    color_discrete_map=color_map,
    title="Annotated Activities Over Time"
)
fig.show()

# Plot absolute acceleration with activities
fig = px.scatter(
    df_train,
    x='Time (s)',
    y='Absolute acceleration (m/s^2)',
    color='class',
    color_discrete_map=color_map,
    title="Absolute Acceleration by Activity"
)
fig.show()

# Feature extraction
def feature_extractor(df, window_size=100, step_size=100):
    z_list = []
    labels = []
    for i in range(0, len(df) - window_size, step_size):
        window = df['Absolute acceleration (m/s^2)'][i:i+window_size]
        z_list.append(window)
        labels.append(df['class'][i:i+window_size].mode()[0])
    features = pd.DataFrame({
        'mean': pd.Series(z_list).apply(np.mean),
        'std_dev': pd.Series(z_list).apply(np.std),
        'min': pd.Series(z_list).apply(np.min),
        'max': pd.Series(z_list).apply(np.max),
        'range': pd.Series(z_list).apply(lambda x: np.max(x) - np.min(x)),
        'median': pd.Series(z_list).apply(np.median),
        'mad': pd.Series(z_list).apply(lambda x: np.mean(np.abs(x - np.mean(x)))),
        'skewness': pd.Series(z_list).apply(lambda x: stats.skew(x)),
        'kurtosis': pd.Series(z_list).apply(lambda x: stats.kurtosis(x))
    })
    features['class'] = labels
    return features

# Extract features from training data
training_data = feature_extractor(df_train)
print("Features successfully extracted!")

# Display feature statistics
print(training_data.describe())

# Visualize relationships between features and activities
pc_df = training_data.copy()
classes = training_data['class'].unique()
pc_df['class'] = pc_df['class'].replace(classes[0], 0) # make class numeric for plotting purposes
pc_df['class'] = pc_df['class'].replace(classes[1], 1)
pc_df['class'] = pc_df['class'].replace(classes[2], 2)

fig = px.parallel_coordinates(
    pc_df, color='class',
    title='Feature Relationships Across Activities',
    width=1300
)
fig.show()

# Train the model using PyCaret
exp = setup(training_data, target='class', silent=True, session_id=123)
best_model = compare_models()
print("Best model selected:")
print(best_model)

# Evaluate the selected model
evaluate_model(best_model)

# Upload and test on new data
uploaded = files.upload()
df_test = pd.read_csv(list(uploaded.keys())[0])

# Extract features from test data
test_features = feature_extractor(df_test)
print("Features extracted from test dataset!")

# Predict on the test dataset
predictions = predict_model(best_model, data=test_features)
print("Model predictions on test data:")
print(predictions)

# Visualize predictions
predictions['Time in Seconds'] = predictions.index
fig = px.bar(
    predictions,
    x='Time in Seconds',
    y='mean',
    color='prediction_label',
    title="Predicted Activities Over Time"
)
fig.show()




Saving Raw Data.csv to Raw Data (2).csv
Data successfully uploaded and read!
           Time (s)  Acceleration x (m/s^2)  Acceleration y (m/s^2)  \
count  15569.000000            15569.000000            15569.000000   
mean      77.630138               -0.850797               -2.368173   
std       44.824869                3.701139                7.885678   
min       -0.000909              -30.589788              -38.735100   
25%       38.814288               -1.510359               -6.291716   
50%       77.629936               -0.452808               -2.860851   
75%      116.445823                0.143102                3.360811   
max      155.262071               16.711099               21.696033   

       Acceleration z (m/s^2)  Absolute acceleration (m/s^2)  
count            15569.000000                   15569.000000  
mean                10.836218                      14.399118  
std                 10.363320                       9.984814  
min                -20.346888  

Features successfully extracted!
             mean     std_dev         min         max       range      median  \
count  120.000000  120.000000  120.000000  120.000000  120.000000  120.000000   
mean    14.207561    4.785195    7.946260   26.256636   18.310376   12.965354   
std      5.836369    5.729268    1.654979   20.667804   21.238236    4.151353   
min      9.693950    0.107888    2.278434    9.947804    0.469666    9.616722   
25%      9.759468    0.213812    7.084536   10.266855    0.980611    9.754935   
50%     11.143464    1.767624    7.999769   14.894614    7.316128   11.240032   
75%     19.142564   11.003729    9.365307   49.216032   42.157606   14.845669   
max     30.788891   17.102634   11.841148   71.378477   64.961723   29.727192   

              mad    skewness    kurtosis  
count  120.000000  120.000000  120.000000  
mean     3.828997    0.280834   -0.050606  
std      4.608817    0.680193    0.921925  
min      0.083114   -1.036424   -1.425828  
25%      0.166701

TypeError: setup() got an unexpected keyword argument 'silent'

In [6]:
# Import Libraries
import pandas as pd
import numpy as np
import plotly.express as px
from pycaret.classification import setup, compare_models, evaluate_model, predict_model
from scipy import stats
import warnings

warnings.filterwarnings('ignore')

# Upload Training Data
uploaded_train = files.upload()
train_file_name = list(uploaded_train.keys())[0]
train_data = pd.read_csv(train_file_name)

# Display raw data description
print(train_data.describe())

# Visualize raw acceleration data by three axes over time
fig = px.line(train_data, x='Time (s)', y=['Acceleration x (m/s^2)', 'Acceleration y (m/s^2)', 'Acceleration z (m/s^2)'])
fig.update_yaxes(title='Acceleration (m/s^2) in X, Y, and Z Planes')
fig.update_xaxes(title='Time in Seconds')
fig.show()

# Visualize absolute acceleration over time
fig = px.line(train_data, x='Time (s)', y=['Absolute acceleration (m/s^2)'])
fig.update_yaxes(title='Absolute Acceleration (m/s^2)')
fig.update_xaxes(title='Time in Seconds')
fig.show()

# Annotate activities
Activity_1 = "Walking"
Activity_1_Start_Time = 5
Activity_1_Finish_Time = 15

Activity_2 = "Running"
Activity_2_Start_Time = 20
Activity_2_Finish_Time = 30

Activity_3 = "Jumping"
Activity_3_Start_Time = 35
Activity_3_Finish_Time = 45

# Add class labels to the data
train_data['class'] = 'NAN'
train_data['class'] = np.where(train_data['Time (s)'].between(Activity_1_Start_Time, Activity_1_Finish_Time), Activity_1, train_data['class'])
train_data['class'] = np.where(train_data['Time (s)'].between(Activity_2_Start_Time, Activity_2_Finish_Time), Activity_2, train_data['class'])
train_data['class'] = np.where(train_data['Time (s)'].between(Activity_3_Start_Time, Activity_3_Finish_Time), Activity_3, train_data['class'])
train_data = train_data[train_data['class'] != 'NAN']

# Visualize activities on a scatter plot
color_map = {Activity_1: "#F1948A", Activity_2: "#48C9B0", Activity_3: "#85C1E9"}
fig = px.scatter(train_data, x='Time (s)', y='class', color='class', color_discrete_map=color_map)
fig.update_yaxes(title='Classes')
fig.show()

# Plot absolute acceleration over time, colored by activity
fig = px.scatter(train_data, x='Time (s)', y='Absolute acceleration (m/s^2)', color='class', color_discrete_map=color_map)
fig.show()

# Visualize the distribution of absolute acceleration by activity
fig = px.histogram(train_data, x='Absolute acceleration (m/s^2)', color='class', color_discrete_map=color_map)
fig.update_yaxes(title='Count of Instances')
fig.show()

# Feature Extraction Function
def feature_extractor(df, window_size=100, step_size=100):
    features = []
    labels = []
    for i in range(0, len(df) - window_size, step_size):
        window = df.iloc[i:i + window_size]
        acc = window['Absolute acceleration (m/s^2)']

        # Extract features
        result = {
            'mean': acc.mean(),
            'std': acc.std(),
            'min': acc.min(),
            'max': acc.max(),
            'median': np.median(acc),
            'mad': np.mean(np.abs(acc - np.mean(acc))),
            'skewness': stats.skew(acc),
            'kurtosis': stats.kurtosis(acc)
        }

        # Add label if available
        if 'class' in window.columns:
            label = window['class'].mode()[0]
            result['class'] = label

        features.append(result)
    return pd.DataFrame(features)

# Extract features from training data
training_data = feature_extractor(train_data)
print("Features extracted from training data.")
print(training_data.describe())

# Parallel Coordinates Plot to visualize feature distributions
pc_df = training_data.copy()
class_labels = pc_df['class'].unique()
pc_df['class'] = pc_df['class'].replace({class_labels[0]: 0, class_labels[1]: 1, class_labels[2]: 2})

fig = px.parallel_coordinates(pc_df, color='class', title='Feature Distributions by Class')
fig.show()

# Train the model using PyCaret
exp = setup(training_data, target='class', session_id=123)
best_model = compare_models()
print("Best model selected:")
print(best_model)

# Evaluate the selected model
evaluate_model(best_model)

# Upload Test Data
uploaded_test = files.upload()
test_file_name = list(uploaded_test.keys())[0]
test_data = pd.read_csv(test_file_name)

# Extract features from test data
testing_data = feature_extractor(test_data)
print("Features extracted from testing data.")

# Predict using the trained model
predictions = predict_model(best_model, data=testing_data)
print("Predictions on test data:")
print(predictions)

# Visualize Predictions
predictions['Time (s)'] = range(len(predictions))
fig = px.bar(predictions, x='Time (s)', y='mean', color='Label', title='Predictions on Test Data', color_discrete_map=color_map)
fig.show()


Saving Raw Data.csv to Raw Data (3).csv
           Time (s)  Acceleration x (m/s^2)  Acceleration y (m/s^2)  \
count  15569.000000            15569.000000            15569.000000   
mean      77.630138               -0.850797               -2.368173   
std       44.824869                3.701139                7.885678   
min       -0.000909              -30.589788              -38.735100   
25%       38.814288               -1.510359               -6.291716   
50%       77.629936               -0.452808               -2.860851   
75%      116.445823                0.143102                3.360811   
max      155.262071               16.711099               21.696033   

       Acceleration z (m/s^2)  Absolute acceleration (m/s^2)  
count            15569.000000                   15569.000000  
mean                10.836218                      14.399118  
std                 10.363320                       9.984814  
min                -20.346888                       2.278434  
25%  

Features extracted from training data.
            mean        std        min        max     median        mad  \
count  30.000000  30.000000  30.000000  30.000000  30.000000  30.000000   
mean   23.209013  13.481330   7.194981  57.940770  19.181614  10.767829   
std     3.793455   2.319219   2.441121   8.645995   3.686924   2.094554   
min    16.027154   7.468564   2.278434  37.139020  11.957127   6.103309   
25%    20.422170  11.828626   6.078507  54.054236  16.664915   9.419976   
50%    23.498643  13.642275   7.201887  58.275299  19.282512  10.557846   
75%    24.694005  15.156935   8.955317  63.364614  21.083887  12.351730   
max    32.004282  18.647405  11.841148  74.838005  26.422620  15.688003   

        skewness   kurtosis  
count  30.000000  30.000000  
mean    1.044196   0.204130  
std     0.284851   0.840811  
min     0.470214  -1.033232  
25%     0.875379  -0.316073  
50%     1.033621   0.132823  
75%     1.182518   0.444878  
max     1.721105   2.810008  


Unnamed: 0,Description,Value
0,Session id,123
1,Target,class
2,Target type,Multiclass
3,Target mapping,"Jumping: 0, Running: 1, Walking: 2"
4,Original data shape,"(30, 9)"
5,Transformed data shape,"(30, 9)"
6,Transformed train set shape,"(21, 9)"
7,Transformed test set shape,"(9, 9)"
8,Numeric features,8
9,Preprocess,True


Processing:   0%|          | 0/65 [00:00<?, ?it/s]

Best model selected:
[]


interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

Saving Raw Data.csv to Raw Data (4).csv
Features extracted from testing data.


AttributeError: 'list' object has no attribute 'predict'