In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive so that
# files stored on Drive can be opened through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')



Mounted at /content/drive


In [2]:
# List the Drive folder that holds the dataset (the space in "My Drive" is backslash-escaped for the shell).
!ls /content/drive/My\ Drive/content


UCI_PhysicalActivity.csv


In [3]:
import pandas as pd

# Load the physical-activity CSV from the mounted Drive folder into `df`
# and print the first rows as a quick sanity check of columns and values.
file_path = '/content/drive/My Drive/content/UCI_PhysicalActivity.csv'
df = pd.read_csv(file_path)

preview_rows = df.head()
print(preview_rows)


             activityID  heart_rate  hand temperature (°C)  \
0  transient activities       104.0                   30.0   
1  transient activities       104.0                   30.0   
2  transient activities       104.0                   30.0   
3  transient activities       104.0                   30.0   
4  transient activities       104.0                   30.0   

   hand acceleration X ±16g  hand acceleration Y ±16g  \
0                   2.37223                   8.60074   
1                   2.18837                   8.56560   
2                   2.37357                   8.60107   
3                   2.07473                   8.52853   
4                   2.22936                   8.83122   

   hand acceleration Z ±16g  hand gyroscope X  hand gyroscope Y  \
0                   3.51048         -0.092217          0.056812   
1                   3.66179         -0.024413          0.047759   
2                   3.54898         -0.057976          0.032574   
3               

In [4]:
# Count the null entries in every column so gaps can be handled before modelling.
missing_per_column = df.isnull().sum()
print("Missing Values Per Column:")
print(missing_per_column)

Missing Values Per Column:
activityID                    0
heart_rate                   46
hand temperature (°C)         0
hand acceleration X ±16g      0
hand acceleration Y ±16g      0
hand acceleration Z ±16g      0
hand gyroscope X              0
hand gyroscope Y              0
hand gyroscope Z              0
hand magnetometer X           0
hand magnetometer Y           0
hand magnetometer Z           0
chest temperature (°C)        0
chest acceleration X ±16g     0
chest acceleration Y ±16g     0
chest acceleration Z ±16g     0
chest gyroscope X             0
chest gyroscope Y             0
chest gyroscope Z             0
chest magnetometer X          0
chest magnetometer Y          0
chest magnetometer Z          0
ankle temperature (°C)        0
ankle acceleration X ±16g     0
ankle acceleration Y ±16g     0
ankle acceleration Z ±16g     0
ankle gyroscope X             0
ankle gyroscope Y             0
ankle gyroscope Z             0
ankle magnetometer X          0
ankle magneto

In [5]:
# Fill the missing heart-rate readings with the column mean
# (the mean is computed over the non-null readings of the whole frame).
heart_rate_mean = df['heart_rate'].mean()
df['heart_rate'] = df['heart_rate'].fillna(heart_rate_mean)

In [6]:
# Re-count nulls per column to verify that the imputation left no gaps behind.
remaining_missing = df.isnull().sum()
print("Remaining Missing Values Per Column:")
print(remaining_missing)

Remaining Missing Values Per Column:
activityID                   0
heart_rate                   0
hand temperature (°C)        0
hand acceleration X ±16g     0
hand acceleration Y ±16g     0
hand acceleration Z ±16g     0
hand gyroscope X             0
hand gyroscope Y             0
hand gyroscope Z             0
hand magnetometer X          0
hand magnetometer Y          0
hand magnetometer Z          0
chest temperature (°C)       0
chest acceleration X ±16g    0
chest acceleration Y ±16g    0
chest acceleration Z ±16g    0
chest gyroscope X            0
chest gyroscope Y            0
chest gyroscope Z            0
chest magnetometer X         0
chest magnetometer Y         0
chest magnetometer Z         0
ankle temperature (°C)       0
ankle acceleration X ±16g    0
ankle acceleration Y ±16g    0
ankle acceleration Z ±16g    0
ankle gyroscope X            0
ankle gyroscope Y            0
ankle gyroscope Z            0
ankle magnetometer X         0
ankle magnetometer Y         0
an

In [8]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Remove every row labelled "transient activities" and continue on an
# independent copy, so the scaling below never writes back into `df`.
is_transient = df["activityID"] == "transient activities"
df_cleaned = df.loc[~is_transient].copy()

# Heart rate plus the hand-sensor columns that are used as model features.
numerical_features = [
    'heart_rate',
    'hand temperature (°C)',
    'hand acceleration X ±16g',
    'hand acceleration Y ±16g',
    'hand acceleration Z ±16g',
    'hand gyroscope X',
    'hand gyroscope Y',
    'hand gyroscope Z',
]

# StandardScaler is used because these features are measured on different
# scales: it removes each column's mean and rescales it to unit variance.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df_cleaned[numerical_features])
df_cleaned[numerical_features] = scaled_values

In [11]:
from sklearn.preprocessing import LabelEncoder

# Turn the activity names into integer class ids. LabelEncoder numbers the
# classes from 0, so 1 is added to obtain ids that start at 1.
label_encoder = LabelEncoder()
encoded_activity = label_encoder.fit_transform(df_cleaned['activityID'])
df_cleaned['activityID_encoded'] = encoded_activity + 1

# Feature matrix and target vector for modelling.
X = df_cleaned[numerical_features]
y = df_cleaned['activityID_encoded']

print("Processed Data Sample:")
processed_preview = df_cleaned.head()
print(processed_preview)


Processed Data Sample:
     activityID  heart_rate  hand temperature (°C)  hand acceleration X ±16g  \
2928      lying   -0.273011              -1.378972                  1.151444   
2929      lying   -0.273011              -1.378972                  1.163734   
2930      lying   -0.273011              -1.378972                  1.163564   
2931      lying   -0.273011              -1.378972                  1.151877   
2932      lying   -0.273011              -1.378972                  1.165193   

      hand acceleration Y ±16g  hand acceleration Z ±16g  hand gyroscope X  \
2928                  0.686578                  0.500010         -0.003890   
2929                  0.598233                  0.539906         -0.129401   
2930                  0.520931                  0.559900         -0.179415   
2931                  0.521113                  0.579165         -0.145339   
2932                  0.537859                  0.628239         -0.052912   

      hand gyroscope Y  han

In [14]:
# Print the activity-name -> integer-id mapping for reference: the head()
# preview of the processed frame shows rows from only one activity, so the
# ids assigned to the other activities are not visible there.
class_names = label_encoder.classes_
label_mapping = dict(zip(class_names, range(1, len(class_names) + 1)))
print("Label Encoding Mapping:")
print(label_mapping)


Label Encoding Mapping:
{'Nordic walking': 1, 'ascending stairs': 2, 'cycling': 3, 'descending stairs': 4, 'ironing': 5, 'lying': 6, 'rope jumping': 7, 'running': 8, 'sitting': 9, 'standing': 10, 'vacuum cleaning': 11, 'walking': 12}


In [16]:
# Draw a reproducible (seeded) 20% random subsample of the cleaned rows and
# take features and encoded labels from it -- presumably to keep SVM training
# time manageable; confirm before changing the fraction.
df_sampled = df_cleaned.sample(frac=0.2, random_state=42)
X_sampled, y_sampled = (
    df_sampled[numerical_features],
    df_sampled['activityID_encoded'],
)


In [17]:
#Data Preprocessing is complete and we move onto splitting the dataset into train and test
from sklearn.model_selection import train_test_split

# Stratified 80/20 split: every activity keeps its share of rows in both
# partitions, and the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_sampled, y_sampled, test_size=0.2, random_state=42, stratify=y_sampled
)

# The features were standardized earlier with a scaler fitted on ALL rows, so
# the rows that ended up in the test partition influenced the scaling
# statistics (data leakage). Standardization is invariant to a prior affine
# rescaling, so re-standardizing here with the TRAINING partition's mean/std
# yields (up to floating-point rounding) the values a scaler fitted on the raw
# training rows alone would have produced; the test partition is transformed
# with those same training statistics.
train_mean = X_train.mean()
# ddof=0 matches StandardScaler's population standard deviation; a constant
# column (std == 0) is left unscaled instead of dividing by zero.
train_std = X_train.std(ddof=0).replace(0, 1.0)
X_train = (X_train - train_mean) / train_std
X_test = (X_test - train_mean) / train_std


In [18]:
from sklearn.svm import SVC

# An RBF kernel is used because the sensor readings (heart rate, gyroscope,
# accelerometer) are likely to relate to the activity in a nonlinear way,
# which the RBF kernel can capture.
svm_params = {'kernel': 'rbf', 'C': 1.0, 'gamma': 'scale'}
svm = SVC(**svm_params)

# Train the classifier on the training partition.
svm.fit(X_train, y_train)


In [19]:
# Predict an encoded activity id for every row of the held-out test features.
y_pred = svm.predict(X_test)


In [20]:
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Overall share of test rows whose predicted activity id matches the true one.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Per-class precision, recall, F1 and support (classes are the encoded ids).
report_text = classification_report(y_test, y_pred)
print("Classification Report:")
print(report_text)

# Rows are true classes, columns are predicted classes.
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)


Accuracy: 0.8461270333075136
Classification Report:
              precision    recall  f1-score   support

           1       0.90      0.94      0.92      7530
           2       0.68      0.61      0.64      4690
           3       0.92      0.94      0.93      6558
           4       0.73      0.53      0.61      4156
           5       0.79      0.88      0.83      9571
           6       0.97      0.93      0.95      7686
           7       0.95      0.89      0.92      1708
           8       0.95      0.92      0.93      3969
           9       0.88      0.83      0.85      7395
          10       0.79      0.87      0.83      7599
          11       0.72      0.71      0.72      7052
          12       0.89      0.92      0.90      9546

    accuracy                           0.85     77460
   macro avg       0.85      0.83      0.84     77460
weighted avg       0.85      0.85      0.84     77460

Confusion Matrix:
[[7057   99   78   55   36    9    3   21   36   16   27   93]
