In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler


In [2]:

# Load the training data: feature matrix, label file and subject file.
# `sep=r'\s+'` replaces the deprecated `delim_whitespace=True` (pandas >= 2.2
# emits a FutureWarning for it); both split fields on runs of whitespace.
X_train = pd.read_csv(r'D:\ChildTiming\data\UCI HAR Dataset\train\X_train.txt', sep=r'\s+', header=None)
y_train = pd.read_csv(r'D:\ChildTiming\data\UCI HAR Dataset\train\y_train.txt', header=None)
subject_train = pd.read_csv(r'D:\ChildTiming\data\UCI HAR Dataset\train\subject_train.txt', header=None)

# Load the test data (same three files, read the same way)
X_test = pd.read_csv(r'D:\ChildTiming\data\UCI HAR Dataset\test\X_test.txt', sep=r'\s+', header=None)
y_test = pd.read_csv(r'D:\ChildTiming\data\UCI HAR Dataset\test\y_test.txt', header=None)
subject_test = pd.read_csv(r'D:\ChildTiming\data\UCI HAR Dataset\test\subject_test.txt', header=None)

In [3]:
# Collapse every sample (row) into two summary statistics taken across all of
# its columns: the row mean and the row standard deviation.
X_train_features = pd.DataFrame({
    'mean': X_train.mean(axis=1),
    'std': X_train.std(axis=1),
})

# Same two per-row statistics for the test split.
X_test_features = pd.DataFrame({
    'mean': X_test.mean(axis=1),
    'std': X_test.std(axis=1),
})

In [4]:
# Build one table per split: subject id, label, then the summary features.
# The subject and label frames are read with header=None, so each carries the
# column label 0; concatenating them as-is yields two columns both named 0,
# which makes `train_data[0]` ambiguous. Give them distinct names first.
train_data = pd.concat(
    [
        subject_train.rename(columns={0: 'subject'}),
        y_train.rename(columns={0: 'activity'}),
        X_train_features,
    ],
    axis=1,
)
test_data = pd.concat(
    [
        subject_test.rename(columns={0: 'subject'}),
        y_test.rename(columns={0: 'activity'}),
        X_test_features,
    ],
    axis=1,
)

In [5]:
# Standardize the features.
# The scaler learns its statistics from the training split only; the same
# fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(X_train_features)
X_train_scaled = scaler.transform(X_train_features)
X_test_scaled = scaler.transform(X_test_features)

In [6]:
# Prepare the target variable (activity labels) as 1-D arrays.
# `np.array(...).ravel()` accepts both the DataFrame produced by read_csv and
# an already-flattened ndarray, so this cell can be re-run safely. The previous
# `y_train.values.flatten()` raised AttributeError on a second run because the
# name had been rebound to an ndarray, which has no `.values`.
y_train = np.array(y_train).ravel()
y_test = np.array(y_test).ravel()

print(X_train_scaled[:5], y_train[:5])


[[-0.70712567  2.34091653]
 [-0.90556948  0.82143234]
 [-0.93534548  0.64995939]
 [-0.9712132   0.72119976]
 [-0.94857522  0.75620036]] [5 5 5 5 5]


In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import cross_val_score

In [8]:
# Train a 100-tree random forest (fixed seed) on the scaled training features,
# then predict labels for the scaled test features.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)


In [9]:
# Score the predictions against the true test labels and report as a percentage.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy on test set: {accuracy * 100:.2f}%")

Accuracy on test set: 44.04%


In [10]:
# Per-class precision / recall / F1 for the current predictions.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           1       0.48      0.54      0.51       496
           2       0.50      0.52      0.51       471
           3       0.61      0.48      0.54       420
           4       0.31      0.28      0.30       491
           5       0.39      0.46      0.42       532
           6       0.41      0.37      0.39       537

    accuracy                           0.44      2947
   macro avg       0.45      0.44      0.44      2947
weighted avg       0.44      0.44      0.44      2947



In [11]:
# 5-fold cross-validation of the random forest on the training split.
# NOTE(review): X_train_scaled was standardized with a scaler fitted on the
# whole training split, so the folds share scaling statistics.
cv_scores = cross_val_score(estimator=model, X=X_train_scaled, y=y_train, cv=5)
print(f"Cross-validation scores: {cv_scores}")

Cross-validation scores: [0.46091094 0.46566961 0.45646259 0.44353741 0.48095238]


In [12]:
from sklearn.svm import SVC

In [15]:
# Sanity check: dimensions of every split.
print(
    f"X_train shape: {X_train.shape}",
    f"X_test shape: {X_test.shape}",
    f"y_train shape: {y_train.shape}",
    f"y_test shape: {y_test.shape}",
    sep="\n",
)

# Check the unique labels in the target variable
print(
    f"Unique labels in y_train: {np.unique(y_train)}",
    f"Unique labels in y_test: {np.unique(y_test)}",
    sep="\n",
)

X_train shape: (7352, 561)
X_test shape: (2947, 561)
y_train shape: (7352,)
y_test shape: (2947,)
Unique labels in y_train: [1 2 3 4 5 6]
Unique labels in y_test: [1 2 3 4 5 6]


In [16]:
# Reload the training data from disk. This rebinds X_train / y_train /
# subject_train to freshly parsed DataFrames.
# `sep=r'\s+'` replaces the deprecated `delim_whitespace=True` (pandas >= 2.2
# emits a FutureWarning for it); both split fields on runs of whitespace.
X_train = pd.read_csv(r'D:\ChildTiming\data\UCI HAR Dataset\train\X_train.txt', sep=r'\s+', header=None)
y_train = pd.read_csv(r'D:\ChildTiming\data\UCI HAR Dataset\train\y_train.txt', sep=r'\s+', header=None)
subject_train = pd.read_csv(r'D:\ChildTiming\data\UCI HAR Dataset\train\subject_train.txt', sep=r'\s+', header=None)

# Load test data
X_test = pd.read_csv(r'D:\ChildTiming\data\UCI HAR Dataset\test\X_test.txt', sep=r'\s+', header=None)
y_test = pd.read_csv(r'D:\ChildTiming\data\UCI HAR Dataset\test\y_test.txt', sep=r'\s+', header=None)
subject_test = pd.read_csv(r'D:\ChildTiming\data\UCI HAR Dataset\test\subject_test.txt', sep=r'\s+', header=None)

In [17]:
# Stack the train and test feature rows into a single frame. The original row
# labels are kept, so the test rows restart their labels rather than continuing
# from the end of the training rows.
X_full = pd.concat((X_train, X_test), axis='index')

# Combine train and test sets into a single dataset for target labels (y),
# returned as a flat 1-D array.
y_stacked = pd.concat((y_train, y_test), axis='index')
y_full = y_stacked.to_numpy().ravel()

In [24]:
# Plain-text dump of the combined feature frame.
x_full_text = str(X_full)
print(x_full_text)

           0         1         2         3         4         5         6    \
0     0.288585 -0.020294 -0.132905 -0.995279 -0.983111 -0.913526 -0.995112   
1     0.278419 -0.016411 -0.123520 -0.998245 -0.975300 -0.960322 -0.998807   
2     0.279653 -0.019467 -0.113462 -0.995380 -0.967187 -0.978944 -0.996520   
3     0.279174 -0.026201 -0.123283 -0.996091 -0.983403 -0.990675 -0.997099   
4     0.276629 -0.016570 -0.115362 -0.998139 -0.980817 -0.990482 -0.998321   
...        ...       ...       ...       ...       ...       ...       ...   
2942  0.310155 -0.053391 -0.099109 -0.287866 -0.140589 -0.215088 -0.356083   
2943  0.363385 -0.039214 -0.105915 -0.305388  0.028148 -0.196373 -0.373540   
2944  0.349966  0.030077 -0.115788 -0.329638 -0.042143 -0.250181 -0.388017   
2945  0.237594  0.018467 -0.096499 -0.323114 -0.229775 -0.207574 -0.392380   
2946  0.153627 -0.018437 -0.137018 -0.330046 -0.195253 -0.164339 -0.430974   

           7         8         9    ...       551       552    

In [18]:
# Standardize the complete feature matrix. This rebinds `scaler`,
# `X_train_scaled` and `X_test_scaled`, replacing the values computed earlier
# from the two summary features.
scaler = StandardScaler().fit(X_train)       # statistics come from the training split only
X_train_scaled = scaler.transform(X_train)   # apply them to the training split
X_test_scaled = scaler.transform(X_test)     # and reuse them for the test split

In [19]:
# Linear-kernel support vector classifier; other kernels such as 'rbf' can be
# swapped in here.
svm_model = SVC(random_state=42, kernel='linear')
# y_train is a DataFrame at this point, so hand the estimator a flat 1-D label array.
svm_model.fit(X_train_scaled, y_train.to_numpy().ravel())

In [20]:
# Predict labels for the standardized test features with the fitted SVM
# (rebinds `y_pred`, previously the random-forest predictions).
y_pred = svm_model.predict(X=X_test_scaled)

In [21]:
# Overall accuracy of the SVM predictions on the test split.
# NOTE(review): y_test is the DataFrame read from disk here, not a flattened
# array as in the earlier evaluation.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy on test set: {accuracy * 100:.2f}%")

# Print classification report for detailed evaluation
print("Classification Report:")
svm_report = classification_report(y_true=y_test, y_pred=y_pred)
print(svm_report)

Accuracy on test set: 96.10%
Classification Report:
              precision    recall  f1-score   support

           1       0.96      1.00      0.98       496
           2       0.96      0.96      0.96       471
           3       0.99      0.95      0.97       420
           4       0.96      0.88      0.92       491
           5       0.90      0.97      0.93       532
           6       1.00      1.00      1.00       537

    accuracy                           0.96      2947
   macro avg       0.96      0.96      0.96      2947
weighted avg       0.96      0.96      0.96      2947



In [22]:
import joblib

In [23]:
# Serialize the fitted SVM to a file path relative to the current working
# directory.
# NOTE(review): only the classifier is written; the fitted `scaler` is not
# saved, so a consumer of this file must standardize inputs the same way.
model_path = 'uci_svm_model.pkl'
joblib.dump(svm_model, model_path)

print("UCI HAR Dataset SVM model saved successfully.")

UCI HAR Dataset SVM model saved successfully.
