# Training SVM

### NEW Method with FD's (Recommended)

##### Train the Model

In [58]:
import os
import numpy as np
from pathlib import Path
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, classification_report

def load_fd_vectors(folder: str):
    X, y = [], []
    for fname in os.listdir(folder):
        if fname.endswith("_fd.txt"):
            vec = np.loadtxt(Path(folder, fname), delimiter=",").ravel()
            label = 1 if fname.endswith("T_fd.txt") else 0  # label from suffix
            X.append(vec)
            y.append(label)
    return np.stack(X).astype(np.float32), np.array(y)

# ---------------- TRAIN ----------------
X_train, y_train = load_fd_vectors("trainHOG")

model = make_pipeline(StandardScaler(), LinearSVC())
model.fit(X_train, y_train)


##### Test the Model

In [59]:
# -------------------- TEST ---------------------
X_test, y_test = load_fd_vectors("testHOG") 

pred = model.predict(X_test)

print(f"Accuracy: {accuracy_score(y_test, pred)*100:.2f}%\n")
print(classification_report(y_test, pred, target_names=["not-person (0)", "person (1)"]))

Accuracy: 68.00%

                precision    recall  f1-score   support

not-person (0)       0.68      0.76      0.72       536
    person (1)       0.68      0.59      0.63       464

      accuracy                           0.68      1000
     macro avg       0.68      0.67      0.67      1000
  weighted avg       0.68      0.68      0.68      1000



## OLD Method


#### This was the old way using the images

In [10]:
from sklearn import svm
import os
import numpy as np
import pickle
model=svm.SVC()

In [12]:
data = []
label = []

for file in os.listdir("people"):
    if file.endswith(".txt") and "_fd" not in file:
        hog = np.loadtxt(os.path.join("people", file), delimiter=",")
        data.append(hog.ravel())   # flatten to 1D
        label.append(1)           # person label

for file in os.listdir("notpeople"):
    if file.endswith(".txt") and "_fd" not in file:
        hog = np.loadtxt(os.path.join("notpeople", file), delimiter=",")
        data.append(hog.ravel())
        label.append(0)           # not-person label

# Stack into 2D NumPy array
X = np.stack(data).astype(np.float64)  # shape: (n_samples, n_features)
y = np.array(label)


#### Trying using FD's instead
Current Method

In [44]:
data, label = [], []
for f in os.listdir("trainHOG"):
    if f.endswith("_fd.txt"):
        data.append(np.loadtxt(os.path.join("trainHOG", f), delimiter=",").ravel())
        label.append(1 if f.endswith("T_fd.txt") else 0)

X = np.stack(data).astype(np.float32)
y = np.array(label)

model.fit(X, y)


## Test Efficacy

In [42]:
# ----------------------------- TEST ---------------------------------
import os, numpy as np
from sklearn.metrics import accuracy_score, classification_report

test_vecs, expected = [], []

for f in os.listdir("trainHOG"):
    if f.endswith("_fd.txt"):
        test_vecs.append(
            np.loadtxt(os.path.join("trainHOG", f), delimiter=",").ravel()
        )
        expected.append(1 if f.endswith("T_fd.txt") else 0)   # label from suffix

if not test_vecs:
    raise RuntimeError(
        "No *_fd.txt files found in 'trainHOG'. "
        "Make sure you ran the extractor with SAVE_DESCRIPTOR=True."
    )

X_test = np.stack(test_vecs).astype(np.float32)
y_test = np.array(expected)

pred = model.predict(X_test)

print(f"Accuracy: {accuracy_score(y_test, pred)*100:.2f}%\n")
print(classification_report(y_test, pred, target_names=["not-person (0)", "person (1)"]))

Accuracy: 100.00%

                precision    recall  f1-score   support

not-person (0)       1.00      1.00      1.00      1889
    person (1)       1.00      1.00      1.00      1111

      accuracy                           1.00      3000
     macro avg       1.00      1.00      1.00      3000
  weighted avg       1.00      1.00      1.00      3000



#### Old Method with images

In [18]:
testdata=[]
expected=[]
for entry in os.scandir("trainHOG"):
    if entry.name.endswith("T.txt"):
        testdata.append(np.loadtxt(entry.path, delimiter=',').flatten())
        expected.append(1)
    elif entry.name.endswith("F.txt"):
        testdata.append(np.loadtxt(entry.path, delimiter=',').flatten())
        expected.append(0)

In [19]:
q=model.predict(testdata)
print(q.tolist())
print(expected)
n = len(expected)
score=n
for guess in range(0,n):
    score=score-(q[guess]+expected[guess])%2
print(100*(score/n))

ValueError: X has 8192 features, but SVC is expecting 3780 features as input.

### Save Model as file

In [17]:
# save
with open('model.pkl','wb') as f:
    pickle.dump(model,f)

# load
# with open('model.pkl', 'rb') as f:
#     clf2 = pickle.load(f)