In [1]:
pip install numpy pandas scikit-learn

Defaulting to user installation because normal site-packages is not writeable
Collecting scikit-learn
  Obtaining dependency information for scikit-learn from https://files.pythonhosted.org/packages/eb/cb/22891a0048dcb66c61a7fb3153829b1b9e09d6242e4dae00b17983377340/scikit_learn-1.4.0-cp39-cp39-macosx_12_0_arm64.whl.metadata
  Downloading scikit_learn-1.4.0-cp39-cp39-macosx_12_0_arm64.whl.metadata (11 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Obtaining dependency information for joblib>=1.2.0 from https://files.pythonhosted.org/packages/10/40/d551139c85db202f1f384ba8bcf96aca2f329440a844f924c8a0040b6d02/joblib-1.3.2-py3-none-any.whl.metadata
  Downloading joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=2.0.0 (from scikit-learn)
  Obtaining dependency information for threadpoolctl>=2.0.0 from https://files.pythonhosted.org/packages/81/12/fd4dea011af9d69e1cad05c75f3f7202cdcbeac9b712eea58ca779a72865/threadpoolctl-3.2.0-py3-none-any.whl.metadata
  Downloadi

In [None]:
# Data Loading and Preprocessing
import pandas as pd
import glob

# Folder holding the labelled tracking CSVs (relative to the working directory)
path = 'labled_data'

# Find all matching CSV files in the folder. glob.glob() returns matches in
# arbitrary, filesystem-dependent order, so sort them: the row order of `data`
# then stays the same from run to run and from machine to machine, which is
# required for any seeded split of `data` to be reproducible.
all_files = sorted(glob.glob(path + "/Tracking_video_*_labled.csv"))

# Fail early with a clear message; otherwise pd.concat() on an empty list
# raises the much less helpful "No objects to concatenate".
if not all_files:
    raise FileNotFoundError(
        "No files matching 'Tracking_video_*_labled.csv' found in '" + path + "'"
    )

# Read each CSV file (first row is the header, no column is used as the index)
li = [pd.read_csv(filename, index_col=None, header=0) for filename in all_files]

# Combine all dataframes row-wise into a single dataframe with a fresh 0..n-1 index
data = pd.concat(li, axis=0, ignore_index=True)

In [None]:
# Model Training
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Features: every column except the target column
X = data.drop(columns='Label')
# Labels
y = data['Label']

# Split the data into training (80%) and testing (20%) sets; the fixed seed
# makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the model. A random forest is itself randomised (bootstrap samples
# and per-split feature subsets), so it is seeded as well; without this the
# fitted model and its scores change on every run even though the split is fixed.
model = RandomForestClassifier(random_state=42)

# Train the model
model.fit(X_train, y_train)

In [None]:
# Model Evaluation
from sklearn.metrics import accuracy_score, classification_report

# Have the model label the test features
y_pred = model.predict(X_test)

# Overall accuracy of the predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Text summary produced by classification_report for the same predictions
report = classification_report(y_test, y_pred)
print(report)

In [None]:
# Model Saving
import joblib

# Serialize the model object to 'model.pkl' in the current working directory
joblib.dump(value=model, filename='model.pkl')

In [None]:
# Model Loading and Usage
import joblib

# Load the model.
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code, so only load model files that come from a trusted source.
loaded_model = joblib.load('model.pkl')

# Use the loaded model to make predictions
# For example, predict the label for the first instance in the test set.
# iloc[[0]] (list indexer) keeps a one-row DataFrame with its column names and
# per-column dtypes. Wrapping the Series returned by iloc[0] in a list would
# drop the column names, which makes scikit-learn warn that X does not have
# valid feature names when the model was fitted on a DataFrame.
single_prediction = loaded_model.predict(X_test.iloc[[0]])
print("Prediction for the first instance in the test set:", single_prediction)