In [1]:
import math
import numpy as np
from scipy.spatial import ConvexHull

def compute_features(json_data, features):
    """
    Compute requested features from a JSON object representing a drawing.

    Args:
        json_data (dict): JSON object representing a drawing with 'strokes' field.
        features (list): List of features to compute. Each feature is a string.

    Returns:
        dict: A dictionary containing computed features for the drawing.
    """
    # Extract strokes from the JSON data
    strokes = json_data.get('strokes', [])
    
    # Initialize dictionary to store computed features
    computed_features = {}

    # Compute requested features
    for feature in features:
        if feature == 'total_strokes':
            # Total number of strokes in the drawing
            computed_features['total_strokes'] = len(strokes)
        
        elif feature == 'total_points':
            # Total number of points in all strokes combined
            total_points = sum(len(stroke) for stroke in strokes)
            computed_features['total_points'] = total_points
        
        elif feature == 'average_points_per_stroke':
            # Average number of points per stroke
            if len(strokes) > 0:
                avg_points_per_stroke = sum(len(stroke) for stroke in strokes) / len(strokes)
            else:
                avg_points_per_stroke = 0
            computed_features['average_points_per_stroke'] = avg_points_per_stroke
        
        elif feature == 'drawing_duration':
            # Duration of the drawing (time between first and last point)
            if strokes:
                start_time = min(min(point['t'] for point in stroke) for stroke in strokes)
                end_time = max(max(point['t'] for point in stroke) for stroke in strokes)
                drawing_duration = end_time - start_time
            else:
                drawing_duration = 0
            computed_features['drawing_duration'] = drawing_duration
        
        elif feature == 'distance_first_to_last_point':
            # Distance between the first and last point of all strokes combined
            if strokes:
                first_point = strokes[0][0]
                last_point = strokes[-1][-1]
                distance = math.sqrt((last_point['x'] - first_point['x'])**2 + (last_point['y'] - first_point['y'])**2)
            else:
                distance = 0
            computed_features['distance_first_to_last_point'] = distance
        
        elif feature == 'convex_hull_to_bounding_rectangle_ratio':
            # Ratio of area of convex hull to area of bounding rectangle
            if strokes:
                # Collect all points from all strokes
                all_points = [(point['x'], point['y']) for stroke in strokes for point in stroke]
                
                # Calculate bounding rectangle area
                min_x = min(point[0] for point in all_points)
                max_x = max(point[0] for point in all_points)
                min_y = min(point[1] for point in all_points)
                max_y = max(point[1] for point in all_points)
                bounding_rectangle_area = (max_x - min_x) * (max_y - min_y)
                
                # Calculate convex hull area
                hull = ConvexHull(all_points)
                convex_hull_area = hull.area
                
                # Compute the ratio
                if bounding_rectangle_area > 0:
                    ratio = convex_hull_area / bounding_rectangle_area
                else:
                    ratio = 0
            else:
                ratio = 0
            computed_features['convex_hull_to_bounding_rectangle_ratio'] = ratio
        
        # Add more features as needed...

    return computed_features


In [2]:
import json

# Function to load JSON data from file
def load_json_file(file_path):
    with open(file_path, 'r') as file:
        json_data = json.load(file)
    return json_data

# Example JSON file path (replace with your actual file path)
json_file_path = 'bad_A_12.json'

# List of features to compute
requested_features = [
    'total_strokes', 
    'total_points', 
    'average_points_per_stroke', 
    'drawing_duration',
    'distance_first_to_last_point',
    'convex_hull_to_bounding_rectangle_ratio'
]

# Load JSON data from file
drawing_data = load_json_file(json_file_path)

# Compute features for the drawing data
computed_features = compute_features(drawing_data, requested_features)

# Print the computed features
print("Computed Features:")
for feature, value in computed_features.items():
    print(f"{feature}: {value}")


Computed Features:
total_strokes: 1
total_points: 1130
average_points_per_stroke: 1130.0
drawing_duration: 3.8226873874664307
distance_first_to_last_point: 163.24827717314508
convex_hull_to_bounding_rectangle_ratio: 0.010176624297020185


In [3]:
import os
import json
import math
import numpy as np
from scipy.spatial import ConvexHull

def load_json_file(file_path):
    with open(file_path, 'r') as file:
        json_data = json.load(file)
    return json_data

def compute_features(json_data, features):
    strokes = json_data.get('strokes', [])
    computed_features = {}

    for feature in features:
        if feature == 'total_strokes':
            computed_features['total_strokes'] = len(strokes)
        
        elif feature == 'total_points':
            total_points = sum(len(stroke) for stroke in strokes)
            computed_features['total_points'] = total_points
        
        elif feature == 'average_points_per_stroke':
            if len(strokes) > 0:
                avg_points_per_stroke = sum(len(stroke) for stroke in strokes) / len(strokes)
            else:
                avg_points_per_stroke = 0
            computed_features['average_points_per_stroke'] = avg_points_per_stroke
        
        elif feature == 'drawing_duration':
            if strokes:
                start_time = min(min(point['t'] for point in stroke) for stroke in strokes)
                end_time = max(max(point['t'] for point in stroke) for stroke in strokes)
                drawing_duration = end_time - start_time
            else:
                drawing_duration = 0
            computed_features['drawing_duration'] = drawing_duration
        
        elif feature == 'distance_first_to_last_point':
            if strokes:
                first_point = strokes[0][0]
                last_point = strokes[-1][-1]
                distance = math.sqrt((last_point['x'] - first_point['x'])**2 + (last_point['y'] - first_point['y'])**2)
            else:
                distance = 0
            computed_features['distance_first_to_last_point'] = distance
        
        elif feature == 'convex_hull_to_bounding_rectangle_ratio':
            if strokes:
                all_points = [(point['x'], point['y']) for stroke in strokes for point in stroke]
                min_x = min(point[0] for point in all_points)
                max_x = max(point[0] for point in all_points)
                min_y = min(point[1] for point in all_points)
                max_y = max(point[1] for point in all_points)
                bounding_rectangle_area = (max_x - min_x) * (max_y - min_y)
                hull = ConvexHull(all_points)
                convex_hull_area = hull.area
                if bounding_rectangle_area > 0:
                    ratio = convex_hull_area / bounding_rectangle_area
                else:
                    ratio = 0
            else:
                ratio = 0
            computed_features['convex_hull_to_bounding_rectangle_ratio'] = ratio

    return computed_features

def process_drawing_files(folder_path, features):
    drawing_data = []

    for filename in os.listdir(folder_path):
        if filename.endswith('.json') and ('bad_A_' in filename or 'good_A_' in filename):
            file_path = os.path.join(folder_path, filename)
            json_data = load_json_file(file_path)
            computed_features = compute_features(json_data, features)

            # Determine label based on filename prefix
            if 'bad_A_' in filename:
                label = 0
            else:
                label = 1

            # Add computed features and label to the list
            computed_features['label'] = label
            drawing_data.append(computed_features)

    return drawing_data

# Specify the folder path where the JSON files are located
folder_path = '.'  # Assuming JSON files are in the same directory as this script

# List of features to compute
requested_features = [
    'total_strokes', 
    'total_points', 
    'average_points_per_stroke', 
    'drawing_duration',
    'distance_first_to_last_point',
    'convex_hull_to_bounding_rectangle_ratio'
]

# Process drawing files and compute features
drawings_features = process_drawing_files(folder_path, requested_features)

# Print out computed features with labels
for drawing in drawings_features:
    print("Features:", drawing)


Features: {'total_strokes': 2, 'total_points': 561, 'average_points_per_stroke': 280.5, 'drawing_duration': 1.3009135723114014, 'distance_first_to_last_point': 337.0370899470858, 'convex_hull_to_bounding_rectangle_ratio': 0.012089949476637052, 'label': 0}
Features: {'total_strokes': 2, 'total_points': 496, 'average_points_per_stroke': 248.0, 'drawing_duration': 1.8920116424560547, 'distance_first_to_last_point': 168.5496959356498, 'convex_hull_to_bounding_rectangle_ratio': 0.04109225623668547, 'label': 0}
Features: {'total_strokes': 3, 'total_points': 985, 'average_points_per_stroke': 328.3333333333333, 'drawing_duration': 6.094426870346069, 'distance_first_to_last_point': 244.1823089414956, 'convex_hull_to_bounding_rectangle_ratio': 0.012731247823205729, 'label': 0}
Features: {'total_strokes': 1, 'total_points': 1130, 'average_points_per_stroke': 1130.0, 'drawing_duration': 3.8226873874664307, 'distance_first_to_last_point': 163.24827717314508, 'convex_hull_to_bounding_rectangle_ratio

In [4]:
import os
import json
import math
import numpy as np
from scipy.spatial import ConvexHull
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

def load_json_file(file_path):
    with open(file_path, 'r') as file:
        json_data = json.load(file)
    return json_data

def compute_features(json_data, features):
    strokes = json_data.get('strokes', [])
    computed_features = {}

    for feature in features:
        if feature == 'total_strokes':
            computed_features['total_strokes'] = len(strokes)
        
        elif feature == 'total_points':
            total_points = sum(len(stroke) for stroke in strokes)
            computed_features['total_points'] = total_points
        
        elif feature == 'average_points_per_stroke':
            if len(strokes) > 0:
                avg_points_per_stroke = sum(len(stroke) for stroke in strokes) / len(strokes)
            else:
                avg_points_per_stroke = 0
            computed_features['average_points_per_stroke'] = avg_points_per_stroke
        
        elif feature == 'drawing_duration':
            if strokes:
                start_time = min(min(point['t'] for point in stroke) for stroke in strokes)
                end_time = max(max(point['t'] for point in stroke) for stroke in strokes)
                drawing_duration = end_time - start_time
            else:
                drawing_duration = 0
            computed_features['drawing_duration'] = drawing_duration
        
        elif feature == 'distance_first_to_last_point':
            if strokes:
                first_point = strokes[0][0]
                last_point = strokes[-1][-1]
                distance = math.sqrt((last_point['x'] - first_point['x'])**2 + (last_point['y'] - first_point['y'])**2)
            else:
                distance = 0
            computed_features['distance_first_to_last_point'] = distance
        
        elif feature == 'convex_hull_to_bounding_rectangle_ratio':
            if strokes:
                all_points = [(point['x'], point['y']) for stroke in strokes for point in stroke]
                min_x = min(point[0] for point in all_points)
                max_x = max(point[0] for point in all_points)
                min_y = min(point[1] for point in all_points)
                max_y = max(point[1] for point in all_points)
                bounding_rectangle_area = (max_x - min_x) * (max_y - min_y)
                hull = ConvexHull(all_points)
                convex_hull_area = hull.area
                if bounding_rectangle_area > 0:
                    ratio = convex_hull_area / bounding_rectangle_area
                else:
                    ratio = 0
            else:
                ratio = 0
            computed_features['convex_hull_to_bounding_rectangle_ratio'] = ratio

    return computed_features

def process_drawing_files(folder_path, features):
    drawing_data = []
    labels = []

    for filename in os.listdir(folder_path):
        if filename.endswith('.json') and ('bad_A_' in filename or 'good_A_' in filename):
            file_path = os.path.join(folder_path, filename)
            json_data = load_json_file(file_path)
            computed_features = compute_features(json_data, features)

            # Determine label based on filename prefix
            if 'bad_A_' in filename:
                label = 0
            else:
                label = 1

            # Add computed features and label to the lists
            drawing_data.append(computed_features)
            labels.append(label)

    return drawing_data, labels

# Specify the folder path where the JSON files are located
folder_path = '.'  # Assuming JSON files are in the same directory as this script

# List of features to compute
requested_features = [
    'total_strokes', 
    'total_points', 
    'average_points_per_stroke', 
    'drawing_duration',
    'distance_first_to_last_point',
    'convex_hull_to_bounding_rectangle_ratio'
]

# Process drawing files and extract features and labels
X, y = process_drawing_files(folder_path, requested_features)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train a logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))


TypeError: float() argument must be a string or a number, not 'dict'

In [None]:
import os
import json
import math
import numpy as np
from scipy.spatial import ConvexHull
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

def load_json_file(file_path):
    with open(file_path, 'r') as file:
        json_data = json.load(file)
    return json_data

def compute_features(json_data, features):
    strokes = json_data.get('strokes', [])
    computed_features = {}

    for feature in features:
        if feature == 'total_strokes':
            computed_features['total_strokes'] = len(strokes)
        
        elif feature == 'total_points':
            total_points = sum(len(stroke) for stroke in strokes)
            computed_features['total_points'] = total_points
        
        elif feature == 'average_points_per_stroke':
            if len(strokes) > 0:
                avg_points_per_stroke = sum(len(stroke) for stroke in strokes) / len(strokes)
            else:
                avg_points_per_stroke = 0
            computed_features['average_points_per_stroke'] = avg_points_per_stroke
        
        elif feature == 'drawing_duration':
            if strokes:
                start_times = [min(point['t'] for point in stroke) for stroke in strokes if stroke]
                end_times = [max(point['t'] for point in stroke) for stroke in strokes if stroke]
                drawing_duration = max(end_times) - min(start_times)
            else:
                drawing_duration = 0
            computed_features['drawing_duration'] = drawing_duration
        
        elif feature == 'distance_first_to_last_point':
            if strokes:
                first_point = strokes[0][0]
                last_point = strokes[-1][-1]
                distance = math.sqrt((last_point['x'] - first_point['x'])**2 + (last_point['y'] - first_point['y'])**2)
            else:
                distance = 0
            computed_features['distance_first_to_last_point'] = distance
        
        elif feature == 'convex_hull_to_bounding_rectangle_ratio':
            if strokes:
                all_points = [(point['x'], point['y']) for stroke in strokes for point in stroke]
                if all_points:
                    min_x = min(point[0] for point in all_points)
                    max_x = max(point[0] for point in all_points)
                    min_y = min(point[1] for point in all_points)
                    max_y = max(point[1] for point in all_points)
                    bounding_rectangle_area = (max_x - min_x) * (max_y - min_y)
                    if bounding_rectangle_area > 0:
                        hull = ConvexHull(all_points)
                        convex_hull_area = hull.area
                        ratio = convex_hull_area / bounding_rectangle_area
                    else:
                        ratio = 0
                else:
                    ratio = 0
            else:
                ratio = 0
            computed_features['convex_hull_to_bounding_rectangle_ratio'] = ratio

    return computed_features

def process_drawing_files(folder_path, features):
    drawing_data = []
    labels = []

    for filename in os.listdir(folder_path):
        if filename.endswith('.json') and ('bad_A_' in filename or 'good_A_' in filename):
            file_path = os.path.join(folder_path, filename)
            json_data = load_json_file(file_path)
            computed_features = compute_features(json_data, features)

            # Determine label based on filename prefix
            if 'bad_A_' in filename:
                label = 0
            else:
                label = 1

            # Add computed features and label to the lists
            drawing_data.append(computed_features)
            labels.append(label)

    return drawing_data, labels

# Specify the folder path where the JSON files are located
folder_path = '.'  # Assuming JSON files are in the same directory as this script

# List of features to compute
requested_features = [
    'total_strokes', 
    'total_points', 
    'average_points_per_stroke', 
    'drawing_duration',
    'distance_first_to_last_point',
    'convex_hull_to_bounding_rectangle_ratio'
]

# Process drawing files and extract features and labels
X, y = process_drawing_files(folder_path, requested_features)

# Convert features (X) into a numpy array for training
X = np.array([list(d.values()) for d in X])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train a logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))


In [7]:
import os
import json
import math
import numpy as np
from scipy.spatial import ConvexHull
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import shap

def load_json_file(file_path):
    with open(file_path, 'r') as file:
        json_data = json.load(file)
    return json_data

def compute_features(json_data, features):
    strokes = json_data.get('strokes', [])
    computed_features = {}

    for feature in features:
        if feature == 'total_strokes':
            computed_features['total_strokes'] = len(strokes)
        
        elif feature == 'total_points':
            total_points = sum(len(stroke) for stroke in strokes)
            computed_features['total_points'] = total_points
        
        elif feature == 'average_points_per_stroke':
            if len(strokes) > 0:
                avg_points_per_stroke = sum(len(stroke) for stroke in strokes) / len(strokes)
            else:
                avg_points_per_stroke = 0
            computed_features['average_points_per_stroke'] = avg_points_per_stroke
        
        elif feature == 'drawing_duration':
            if strokes:
                start_times = [min(point['t'] for point in stroke) for stroke in strokes if stroke]
                end_times = [max(point['t'] for point in stroke) for stroke in strokes if stroke]
                drawing_duration = max(end_times) - min(start_times)
            else:
                drawing_duration = 0
            computed_features['drawing_duration'] = drawing_duration
        
        elif feature == 'distance_first_to_last_point':
            if strokes:
                first_point = strokes[0][0]
                last_point = strokes[-1][-1]
                distance = math.sqrt((last_point['x'] - first_point['x'])**2 + (last_point['y'] - first_point['y'])**2)
            else:
                distance = 0
            computed_features['distance_first_to_last_point'] = distance
        
        elif feature == 'convex_hull_to_bounding_rectangle_ratio':
            if strokes:
                all_points = [(point['x'], point['y']) for stroke in strokes for point in stroke]
                if all_points:
                    min_x = min(point[0] for point in all_points)
                    max_x = max(point[0] for point in all_points)
                    min_y = min(point[1] for point in all_points)
                    max_y = max(point[1] for point in all_points)
                    bounding_rectangle_area = (max_x - min_x) * (max_y - min_y)
                    if bounding_rectangle_area > 0:
                        hull = ConvexHull(all_points)
                        convex_hull_area = hull.area
                        ratio = convex_hull_area / bounding_rectangle_area
                    else:
                        ratio = 0
                else:
                    ratio = 0
            else:
                ratio = 0
            computed_features['convex_hull_to_bounding_rectangle_ratio'] = ratio

    return computed_features

def process_drawing_files(folder_path, features):
    drawing_data = []
    labels = []

    for filename in os.listdir(folder_path):
        if filename.endswith('.json') and ('bad_A_' in filename or 'good_A_' in filename):
            file_path = os.path.join(folder_path, filename)
            json_data = load_json_file(file_path)
            computed_features = compute_features(json_data, features)

            # Determine label based on filename prefix
            if 'bad_A_' in filename:
                label = 0
            else:
                label = 1

            # Add computed features and label to the lists
            drawing_data.append(computed_features)
            labels.append(label)

    return drawing_data, labels

# Specify the folder path where the JSON files are located
folder_path = '.'  # Assuming JSON files are in the same directory as this script

# List of features to compute
requested_features = [
    'total_strokes', 
    'total_points', 
    'average_points_per_stroke', 
    'drawing_duration',
    'distance_first_to_last_point',
    'convex_hull_to_bounding_rectangle_ratio'
]

# Process drawing files and extract features and labels
X, y = process_drawing_files(folder_path, requested_features)

# Convert features (X) into a numpy array for training
X = np.array([list(d.values()) for d in X])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train a logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Explain each prediction using SHAP
explainer = shap.Explainer(model, X_train)
shap_values = explainer.shap_values(X_test)

# Plot SHAP values for each prediction
for i in range(len(X_test)):
    print(f"Prediction {i+1}:")
    shap.force_plot(explainer.expected_value, shap_values[i], X_test[i], feature_names=requested_features)

Accuracy: 0.57
Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.67      0.57         3
           1       0.67      0.50      0.57         4

    accuracy                           0.57         7
   macro avg       0.58      0.58      0.57         7
weighted avg       0.60      0.57      0.57         7



IndexError: invalid index to scalar variable.

In [13]:
import os
import json
import math
import numpy as np
from scipy.spatial import ConvexHull
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import shap
import matplotlib.pyplot as plt

def load_json_file(file_path):
    with open(file_path, 'r') as file:
        json_data = json.load(file)
    return json_data

def compute_features(json_data, features):
    strokes = json_data.get('strokes', [])
    computed_features = {}

    for feature in features:
        if feature == 'total_strokes':
            computed_features['total_strokes'] = len(strokes)
        
        elif feature == 'total_points':
            total_points = sum(len(stroke) for stroke in strokes)
            computed_features['total_points'] = total_points
        
        elif feature == 'average_points_per_stroke':
            if len(strokes) > 0:
                avg_points_per_stroke = sum(len(stroke) for stroke in strokes) / len(strokes)
            else:
                avg_points_per_stroke = 0
            computed_features['average_points_per_stroke'] = avg_points_per_stroke
        
        elif feature == 'drawing_duration':
            if strokes:
                start_times = [min(point['t'] for point in stroke) for stroke in strokes if stroke]
                end_times = [max(point['t'] for point in stroke) for stroke in strokes if stroke]
                drawing_duration = max(end_times) - min(start_times)
            else:
                drawing_duration = 0
            computed_features['drawing_duration'] = drawing_duration
        
        elif feature == 'distance_first_to_last_point':
            if strokes:
                first_point = strokes[0][0]
                last_point = strokes[-1][-1]
                distance = math.sqrt((last_point['x'] - first_point['x'])**2 + (last_point['y'] - first_point['y'])**2)
            else:
                distance = 0
            computed_features['distance_first_to_last_point'] = distance
        
        elif feature == 'convex_hull_to_bounding_rectangle_ratio':
            if strokes:
                all_points = [(point['x'], point['y']) for stroke in strokes for point in stroke]
                if all_points:
                    min_x = min(point[0] for point in all_points)
                    max_x = max(point[0] for point in all_points)
                    min_y = min(point[1] for point in all_points)
                    max_y = max(point[1] for point in all_points)
                    bounding_rectangle_area = (max_x - min_x) * (max_y - min_y)
                    if bounding_rectangle_area > 0:
                        hull = ConvexHull(all_points)
                        convex_hull_area = hull.area
                        ratio = convex_hull_area / bounding_rectangle_area
                    else:
                        ratio = 0
                else:
                    ratio = 0
            else:
                ratio = 0
            computed_features['convex_hull_to_bounding_rectangle_ratio'] = ratio

    return computed_features

def process_drawing_files(folder_path, features):
    drawing_data = []
    labels = []

    for filename in os.listdir(folder_path):
        if filename.endswith('.json') and ('bad_A_' in filename or 'good_A_' in filename):
            file_path = os.path.join(folder_path, filename)
            json_data = load_json_file(file_path)
            computed_features = compute_features(json_data, features)

            # Determine label based on filename prefix
            if 'bad_A_' in filename:
                label = 0
            else:
                label = 1

            # Add computed features and label to the lists
            drawing_data.append(computed_features)
            labels.append(label)

    return drawing_data, labels

# Specify the folder path where the JSON files are located
folder_path = '.'  # Assuming JSON files are in the same directory as this script

# List of features to compute
requested_features = [
    'total_strokes', 
    'total_points', 
    'average_points_per_stroke', 
    'drawing_duration',
    'distance_first_to_last_point',
    'convex_hull_to_bounding_rectangle_ratio'
]

# Process drawing files and extract features and labels
X, y = process_drawing_files(folder_path, requested_features)

# Convert features (X) into a numpy array for training
X = np.array([list(d.values()) for d in X])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train a logistic regression model
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.86
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.67      0.80         3
           1       0.80      1.00      0.89         4

    accuracy                           0.86         7
   macro avg       0.90      0.83      0.84         7
weighted avg       0.89      0.86      0.85         7

