In [14]:
import glob
import os
import pickle

import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [4]:
folder_path = '../data/history/raw'
csv_files = glob.glob(os.path.join(folder_path, '*.csv'))

dfs = []
for file in csv_files:
    df = pd.read_csv(file)

    is_walkable_cols = [col for col in df.columns if col.endswith('.is_walkable')]

    indices = []
    for col in is_walkable_cols:
        if col.startswith('choosen_pattern'):
            continue
        try:
            idx = int(col.split('.')[0])
            indices.append(idx)
        except ValueError:
            continue
    
    if indices:
        indices.sort()
        middle_idx = indices[len(indices) // 2]
    else:
        middle_idx = None
    
    selected_columns = ['success', 'choosen_pattern.is_walkable']
    
    for col in is_walkable_cols:
        if col.startswith('choosen_pattern'):
            continue
        
        try:
            col_idx = int(col.split('.')[0])
            if col_idx != middle_idx:
                selected_columns.append(col)
        except ValueError:
            selected_columns.append(col)
    
    df_selected = df[selected_columns]
    dfs.append(df_selected)

df = pd.concat(dfs, ignore_index=True)

print(f"Successfully concatenated {len(csv_files)} files")
print(f"Final dataframe shape: {df.shape}")
print("Columns:", df.columns.tolist())

Successfully concatenated 1628 files
Final dataframe shape: (3256, 10)
Columns: ['success', 'choosen_pattern.is_walkable', '0.is_walkable', '1.is_walkable', '2.is_walkable', '3.is_walkable', '5.is_walkable', '6.is_walkable', '7.is_walkable', '8.is_walkable']


In [13]:
df.to_csv("../data/history/concat/history-0.csv", index=False)

In [6]:
X = df.drop('success', axis=1)
y = df['success']

X_train_test, X_val, y_train_test, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

X_train, X_test, y_train, y_test = train_test_split(
    X_train_test, y_train_test, test_size=0.25, random_state=42, stratify=y_train_test
)

print(f"Training set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")
print(f"Validation set size: {X_val.shape[0]}")

Training set size: 1953
Test set size: 651
Validation set size: 652


In [7]:
dt_classifier = DecisionTreeClassifier(random_state=42)
dt_classifier.fit(X_train, y_train)
y_pred_test = dt_classifier.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred_test)
print(f"\nTest Accuracy: {test_accuracy:.4f}")
print("\nTest Classification Report:")
print(classification_report(y_test, y_pred_test))

y_pred_val = dt_classifier.predict(X_val)
val_accuracy = accuracy_score(y_val, y_pred_val)
print(f"\nValidation Accuracy: {val_accuracy:.4f}")
print("\nValidation Classification Report:")
print(classification_report(y_val, y_pred_val))


Test Accuracy: 0.9293

Test Classification Report:
              precision    recall  f1-score   support

       False       0.91      0.96      0.93       326
        True       0.95      0.90      0.93       325

    accuracy                           0.93       651
   macro avg       0.93      0.93      0.93       651
weighted avg       0.93      0.93      0.93       651


Validation Accuracy: 0.9172

Validation Classification Report:
              precision    recall  f1-score   support

       False       0.90      0.94      0.92       326
        True       0.94      0.89      0.92       326

    accuracy                           0.92       652
   macro avg       0.92      0.92      0.92       652
weighted avg       0.92      0.92      0.92       652



In [11]:
with open("../weights/dtree/decision_tree_model.pkl", "wb") as file:
    pickle.dump(dt_classifier, file)