# Modeling
Train a classification model and a regression model to predict tool wear (the `flank_wear` column).

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Ridge
from sklearn.metrics import classification_report, mean_squared_error
from src.data_loader import load_vicomtech_data

In [None]:
# Load the dataset through the project loader.
# NOTE(review): the exact effect of selected_only=True is defined in
# src.data_loader — presumably a feature subset; confirm there.
df = load_vicomtech_data(selected_only=True)

# When the target column is missing, assume the frame still carries raw
# column names and translate the ones that are present.
if 'flank_wear' not in df.columns:
    rename_map = {
        'Vb': 'flank_wear',
        'AE_RMS': 'acoustic_rms',
        'AE_MAX': 'acoustic_peak',
        'F_c_RMS': 'cutting_force_rms',
        'F_c_MAX': 'cutting_force_max',
        'Tool': 'tool_id',
        'ToolID': 'tool_id'
    }
    # Restrict the mapping to raw names that actually occur in the frame.
    applicable = {
        raw: canonical
        for raw, canonical in rename_map.items()
        if raw in df.columns
    }
    df.rename(columns=applicable, inplace=True)

# Target is the flank_wear column; every remaining column is a feature.
y = df['flank_wear']
X = df.drop(columns='flank_wear')

In [None]:
# Binary target: 1 where y is strictly above its own median, otherwise 0.
y_cls = y.gt(y.median()).astype(int)

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable between runs.
X_train, X_test, y_train_cls, y_test_cls = train_test_split(
    X,
    y_cls,
    test_size=0.2,
    random_state=0,
)

# fit() returns the estimator itself, so construction and training chain.
clf = RandomForestClassifier(n_estimators=100, random_state=0).fit(
    X_train, y_train_cls
)
pred_cls = clf.predict(X_test)

# Per-class precision / recall / F1 on the held-out rows.
print(classification_report(y_test_cls, pred_cls))

In [None]:
# Separate 80/20 split against the continuous target y, using the same
# seed and test fraction as the classification split.
X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Ridge regression with regularisation strength alpha=1.0; fit() returns
# the fitted estimator.
reg = Ridge(alpha=1.0).fit(X_train_r, y_train_r)
pred_r = reg.predict(X_test_r)

# Report mean squared error on the held-out rows.
mse = mean_squared_error(y_test_r, pred_r)
print('MSE:', mse)