In [None]:
import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.svm import SVC

# Train an RBF-kernel SVM that predicts the 'Arrest' column from every other
# column of the CSV, print hold-out metrics, and persist the fitted pipeline.
df = pd.read_csv("Crime_processed_no_outliers.csv")

X = df.drop(columns=['Arrest'])
y = df['Arrest']

# Stratify on the target so the train and test sets keep the same class
# ratio. Without it the share of the minority class in each split depends on
# the random draw, which makes per-class metrics less reliable on an
# imbalanced target.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Ordinal-encode only the non-numeric columns. Running OrdinalEncoder over
# numeric columns replaces their values with rank codes and maps every value
# that was not seen during fit to -1 at prediction time, which throws away
# information; numeric columns are passed through to the scaler unchanged.
# NOTE(review): assumes categorical features are stored with a non-numeric
# dtype (object/bool/category) - confirm against the CSV.
encoder = ColumnTransformer(
    transformers=[
        (
            'categorical',
            OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1),
            make_column_selector(dtype_exclude='number'),
        ),
    ],
    remainder='passthrough',
)

# Encoding -> standardisation -> SVM. Keeping the preprocessing inside the
# pipeline means it is fitted on the training split only and is saved
# together with the model.
pipeline = Pipeline([
    ('encoding', encoder),
    ('scaler', StandardScaler()),
    ('svm', SVC(kernel='rbf')),
])

# Train pipeline
pipeline.fit(X_train, y_train)

# Predict and evaluate on the held-out split
y_pred = pipeline.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Persist the fitted pipeline (preprocessing + model) for later reuse.
joblib.dump(pipeline, 'svm_pipeline_model.pkl')


Accuracy: 0.8954555244454017
Classification Report:
               precision    recall  f1-score   support

           0       0.90      1.00      0.94     41075
           1       0.81      0.12      0.21      5355

    accuracy                           0.90     46430
   macro avg       0.85      0.56      0.58     46430
weighted avg       0.89      0.90      0.86     46430



['svm_pipeline_model.pkl']