In [8]:
import json
import pandas as pd
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

def to_numeric(F6):
    """Convert one raw ``F6`` cell to an integer, or NaN when it is missing.

    Args:
        F6: A single cell value: typically a string such as ``"7"`` or the
            placeholder ``"?"``, but it may also be a number, ``None`` or NaN.

    Returns:
        ``int(F6)`` for a real value, or ``np.nan`` when the cell is the
        ``"?"`` placeholder (surrounding whitespace ignored), ``None`` or NaN.

    Raises:
        ValueError: If the value is not a missing marker and cannot be parsed
            by ``int()``.
    """
    # Values that are already missing (e.g. an empty CSV field read as NaN)
    # would make int() raise, so pass them through as NaN.
    if F6 is None or (isinstance(F6, (float, np.floating)) and np.isnan(F6)):
        return np.nan
    # "?" is the missing-value placeholder; tolerate padding such as " ?".
    if isinstance(F6, str) and F6.strip() == "?":
        return np.nan
    return int(F6)

# Load the run parameters from JSON. JSON text is UTF-8, so do not rely on the
# platform's default encoding.
with open('svm_sample.json', 'r', encoding='utf-8') as file:
    parameters = json.load(file)

# Fail fast, before any data is loaded, if no target column was configured.
target_variable = parameters.get("target_variable", None)
if target_variable is None:
    raise ValueError("Target variable not specified in the parameters.")

your_data = pd.read_csv(parameters['filename'])

# Give a clear error instead of an opaque KeyError from drop() further down.
if target_variable not in your_data.columns:
    raise ValueError(
        f"Target variable '{target_variable}' not found in the dataset columns."
    )

# Column F6 marks missing entries with "?": convert it to numeric values and
# fill the gaps with the column mean.
# NOTE(review): the mean is computed over the full dataset, before the
# train/test split, so test rows influence the imputed values. Consider
# imputing inside the pipeline (fit on the training split only).
your_data['F6'] = your_data['F6'].apply(to_numeric)
mean_F6 = your_data['F6'].mean()
your_data['F6'] = your_data['F6'].fillna(mean_F6)

# Features are every column except the target.
X = your_data.drop(columns=[target_variable])
y = your_data[target_variable]

# Default parameters for the SVC classifier; any key supplied under
# "svm_parameters" in the JSON overrides the value here.
default_svm_parameters = {
    'C': 1.0,
    'kernel': 'rbf',
    'degree': 3,
    'gamma': 'scale',
    'coef0': 0.0,
    'shrinking': True,
    'probability': False,
    'tol': 0.001,
    'cache_size': 200,
    'class_weight': None,
    'verbose': False,
    'max_iter': -1,
    'decision_function_shape': 'ovr',
    'break_ties': False,
    'random_state': None,
}

# Hold out parameters['split'] of the rows for testing; the fixed seed keeps
# the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=parameters['split'], random_state=42
)

# Merge default and user-provided parameters. "or {}" also covers an explicit
# `"svm_parameters": null` in the JSON, which would otherwise break the merge.
merged_parameters = {
    **default_svm_parameters,
    **(parameters.get("svm_parameters") or {}),
}

# Build the model: feature standardisation followed by an SVM classifier.
# The variable name `dt_model` is kept unchanged in case other cells use it,
# even though the model is an SVM pipeline rather than a decision tree.
dt_model = make_pipeline(StandardScaler(), SVC(**merged_parameters))

# Train the pipeline (the scaler is fitted on the training split only)
dt_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = dt_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")


Accuracy: 0.9714285714285714
