# imports

In [1]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score


In [2]:

data = pd.read_csv('pulsar_stars.csv')
print(data.head)

<bound method NDFrame.head of        Mean_of_the_integrated_profile  \
0                          140.562500   
1                          102.507812   
2                          103.015625   
3                          136.750000   
4                           88.726562   
...                               ...   
17893                      136.429688   
17894                      122.554688   
17895                      119.335938   
17896                      114.507812   
17897                       57.062500   

       Standard_deviation_of_the_integrated_profile  \
0                                         55.683782   
1                                         58.882430   
2                                         39.341649   
3                                         57.178449   
4                                         40.672225   
...                                             ...   
17893                                     59.847421   
17894                                

# Handle missing values

In [3]:
data.dropna(inplace=True)
data.isna().sum()

Mean_of_the_integrated_profile                  0
Standard_deviation_of_the_integrated_profile    0
Excess_kurtosis_of_the_integrated_profile       0
Skewness_of_the_integrated_profile              0
Mean_of_the_DM_SNR_curve                        0
Standard_deviation_of_the_DM_SNR_curve          0
 Excess_kurtosis_of_the_DM_SNR_curve            0
 Skewness_of_the_DM_SNR_curve                   0
target_class                                    0
dtype: int64

# define lable and feature

In [4]:
X_train, X_test, y_train, y_test = train_test_split(data.drop('target_class', axis=1), data['target_class'], test_size=0.2, random_state=42)

In [14]:
kernels = ['rbf', 'linear', 'poly', 'sigmoid']
C_values = [1,100,1000]

# Train and evaluate SVM models
for kernel in kernels:
    for C in C_values:
        model = SVC(kernel=kernel, C=C)
        model.fit(X_train, y_train)
        predictions = model.predict(X_train)
        accuracy = accuracy_score(y_train, predictions)
        print({'Kernel': kernel, 'C': C, 'Accuracy of train': accuracy , ' result' : predictions})
        predictions = model.predict(X_test)
        accuracy = accuracy_score(y_test, predictions)
        print({'Kernel': kernel, 'C': C, 'Accuracy of test': accuracy , ' result' : predictions})
        

{'Kernel': 'rbf', 'C': 1, 'Accuracy of train': 0.9726917167202124, ' result': array([0, 0, 0, ..., 0, 0, 0], dtype=int64)}
{'Kernel': 'rbf', 'C': 1, 'Accuracy of test': 0.9731843575418995, ' result': array([0, 0, 0, ..., 0, 0, 1], dtype=int64)}
{'Kernel': 'rbf', 'C': 100, 'Accuracy of train': 0.9779997206313731, ' result': array([0, 0, 0, ..., 0, 0, 0], dtype=int64)}
{'Kernel': 'rbf', 'C': 100, 'Accuracy of test': 0.979608938547486, ' result': array([0, 1, 0, ..., 0, 0, 1], dtype=int64)}
{'Kernel': 'rbf', 'C': 1000, 'Accuracy of train': 0.9799553010196955, ' result': array([0, 0, 0, ..., 0, 0, 0], dtype=int64)}
{'Kernel': 'rbf', 'C': 1000, 'Accuracy of test': 0.979608938547486, ' result': array([0, 1, 0, ..., 0, 0, 1], dtype=int64)}
{'Kernel': 'linear', 'C': 1, 'Accuracy of train': 0.9791870372957117, ' result': array([0, 0, 0, ..., 0, 0, 0], dtype=int64)}
{'Kernel': 'linear', 'C': 1, 'Accuracy of test': 0.979050279329609, ' result': array([0, 1, 0, ..., 0, 0, 1], dtype=int64)}
{'Kerne

# find best params with GridSearchCV

In [6]:

svr_rbf = SVR(kernel='rbf')
svr_linear = SVR(kernel='linear')
svr_poly = SVR(kernel='poly')

# Define parameter grids
param_grid_rbf = {'C': [1, 10, 100, 500], 'gamma': [0.1, 0.3, 0.5, 0.7, 0.9]}
param_grid_linear = {'C': [1, 10, 100, 1000]}
param_grid_poly = {'degree': [2, 3, 4], 'C': [1, 10, 100, 500], 'gamma': [0.01, 0.03, 0.05]}



# Perform Grid Search for each model
grid_search_rbf = GridSearchCV(svr_rbf, param_grid_rbf, cv=5)
grid_search_linear = GridSearchCV(svr_linear, param_grid_linear, cv=5)
grid_search_poly = GridSearchCV(svr_poly, param_grid_poly, cv=5)

# Fit the models
grid_search_rbf.fit(X_train, y_train)
grid_search_linear.fit(X_train, y_train)
grid_search_poly.fit(X_train, y_train)

# Print the best parameters for each model
print("Best parameters for RBF kernel:", grid_search_rbf.best_params_)
print("Best parameters for Linear kernel:", grid_search_linear.best_params_)
print("Best parameters for Polynomial kernel:", grid_search_poly.best_params_)


# Calculate precision, recall, and f1-score

In [None]:
# Predictions on the test set using the best model
predictions = grid_search_linear.predict(X_train)


precision = precision_score(y_train, predictions)
recall = recall_score(y_train, predictions)
f1 = f1_score(y_train, predictions)

# Calculate accuracy
accuracy = accuracy_score(y_train, predictions)

# Print the results
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print(f"Accuracy: {accuracy:.4f}")