In [1]:
from libsvm.svmutil import *
import numpy as np
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics.pairwise import rbf_kernel
import matplotlib.pyplot as plt


In [2]:
y, x = svm_read_problem('mnist.scale')
x

[{153: 0.0117647,
  154: 0.0705882,
  155: 0.0705882,
  156: 0.0705882,
  157: 0.494118,
  158: 0.533333,
  159: 0.686275,
  160: 0.101961,
  161: 0.65098,
  162: 1.0,
  163: 0.968627,
  164: 0.498039,
  177: 0.117647,
  178: 0.141176,
  179: 0.368627,
  180: 0.603922,
  181: 0.666667,
  182: 0.992157,
  183: 0.992157,
  184: 0.992157,
  185: 0.992157,
  186: 0.992157,
  187: 0.882353,
  188: 0.67451,
  189: 0.992157,
  190: 0.94902,
  191: 0.764706,
  192: 0.25098,
  204: 0.192157,
  205: 0.933333,
  206: 0.992157,
  207: 0.992157,
  208: 0.992157,
  209: 0.992157,
  210: 0.992157,
  211: 0.992157,
  212: 0.992157,
  213: 0.992157,
  214: 0.984314,
  215: 0.364706,
  216: 0.321569,
  217: 0.321569,
  218: 0.219608,
  219: 0.152941,
  232: 0.0705882,
  233: 0.858824,
  234: 0.992157,
  235: 0.992157,
  236: 0.992157,
  237: 0.992157,
  238: 0.992157,
  239: 0.776471,
  240: 0.713725,
  241: 0.968627,
  242: 0.945098,
  261: 0.313725,
  262: 0.611765,
  263: 0.419608,
  264: 0.992157,
 

In [3]:

def class_choices(x, y ,class_a, class_b):
    label_class = [] 
    feature_class = []
    nx = np.array(x)
    ny = np.array(y)
    new_set = (ny  == class_a )| (ny == class_b)
    label_class = ny[new_set]
    feature_class = nx[new_set]
    # for label  in range(len(y)) :
    #     if y[label] == class_a or y[label] == class_b :
    #         label_class.append(y[label])
    #         feature_class.append(x[label])
    label_class = np.where(label_class == class_a, 1, -1)

    # return np.array(label_class) , np.array(feature_class)
    return label_class , feature_class
            
label_class ,feature_class = class_choices(x ,y ,class_b = 7 , class_a = 3)
 
print(label_class[:20])
print(feature_class[:10])


[ 1  1  1 -1  1 -1  1 -1 -1  1  1  1 -1 -1  1 -1 -1  1 -1 -1]
[{152: 0.14902, 153: 0.168627, 154: 0.411765, 155: 1.0, 156: 0.992157, 157: 0.992157, 158: 0.992157, 159: 0.992157, 160: 0.992157, 161: 0.682353, 162: 0.0235294, 178: 0.168627, 179: 0.545098, 180: 0.878431, 181: 0.886275, 182: 0.988235, 183: 0.992157, 184: 0.988235, 185: 0.988235, 186: 0.988235, 187: 0.988235, 188: 0.988235, 189: 0.988235, 190: 0.619608, 191: 0.054902, 206: 0.698039, 207: 0.988235, 208: 0.988235, 209: 0.988235, 210: 0.988235, 211: 0.992157, 212: 0.988235, 213: 0.988235, 214: 0.988235, 215: 0.988235, 216: 0.988235, 217: 0.988235, 218: 0.988235, 219: 0.231373, 234: 0.427451, 235: 0.988235, 236: 0.988235, 237: 0.901961, 238: 0.517647, 239: 0.521569, 240: 0.517647, 241: 0.517647, 242: 0.741176, 243: 0.988235, 244: 0.988235, 245: 0.988235, 246: 0.988235, 247: 0.231373, 262: 0.0156863, 263: 0.113725, 264: 0.113725, 265: 0.0941176, 270: 0.054902, 271: 0.886275, 272: 0.988235, 273: 0.988235, 274: 0.67451, 275: 0.027

In [None]:
from tqdm import tqdm
import random
C_value = 1
gamma_values = [0.01, 0.1, 1, 10, 100]
num_iterations = 128

# Initialize the counter for gamma selection
gamma_selection_counts = {gamma: 0 for gamma in gamma_values}
num_samples = len(label_class)

for iteration in range(num_iterations):
    # Randomly sample indices for validation and training
    all_indices = np.arange(num_samples)
    random.seed(iteration)  # For reproducibility
    random.shuffle(all_indices)
    
    val_indices = all_indices[:200]
    train_indices = all_indices[200:]
    
    # Create training and validation sets
    y_train = [label_class[i] for i in train_indices]
    x_train = [feature_class[i] for i in train_indices]
    
    y_val = [label_class[i] for i in val_indices]
    x_val = [feature_class[i] for i in val_indices]
    
    # Initialize variables to store the best gamma and lowest error
    best_gamma = None
    lowest_error = float('inf')
    errors = {}
    
    for gamma in gamma_values:
        # Prepare the parameter string
        param_str = f'-q -t 2 -c {C_value} -g {gamma}'
        
        # Train the model on training data
        model = svm_train(y_train, x_train, param_str)
        
        # Predict on validation data
        p_labels, p_acc, p_vals = svm_predict(y_val, x_val, model, '-q')
        
        # Calculate 0/1 error (error rate)
        error_rate = 100 - p_acc[0]  # p_acc[0] is accuracy percentage
        errors[gamma] = error_rate
    
 
    min_error = min(errors.values())
    best_gammas = [gamma for gamma, error in errors.items() if error == min_error]
    
  
    selected_gamma = min(best_gammas)
    
    # Update the counter
    gamma_selection_counts[selected_gamma] += 1
        
           

Exception ignored on calling ctypes callback function: <function print_null at 0x000002BD28260FE0>
Traceback (most recent call last):
  File "c:\Users\cheeh\anaconda3\Lib\site-packages\libsvm\svm.py", line 58, in print_null
    def print_null(s):
    
KeyboardInterrupt: 
Exception ignored on calling ctypes callback function: <function print_null at 0x000002BD28260FE0>
Traceback (most recent call last):
  File "c:\Users\cheeh\anaconda3\Lib\site-packages\libsvm\svm.py", line 58, in print_null
    def print_null(s):
    
KeyboardInterrupt: 


In [None]:
# Extract gamma values and their counts
gammas = list(gamma_selection_counts.keys())
counts = [gamma_selection_counts[gamma] for gamma in gammas]

# Create the bar chart
plt.figure(figsize=(10, 6))
plt.bar([str(gamma) for gamma in gammas], counts, color='skyblue')
plt.xlabel('Gamma Values')
plt.ylabel('Selection Frequency')
plt.title('Gamma Selection Frequency over 128 Iterations')
plt.show()