In [13]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import accuracy_score


In [14]:
# Species codes; each one has a matching '<code>_combined_labels.npy' file
# that is read from the current working directory.
species_names = ['comcuc', 'cowpig1', 'eucdov', 'eueowl1', 'grswoo', 'tawowl1']

# Placeholder only: training_data is reassigned from a feature file later on.
training_data = []

# One label array per species, kept in species_names order.
training_labels = [
    np.load(f'{species_name}_combined_labels.npy')
    for species_name in species_names
]

In [15]:
# Merge the per-species label arrays into one array (species_names order).
training_labels = np.concatenate(training_labels, axis=0)

# Pre-computed feature matrix for all species.
# NOTE(review): rows are presumably in the same order as the concatenated
# labels above -- confirm against the script that produced this file.
training_data = np.load('2reduced_combined_features.npy')

In [16]:
# Subsample every species block so the SVM hyper-parameter search stays cheap.
#
# The feature/label arrays are treated as consecutive, equally sized blocks,
# one per entry of species_names (assumption carried over from the original
# hard-coded slicing -- TODO confirm against the feature-extraction step).
num_samples_per_class = 2000      # rows kept from each block
samples_per_block = 20000         # rows each block holds before subsampling
num_blocks = len(species_names)   # one block per species file

# Dedicated seeded generator: the draw below used to run *before* any seed was
# set, so every kernel restart produced a different subsample (and therefore
# different search results and predictions).
subsample_rng = np.random.default_rng(42)

# Fail early with a readable message instead of an IndexError deep in the loop.
required_rows = num_blocks * samples_per_block
if len(training_data) < required_rows or len(training_labels) < required_rows:
    raise ValueError(
        f"expected at least {required_rows} rows "
        f"({num_blocks} blocks x {samples_per_block}), got "
        f"{len(training_data)} feature rows and {len(training_labels)} labels"
    )

reduced_training_data = []
reduced_training_labels = []

# Iterate over each block
for i in range(num_blocks):
    block = slice(i * samples_per_block, (i + 1) * samples_per_block)
    class_data = training_data[block]
    class_labels = training_labels[block]

    # Same indices for features and labels so the pairs stay aligned.
    random_indices = subsample_rng.choice(
        samples_per_block, num_samples_per_class, replace=False
    )
    reduced_training_data.append(class_data[random_indices])
    reduced_training_labels.append(class_labels[random_indices])


In [17]:
# Stack the per-block pieces into single arrays, preserving block order.
# Gotcha: both names are rebound from a list of arrays to one array, so this
# cell must run exactly once after the subsampling loop.
reduced_training_data = np.concatenate(reduced_training_data, axis=0)
reduced_training_labels = np.concatenate(reduced_training_labels, axis=0)

In [20]:
# Seed NumPy's global RNG so the permutation below is repeatable.
np.random.seed(42)

# One shared permutation reorders features and labels together, keeping each
# row paired with its own label.
shuffle_indices = np.random.permutation(len(reduced_training_data))
reduced_training_data, reduced_training_labels = (
    reduced_training_data[shuffle_indices],
    reduced_training_labels[shuffle_indices],
)


In [23]:
# Split the reduced training data into training and validation sets:
# 20% of the rows are held out (test_size=0.2) and random_state=42 fixes the
# split so it is the same on every run.
X_train, X_val, y_train, y_val = train_test_split(
    reduced_training_data, reduced_training_labels, test_size=0.2, random_state=42
)


In [24]:
# Candidate hyper-parameter values from which the randomized search samples.
param_dist = dict(
    C=[0.1, 1, 10],
    gamma=[0.1, 1, 10],
    kernel=['linear', 'rbf'],
)

In [25]:
# Base support-vector classifier with library defaults; it is handed to the
# randomized search as the estimator to tune.
svm = SVC()

In [26]:
# Randomized hyper-parameter search: 10 sampled candidates from param_dist,
# each scored with 5-fold cross-validation.
random_search = RandomizedSearchCV(
    estimator=svm,
    param_distributions=param_dist,
    n_iter=10,
    cv=5,
)

In [27]:
# Run the search on the training split only; X_val / y_val are not passed in.
random_search.fit(X_train, y_train)

RandomizedSearchCV(cv=5, estimator=SVC(),
                   param_distributions={'C': [0.1, 1, 10],
                                        'gamma': [0.1, 1, 10],
                                        'kernel': ['linear', 'rbf']})

In [28]:
# Pull the winning hyper-parameter combination and its score out of the
# fitted search, then report both.
best_params, best_score = random_search.best_params_, random_search.best_score_

for heading, value in (("Best Parameters:", best_params), ("Best Score:", best_score)):
    print(heading, value)

Best Parameters: {'kernel': 'rbf', 'gamma': 0.1, 'C': 10}
Best Score: 0.8658333333333333


In [29]:
# Held-out check before the final refit. The validation split was created
# earlier but never scored; the fitted search predicts with its best estimator
# (trained on X_train only, via the default refit=True), so this accuracy comes
# from rows that the search never saw.
val_predictions = random_search.predict(X_val)
print("Validation accuracy:", accuracy_score(y_val, val_predictions))

# Use the best parameters to train the final classifier on the full reduced
# set (training + validation rows), which is the model used for test predictions.
best_svm = SVC(**best_params)
best_svm.fit(reduced_training_data, reduced_training_labels)

SVC(C=10, gamma=0.1)

In [30]:
# Predict on the test set using the trained classifier.
# The test features are read from a pre-computed .npy file in the working
# directory. NOTE(review): they presumably went through the same reduction as
# '2reduced_combined_features.npy' -- confirm before trusting the predictions.
test_data = np.load('2reduced_test_features.npy')
test_predictions = best_svm.predict(test_data)

In [31]:
import csv

In [32]:
# Output layout: num_rows rows, each starting with a file name ("test00",
# "test01", ...) followed by values_per_row consecutive predictions.
values_per_row = 3000
num_rows = 16

# Gotcha: slicing past the end of test_predictions yields short or empty rows
# rather than an error, and predictions beyond num_rows * values_per_row are
# not written.
rows = [
    [
        f"test{i:02d}",
        *map(str, test_predictions[i * values_per_row:(i + 1) * values_per_row]),
    ]
    for i in range(num_rows)
]

# newline="" lets the csv module control line endings itself.
csv_file = "predictions5_eval2000.csv"
with open(csv_file, mode="w", newline="") as handle:
    csv.writer(handle).writerows(rows)

print("CSV file created successfully!")

CSV file created successfully!


In [33]:
def count_six(lst):
    """Return how many elements of ``lst`` compare equal to 6.

    Args:
        lst: Any iterable whose elements can be compared with ``== 6``.

    Returns:
        The number of matching elements as an int (0 for an empty iterable).
    """
    return sum(1 for item in lst if item == 6)
# Show how many test predictions equal label 6.
# NOTE(review): the meaning of label 6 is not defined in this notebook -- confirm.
print(count_six(test_predictions))

1691
