In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score

# Load the table and derive the feature matrix and the binary label matrix.
df = pd.read_csv('allseasons.csv')
# 'Unnamed: 0' is the name pandas assigns to a header-less CSV column
# (presumably a previously saved index -- confirm against the data file).
df = df.drop(['Unnamed: 0'], axis=1)

# Everything used below lives in the columns from position 11 onward.
# NOTE(review): assumes the first 11 columns are not needed for modelling -- confirm.
ml_df = df.iloc[:, 11:]
features = ml_df[['Surface_Salinity', 'Bottom_Salinity', 'salt_strat', 'Surface_Temp', 'Bottom_Temp', 'temp_strat']]
# Binarize the label columns: 1 where the value is > 0, otherwise 0.
# The vectorized comparison replaces the deprecated element-wise
# DataFrame.applymap; NaN compares as False and therefore maps to 0.
labels = ml_df.loc[:, 'Atl_croaker_(nibea98)':'Atl_salmon'].gt(0).astype(int)

# Split the dataset into training (70%) and test (30%) sets; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=109)

# Keep only the label columns that show more than one class in the training split.
valid_columns = [col for col in y_train.columns if y_train[col].nunique() > 1]
if not valid_columns:
    # Fail early with a clear message instead of an opaque error further down
    # (fitting on zero outputs / dividing by zero when averaging).
    raise ValueError('No label column has more than one class in the training split')
y_train_filtered = y_train[valid_columns]
y_test_filtered = y_test[valid_columns]

# Train one Gaussian Naive Bayes classifier per label column.
# LogisticRegression(solver='lbfgs', max_iter=1000) can be swapped in as the
# base estimator.
clf = MultiOutputClassifier(GaussianNB())
clf.fit(X_train, y_train_filtered)
y_pred = clf.predict(X_test)

# Accuracy per label; column i of y_pred corresponds to valid_columns[i],
# so both sides are indexed by position.
accuracies = {
    col: accuracy_score(y_test_filtered.iloc[:, i], y_pred[:, i])
    for i, col in enumerate(valid_columns)
}

# "Overall" accuracy is the unweighted mean of the per-label accuracies
# (not the exact-match accuracy across all labels at once).
overall_accuracy = sum(accuracies.values()) / len(accuracies)

# Print the results
for species, accuracy in accuracies.items():
    print(f'Accuracy for {species}: {accuracy:.4f}')

print(f'Overall accuracy: {overall_accuracy:.4f}')


Accuracy for Atl_croaker_(nibea98): 0.8846
Accuracy for Bay_anchovy: 0.9615
Accuracy for Black_drum_or_Spot: 0.7692
Accuracy for Black_sea_bass: 0.8462
Accuracy for Brd_striped_anchovy: 0.8077
Accuracy for Little_tunny_or_skipjack_tuna: 0.8462
Accuracy for Nor_sea_robin: 0.9615
Accuracy for Scup: 0.7308
Accuracy for Smallmouth_flounder: 0.6154
Accuracy for Southern_kingfish(nibea95): 0.5769
Accuracy for Str_sea_robin: 0.5769
Accuracy for Summ_flounder: 0.6923
Accuracy for Summ_flounder99a: 0.5385
Accuracy for Weakfish_Cyn: 0.8077
Accuracy for Windowpane_flounder: 1.0000
Accuracy for Am_butterfish: 0.8846
Accuracy for Atl_chub_mackerel: 1.0000
Accuracy for Frigate_or_bullet_tuna: 0.9231
Accuracy for Giant_trevally99: 0.8077
Accuracy for Hogchoker_trinectes: 0.7308
Accuracy for Nor_kingfish: 0.7308
Accuracy for Red_White_or_Spotted_hake: 0.8846
Accuracy for Spanish_mackerel: 0.9231
Accuracy for Tautog: 0.6538
Accuracy for Thread_herring: 0.8077
Accuracy for Atl_menhaden_LS16_or_river_her