In [1]:
# K-Nearest Neighbors Approach
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, balanced_accuracy_score

# Load the data and name the model inputs / label column.
# NOTE(review): the file name suggests SMOTE oversampling was applied before this
# split; if so, synthetic neighbours of test rows may sit in the training set and
# inflate the scores below — confirm how the CSV was produced.
df = pd.read_csv("solar_flare_data_smote.csv")
features = ['xrsb_flux', 'background_flux', 'integrated_flux']
target = 'flare_class_peak'

# Encode class labels as integers (uses no feature statistics, so it is safe
# to do before the split).
label_encoder = LabelEncoder()
df[target] = label_encoder.fit_transform(df[target])

# Split BEFORE imputing/scaling so that no test-set statistics leak into training.
X_train, X_test, y_train, y_test = train_test_split(
    df[features], df[target], test_size=0.2, random_state=42
)

# Impute missing values using column means computed on the training split only.
train_means = X_train.mean()
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)

# Fit the scaler on the training split, then apply the same transform to the
# test split. Results are wrapped back into DataFrames to keep column names.
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=features, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=features, index=X_test.index)

# Train and evaluate a 5-nearest-neighbours classifier.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
knn_preds = knn.predict(X_test)

print("K-Nearest Neighbors:")
print(classification_report(y_test, knn_preds))
print(confusion_matrix(y_test, knn_preds))
print(f"Balanced Accuracy:{balanced_accuracy_score(y_test, knn_preds)}")

# Random Forest Approach
from sklearn.ensemble import RandomForestClassifier

# Reload the data so this section does not depend on frames mutated earlier.
# NOTE(review): the file name suggests SMOTE oversampling was applied before this
# split; if so, synthetic neighbours of test rows may sit in the training set and
# inflate the scores below — confirm how the CSV was produced.
df_cleaned = pd.read_csv("solar_flare_data_smote.csv")
features = ['xrsb_flux', 'background_flux', 'integrated_flux']
target = 'flare_class_peak'

# Encode class labels as integers (uses no feature statistics, so it is safe
# to do before the split).
label_encoder = LabelEncoder()
df_cleaned[target] = label_encoder.fit_transform(df_cleaned[target])

# Split BEFORE imputing/scaling so that no test-set statistics leak into training.
X_train, X_test, y_train, y_test = train_test_split(
    df_cleaned[features], df_cleaned[target], test_size=0.2, random_state=42
)

# Impute missing values using column means computed on the training split only.
train_means = X_train.mean()
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)

# Fit the scaler on the training split, then apply the same transform to the
# test split. Results are wrapped back into DataFrames to keep column names.
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=features, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=features, index=X_test.index)

# class_weight='balanced' reweights classes inversely to their training frequency.
rf_model = RandomForestClassifier(random_state=42, class_weight='balanced')
rf_model.fit(X_train, y_train)

rf_preds = rf_model.predict(X_test)
print("Random Forest:")
print(classification_report(y_test, rf_preds))
print(confusion_matrix(y_test, rf_preds))
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, rf_preds)}")


K-Nearest Neighbors:
              precision    recall  f1-score   support

           0       0.90      0.98      0.94      1701
           1       0.74      0.67      0.70      1730
           2       0.75      0.67      0.71      1729
           3       0.80      0.89      0.84      1740

    accuracy                           0.80      6900
   macro avg       0.80      0.80      0.80      6900
weighted avg       0.80      0.80      0.80      6900

[[1663   32    2    4]
 [ 162 1159  273  136]
 [  19  292 1165  253]
 [   5   85  108 1542]]
Balanced Accuracy:0.8019022943230345
Random Forest:
              precision    recall  f1-score   support

           0       0.91      0.98      0.95      1701
           1       0.75      0.73      0.74      1730
           2       0.78      0.73      0.75      1729
           3       0.85      0.88      0.87      1740

    accuracy                           0.83      6900
   macro avg       0.83      0.83      0.83      6900
weighted avg       

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=059000a2-e207-4dc7-9f7e-f909ccf665aa' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>