In [2]:
import numpy as np
import json
import os
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import xgboost as xgb
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

2025-03-25 15:39:09.107106: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-25 15:39:09.114394: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-25 15:39:09.158517: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-25 15:39:09.234295: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-03-25 15:39:09.259406: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [1]:


# Function to load data
def load_data(data_dir='.', n_samples=359):
    """Load flattened feature vectors and their satellite labels from disk.

    For every index ``i`` in ``1..n_samples`` this looks for the pair
    ``features_{i}.npy`` / ``features_{i}_metadata.json`` inside ``data_dir``.
    When either file of a pair is missing, the pair is skipped and a warning
    is printed.

    Parameters
    ----------
    data_dir : str or path-like, default '.'
        Directory that holds the feature and metadata files. The default
        (current working directory) matches the original behavior.
    n_samples : int, default 359
        Highest sample index to look for (indices start at 1).

    Returns
    -------
    X : np.ndarray
        One row per loaded sample, each row being the flattened feature array
        (the original notes expect (20, 9) matrices, i.e. 180 values per row).
        Empty when no pair was found.
    y : np.ndarray
        The ``'Satellite_Label'`` metadata value of each loaded sample, in the
        same order as the rows of ``X``.

    Raises
    ------
    KeyError
        If a metadata file has no ``'Satellite_Label'`` entry.
    ValueError
        If the loaded feature arrays do not all flatten to the same length.
    """
    features_list = []
    labels_list = []
    for i in range(1, n_samples + 1):
        feature_file = f'features_{i}.npy'
        metadata_file = f'features_{i}_metadata.json'
        feature_path = os.path.join(data_dir, feature_file)
        metadata_path = os.path.join(data_dir, metadata_file)

        if not (os.path.exists(feature_path) and os.path.exists(metadata_path)):
            print(f"Warning: {feature_file} or {metadata_file} not found.")
            continue

        # Flatten so every sample becomes a single row of the design matrix.
        features_list.append(np.load(feature_path).flatten())

        with open(metadata_path, 'r') as f:
            metadata = json.load(f)
        labels_list.append(metadata['Satellite_Label'])

    # Stacking ragged vectors would either fail obscurely or yield an object
    # array (depending on the NumPy version); report the problem explicitly.
    lengths = {vec.shape[0] for vec in features_list}
    if len(lengths) > 1:
        raise ValueError(
            f"Inconsistent flattened feature lengths found: {sorted(lengths)}"
        )

    X = np.array(features_list)
    y = np.array(labels_list)
    return X, y

# Load data
print("Loading data...")
X, y = load_data()

# Fail early with a clear message instead of letting the encoder / splitter
# raise a cryptic error further down when no sample files were found.
if len(X) == 0:
    raise FileNotFoundError(
        "No feature/metadata file pairs were found; nothing to train on."
    )

# Encode the raw satellite labels as consecutive integers 0..n_classes-1
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Perform standard train-test split (80-20) with stratification
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42
)

# Ask user for preprocessing method
preprocess_method = input("Choose preprocessing (none, standard, minmax): ").strip().lower()

# Supported scalers: option name -> (scaler class, confirmation message)
scaler_options = {
    'standard': (StandardScaler, "Applied standardization (zero mean, unit variance)."),
    'minmax': (MinMaxScaler, "Applied min-max scaling (to [0, 1])."),
}

# Apply preprocessing. The scaler is fitted on the training split only and
# then reused on the test split, so no test statistics leak into training.
if preprocess_method in scaler_options:
    scaler_cls, applied_message = scaler_options[preprocess_method]
    scaler = scaler_cls()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    print(applied_message)
else:
    # A typo used to disable scaling silently; keep the fallback but say so.
    if preprocess_method not in ('', 'none'):
        print(f"Warning: unrecognized preprocessing option '{preprocess_method}'; using none.")
    X_train_scaled = X_train
    X_test_scaled = X_test
    print("No preprocessing applied.")

# Function to evaluate and print classification report
def evaluate_model(model, X_test, y_test, model_name):
    """Predict on the test split and print a per-class classification report.

    Parameters
    ----------
    model : object
        Any fitted estimator exposing ``predict(X)``. The prediction may be a
        1-D array of class indices or a 2-D array of per-class scores.
    X_test : array-like
        Test features passed straight to ``model.predict``.
    y_test : array-like
        True encoded class indices of the test samples.
    model_name : str
        Label used in the printed report header.

    Notes
    -----
    Reads the module-level ``le`` (the fitted label encoder) to obtain the
    class names shown in the report.
    """
    y_pred = np.asarray(model.predict(X_test))

    # Every NumPy array has an ``argmax`` attribute, so checking for it cannot
    # tell score matrices from label vectors. Only 2-D outputs are reduced;
    # calling argmax(axis=1) on 1-D labels would raise an AxisError.
    if y_pred.ndim > 1:
        y_pred = y_pred.argmax(axis=1)

    class_names = [str(cls) for cls in le.classes_]

    print(f"\n{model_name} Classification Report:")
    # Passing ``labels`` pins the report rows to the encoder's classes, so the
    # call still works when some class is absent from this test split.
    print(classification_report(
        y_test,
        y_pred,
        labels=list(range(len(class_names))),
        target_names=class_names,
        zero_division=0,
    ))

# Model dimensions are derived from the data rather than hard-coded, so the
# script keeps working if the feature length or the number of classes changes.
n_features = X_train_scaled.shape[1]
n_classes = len(le.classes_)

# 1. Random Forest Classifier
print("Training Random Forest...")
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)
evaluate_model(rf_model, X_test_scaled, y_test, "Random Forest")

# 2. Feedforward Neural Network
print("Training Neural Network...")
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable, matching the fixed seeds of the other two models.
tf.keras.utils.set_random_seed(42)
nn_model = Sequential([
    tf.keras.Input(shape=(n_features,)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(n_classes, activation='softmax'),  # one output unit per encoded class
])
nn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
nn_model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=0)
# evaluate_model runs the prediction itself; no separate predict call is needed.
evaluate_model(nn_model, X_test_scaled, y_test, "Neural Network")

# 3. XGBoost Classifier
print("Training XGBoost...")
# ``use_label_encoder`` is omitted: labels are already integer-encoded above,
# and recent XGBoost releases no longer recognise the argument.
xgb_model = xgb.XGBClassifier(
    n_estimators=100, random_state=42, eval_metric='mlogloss'
)
xgb_model.fit(X_train_scaled, y_train)
evaluate_model(xgb_model, X_test_scaled, y_test, "XGBoost")

hola mundo
