In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import classification_report, accuracy_score

# ---------------------------------------------------------------------------
# Configuration: everything a reader might want to tune lives here.
# ---------------------------------------------------------------------------
DATA_DIR = '../assets/seed-dataset'
ARRAY_KEY = 'arr_0'       # key under which each archive is assumed to store its array
TEST_SIZE = 0.2           # fraction of samples held out for evaluation
RANDOM_STATE = 42         # fixed seed so the split is reproducible
PCA_VARIANCE = 0.95       # fraction of variance the PCA projection must retain
# NOTE(review): assumes exactly three classes whose sorted label values map to
# this order -- TODO confirm against the dataset's label encoding.
CLASS_NAMES = ['Negative', 'Neutral', 'Positive']


def load_npz_array(path, key=ARRAY_KEY):
    """Return a single array from an ``.npz`` archive and close the archive.

    ``np.load`` on an ``.npz`` file returns an archive object that keeps the
    underlying file open; indexing it directly without closing leaks the file
    handle. Using it as a context manager releases the handle as soon as the
    requested array has been read into memory.

    Parameters
    ----------
    path : str
        Location of the ``.npz`` file.
    key : str, optional
        Name of the array inside the archive (defaults to ``ARRAY_KEY``).

    Returns
    -------
    numpy.ndarray
        The array stored under ``key``.

    Raises
    ------
    KeyError
        If ``key`` is not present in the archive.
    """
    with np.load(path) as archive:
        return archive[key]


# Load data
labels = load_npz_array(f'{DATA_DIR}/LabelsNoImage.npz')            # assumed: class labels
subjects = load_npz_array(f'{DATA_DIR}/SubjectsNoImage.npz')        # assumed: subject identifiers
dataset = load_npz_array(f'{DATA_DIR}/DatasetCaricatoNoImage.npz')  # assumed: feature array

# Check the shape of the dataset
print(f"Original dataset shape: {dataset.shape}")

# Estimators below expect a 2D (samples, features) matrix, so collapse every
# trailing axis into one feature axis. Handles 3D input as before and any
# higher-rank input as well.
if dataset.ndim > 2:
    dataset = dataset.reshape(dataset.shape[0], -1)

print(f"Reshaped dataset shape: {dataset.shape}")

# Fail early, with a readable message, if features and labels are misaligned.
if len(labels) != len(dataset):
    raise ValueError(
        f"Sample count mismatch: dataset has {len(dataset)} rows "
        f"but labels has {len(labels)} entries"
    )

# Step 1: Split data into training and testing
# NOTE(review): `subjects` is loaded but does not influence the split. If
# samples from one subject can land in both partitions, the reported score may
# be optimistic -- consider a group-aware splitter (e.g. GroupShuffleSplit).
# TODO confirm the intended evaluation protocol.
X_train, X_test, y_train, y_test = train_test_split(
    dataset, labels, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Step 2: Standardize the data
# The scaler is fitted on the training split only and then applied to the test
# split, so no test-set statistics are used during fitting.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 3: Apply PCA for dimensionality reduction
# As with the scaler, the projection is learned from the training split only.
pca = PCA(n_components=PCA_VARIANCE)  # Preserve 95% of variance
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Step 4: Apply LDA for classification
lda = LDA()
lda.fit(X_train_pca, y_train)
y_pred = lda.predict(X_test_pca)

# Step 5: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=CLASS_NAMES)

print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:')
print(report)


Original dataset shape: (50910, 5, 62)
Reshaped dataset shape: (50910, 310)
Accuracy: 0.7728
Classification Report:
              precision    recall  f1-score   support

    Negative       0.71      0.74      0.72      3301
     Neutral       0.74      0.76      0.75      3372
    Positive       0.87      0.82      0.85      3509

    accuracy                           0.77     10182
   macro avg       0.77      0.77      0.77     10182
weighted avg       0.78      0.77      0.77     10182



In [2]:
# Scratch cell: writes a fixed greeting to stdout (presumably a quick check
# that the kernel is responsive -- safe to delete once no longer needed).
print("hello")

he
