In [2]:
import numpy as np
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import torch 
import torch.nn as nn 

In [3]:
from DualArchitecture import DualTextCNN
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MODEL_SAVE_PATH = "C:/Users/Korhan/Desktop/workspace/vsCodeWorkspace/Python_Workspace/mental_health_sentiment_analysis/best_newdualtextcnnModel.pt"

model = DualTextCNN(input_dim=300, num_classes=7, latent_dim=32).to(device=DEVICE)
model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=torch.device(DEVICE)))

<All keys matched successfully>

In [4]:
from TextDataset import TextDataset
from torch.utils.data import Dataset, DataLoader
BATCH_SIZE = 16

X_train_embeddings = np.load("X_train_embeddings.npy")
y_train_encoded = np.load("y_train_encoded.npy")
X_test_embeddings = np.load("X_test_embeddings.npy")
y_test_encoded = np.load("y_test_encoded.npy")

X_train_embeddings = np.array(X_train_embeddings)
X_test_embeddings = np.array(X_test_embeddings)

y_train_encoded = np.array(y_train_encoded)
y_test_encoded = np.array(y_test_encoded)

y_train_encoded = np.reshape(y_train_encoded, (-1,))
y_test_encoded = np.reshape(y_test_encoded, (-1,))


train_dataset = TextDataset(X_train_embeddings, y_train_encoded)
test_dataset = TextDataset(X_test_embeddings, y_test_encoded)

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [5]:
model.eval()
latent_embeddings = []
pred_logits = []
with torch.no_grad():
    for X, y in train_dataloader:
        X, y = X.to(DEVICE).unsqueeze(1), y.to(DEVICE).squeeze(1)
        y_pred, rec_pred ,latent_embedding_batch = model(X)
        pred_logits.append(y_pred)
        latent_embeddings.append(latent_embedding_batch)

In [6]:
latent_embeddings = torch.cat(latent_embeddings, dim=0)
pred_logits = torch.cat(pred_logits, dim=0)
print("Final latent embeddings shape:", latent_embeddings.shape)
print("Final pred logits shape:", pred_logits.shape)

Final latent embeddings shape: torch.Size([84868, 32])
Final pred logits shape: torch.Size([84868, 7])


In [7]:
np.save("latent_embeddings_from_dual_architecture.npy", latent_embeddings.cpu().numpy())

In [8]:
combined_features = torch.cat( (latent_embeddings, pred_logits) , dim=-1)

print("Combined Features shape : ", combined_features.shape)

Combined Features shape :  torch.Size([84868, 39])


In [9]:
combined_features_np = combined_features.detach().cpu().numpy()

In [10]:
from sklearn.mixture import GaussianMixture

latent_embeddings_np = latent_embeddings.cpu().numpy()

num_components = 7

from sklearn.model_selection import GridSearchCV
from sklearn.mixture import GaussianMixture

param_grid = {
    'n_components': [5, 6, 7],
    'covariance_type': ['full', 'tied', 'diag', 'spherical'],
    'max_iter': [100, 200, 500],
}

gmm = GaussianMixture()
grid_search = GridSearchCV(gmm, param_grid, cv=3, n_jobs=-1)
grid_search.fit(latent_embeddings_np)

best_model = grid_search.best_estimator_
print("Best Model: ", best_model)

print("Gaussian Means Shape:", best_model.means_.shape)
print("Covariances Shape:", best_model.covariances_.shape)

Best Model:  GaussianMixture(max_iter=500, n_components=7)
Gaussian Means Shape: (7, 32)
Covariances Shape: (7, 32, 32)


In [11]:
true_labels = []
with torch.no_grad():
    for X, y in train_dataloader:
        true_labels.append(y.cpu().numpy())
        
true_labels = np.concatenate(true_labels, axis=0)

In [12]:
probs = best_model.predict_proba(latent_embeddings_np)
assignments = np.argmax(probs, axis=1)

In [13]:
print("Gerçek Etiketler (İlk 10):", true_labels[:5])
print("GMM Atamaları (İlk 10):", assignments[:5])

Gerçek Etiketler (İlk 10): [[2]
 [5]
 [6]
 [3]
 [3]]
GMM Atamaları (İlk 10): [5 3 4 5 5]


In [14]:
from sklearn.metrics import adjusted_rand_score

ari_score = adjusted_rand_score(true_labels.squeeze(1), assignments)
print("Adjusted Rand Index (ARI):", ari_score)

Adjusted Rand Index (ARI): 0.2232536468592531


In [15]:
param_grid = {
    'n_components': [5, 6, 7],
    'covariance_type': ['full', 'tied', 'diag', 'spherical'],
    'max_iter': [100, 200, 500],
}

gmm = GaussianMixture()
grid_search = GridSearchCV(gmm, param_grid, cv=3, n_jobs=-1)
grid_search.fit(combined_features_np)

best_model = grid_search.best_estimator_
print("Best Model: ", best_model)

print("Gaussian Means Shape:", best_model.means_.shape)
print("Covariances Shape:", best_model.covariances_.shape)

Best Model:  GaussianMixture(n_components=7)
Gaussian Means Shape: (7, 39)
Covariances Shape: (7, 39, 39)


In [16]:
probs = best_model.predict_proba(combined_features_np)
assignments = np.argmax(probs, axis=1)

In [17]:
print("Gerçek Etiketler (İlk 10):", true_labels[:5])
print("GMM Atamaları (İlk 10):", assignments[:5])

Gerçek Etiketler (İlk 10): [[2]
 [5]
 [6]
 [3]
 [3]]
GMM Atamaları (İlk 10): [6 1 4 6 6]


In [18]:
ari_score = adjusted_rand_score(true_labels.squeeze(1), assignments)
print("Adjusted Rand Index (ARI):", ari_score)

Adjusted Rand Index (ARI): 0.36611007981781873
