In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the mounted input directory and echo the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = [os.path.join(dirname, filename) for filename in filenames]
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the precomputed feature table from the Kaggle input dataset.
data = pd.read_csv('/kaggle/input/14features-020-jak/features_020_jakteristics_14.csv')

In [None]:
# Remove the 'Unnamed: 0' column from the loaded table.
data = data.drop(columns=['Unnamed: 0'])

In [None]:
# Discard every row that contains at least one missing value.
data = data.dropna()

In [None]:
# Renumber the rows 0..n-1 after the NaN filter. drop=True discards the old
# index instead of inserting it as an extra 'index' column, which would
# otherwise be cast to float and aggregated alongside the real features.
data = data.reset_index(drop=True)
data

In [None]:
# Cast every column of the table (including 'label') to float.
data = data.astype(float)

In [None]:
# Group the rows by class label. The grouping key is passed as the 'label'
# Series itself rather than as a column name.
grouped = data.groupby(data['label'])

In [None]:
# Per-class mean and variance of every column; the means are displayed.
averages, variances = grouped.mean(), grouped.var()
averages

In [None]:
def compute_covariance_matrix(data, regularization=0):
    """Return the sample covariance of ``data`` plus an optional ridge term.

    Parameters
    ----------
    data : array-like
        Observations in rows, features in columns (``rowvar=False``).
    regularization : float, default 0
        Value added to every diagonal entry of the covariance matrix.

    Returns
    -------
    numpy.ndarray
        Square matrix of shape ``(n_features, n_features)``. A single-feature
        input yields a ``(1, 1)`` matrix.
    """
    # np.cov squeezes its result, so one feature gives a 0-d array; promote it
    # back to 2-D so that shape[0] and downstream matrix code keep working.
    cov_matrix = np.atleast_2d(np.cov(data, rowvar=False))
    cov_matrix += regularization * np.eye(cov_matrix.shape[0])
    return cov_matrix
def fit(x_train, y_train, num_classes=9):
    """Estimate per-class priors, means and covariances.

    Parameters
    ----------
    x_train : array-like
        Training features; reshaped to ``(m, n_features)`` where ``m`` is the
        number of labels.
    y_train : array-like
        Class labels, flattened to 1-D. Labels are compared against the
        integers ``0 .. num_classes - 1``.
    num_classes : int, default 9
        Number of label slots to allocate.

    Returns
    -------
    phi : ndarray, shape (num_classes,)
        Fraction of training rows in each class (0 for absent classes).
    mu : ndarray, shape (num_classes, n_features)
        Per-class feature means; NaN for classes with no rows.
    sigma : ndarray, shape (num_classes, n_features, n_features)
        Per-class covariance matrices; NaN for classes with fewer than two
        rows, for which a sample covariance is undefined.
    """
    y_train = np.asarray(y_train).ravel()
    m = y_train.shape[0]
    x_train = np.asarray(x_train).reshape(m, -1)
    input_feature = x_train.shape[1]

    # Start from NaN so classes that never occur are marked as "not fitted"
    # explicitly, instead of through empty-slice RuntimeWarnings.
    mu = np.full((num_classes, input_feature), np.nan)
    sigma = np.full((num_classes, input_feature, input_feature), np.nan)
    phi = np.zeros(num_classes)

    for label in range(num_classes):
        indices = (y_train == label)
        count = int(np.sum(indices))
        if count == 0:
            continue
        class_rows = x_train[indices, :]
        phi[label] = count / m
        mu[label] = class_rows.mean(axis=0)
        if count > 1:
            sigma[label] = compute_covariance_matrix(class_rows)

    return phi, mu, sigma

In [None]:
# Inspect the column names of the cleaned table.
data.columns

In [None]:
# True if any cell of the table is still missing.
has_nan = data.isna().to_numpy().any()
has_nan

In [None]:
# Show the column names again (same expression as an earlier cell).
data.columns

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Feature columns to rescale; 'label' is deliberately not in this list.
columns_to_scale = ['z', 'eigenvalue_sum', 'omnivariance', 'eigenentropy',
       'anisotropy', 'planarity', 'linearity', 'PCA1', 'PCA2',
       'surface_variation', 'sphericity', 'verticality', 'nx', 'ny', 'nz']

# NOTE(review): the scaler is fitted on all rows, before the train/test split
# performed further down, so test-row statistics influence the scaling.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data[columns_to_scale])

# Reuse data's own index: assigning a DataFrame back aligns on index labels,
# and a fresh 0..n-1 index would silently produce NaN for any row of `data`
# whose label does not match.
scaled_df = pd.DataFrame(scaled_data, columns=columns_to_scale, index=data.index)
data[columns_to_scale] = scaled_df
data

In [None]:
# Drop any rows that contain NaN after the scaling step.
data = data.dropna()

In [None]:
# x = data[['Column1','Column2','Column3','Column4','Column5','Column6','Column7','Column8']]
# The fifteen feature columns form the design matrix; 'label' is the target.
feature_columns = [
    'z', 'eigenvalue_sum', 'omnivariance', 'eigenentropy', 'anisotropy',
    'planarity', 'linearity', 'PCA1', 'PCA2', 'surface_variation',
    'sphericity', 'verticality', 'nx', 'ny', 'nz',
]
X = data[feature_columns]

# Kept as a one-column DataFrame (not a Series).
y = data[['label']]

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# NumPy arrays of the training split, as expected by fit().
X_train_values, y_train_values = X_train.to_numpy(), y_train.to_numpy()

In [None]:
# Estimate class priors (phi), means (mu) and covariances (sigma) from the
# training split.
phi, mu, sigma = fit(X_train_values, y_train_values)

In [None]:
# Display the estimated class priors.
phi

In [None]:
# Print the eigenvalues of the fitted covariance for classes 1, 2, 5 and 8.
for label in (1, 2, 5, 8):
    class_covariance = sigma[label]
    print(np.linalg.eigvals(class_covariance))

In [None]:
import math
def multivariate_gaussian_pdf(x, mean, cov):
    """Evaluate the multivariate normal density N(mean, cov) at ``x``.

    Parameters
    ----------
    x : ndarray
        Point at which to evaluate the density, same shape as ``mean``.
    mean : ndarray
        Mean vector; its first dimension gives the dimensionality ``d``.
    cov : ndarray, shape (d, d)
        Covariance matrix.

    Returns
    -------
    float
        Density value, or NaN when the determinant of ``cov`` is not
        positive (the density is undefined in that case).

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``cov`` is exactly singular.
    """
    d = mean.shape[0]
    diff = x - mean

    # Solve the linear system instead of forming the explicit inverse.
    mahalanobis_sq = np.dot(diff.T, np.linalg.solve(cov, diff))

    # Work with log|cov|: the raw determinant of a high-dimensional covariance
    # with small eigenvalues underflows to 0 and turns the prefactor into inf.
    sign, log_det = np.linalg.slogdet(cov)
    if sign <= 0:
        return np.nan

    log_density = -0.5 * (d * np.log(2 * np.pi) + log_det + mahalanobis_sq)
    return np.exp(log_density)

In [None]:
def is_positive_semidefinite(matrix):
    """Print the eigenvalues of ``matrix`` and report whether all are >= 0.

    Returns a truthy value when every eigenvalue is non-negative and a falsy
    value otherwise.
    """
    eigenvalues = np.linalg.eig(matrix)[0]
    print(eigenvalues)
    non_negative = eigenvalues >= 0
    return non_negative.all()

# Check the fitted covariance of class 1 and report the verdict.
matrix = sigma[1]
print(matrix)
positive_semidefinite = is_positive_semidefinite(matrix)
message = (
    "The matrix is positive semidefinite."
    if positive_semidefinite
    else "The matrix is not positive semidefinite."
)
print(message)

In [None]:
def give_epistemic(X_test):
    """Score every row of ``X_test`` with ``1 - feature_density``.

    For each row, the Gaussian likelihood under classes 1, 2, 5 and 8 is
    evaluated with the module-level ``mu`` and ``sigma``. The likelihoods are
    normalised to sum to one, and the ``phi``-weighted sum of the normalised
    values is the row's feature density.

    Returns a list with one uncertainty value per row, in row order.
    """
    class_labels = (1, 2, 5, 8)
    epistemic_uncertainty = []
    for sample in X_test.values:
        likelihoods = [
            multivariate_gaussian_pdf(sample, mu[label], sigma[label])
            for label in class_labels
        ]
        total = sum(likelihoods)
        feature_density = sum(
            phi[label] * (likelihood / total)
            for label, likelihood in zip(class_labels, likelihoods)
        )
        epistemic_uncertainty.append(1 - feature_density)
    return epistemic_uncertainty

In [None]:
def get_aleatoric(X_test, softmax_probs):
    """Return the Shannon entropy (in nats) of each probability vector.

    Parameters
    ----------
    X_test : any
        Unused; kept so existing callers do not need to change.
    softmax_probs : iterable of iterables of float
        One vector of class probabilities per sample.

    Returns
    -------
    list
        ``-sum(p * log(p))`` for each vector, in input order. Zero
        probabilities contribute nothing, and an empty vector gives 0.
    """
    entropies = []
    for probs in softmax_probs:
        # Accumulate across the whole vector: the accumulator must be
        # initialised once per sample, not once per probability.
        entropy = 0
        for p in probs:
            if p == 0:
                # Skip zeros; log(0) is -inf and the term's limit is 0.
                continue
            entropy += -p * np.log(p)
        entropies.append(entropy)
    return entropies

In [None]:
# Compute the epistemic score for every row of X (the full feature table, not
# only one split).
X_epistemic = give_epistemic(X)

In [None]:
# Attach the epistemic score with assign(), which returns a new frame.
# X was sliced out of `data`, so writing a column into it in place is the
# pattern pandas flags with SettingWithCopyWarning.
X = X.assign(epistemic=X_epistemic)

# Re-join features and labels, then drop rows where any value is NaN.
data_new = pd.concat([X, y], axis=1).dropna()

In [None]:
# Fifteen original features plus the epistemic score form the new inputs.
augmented_columns = [
    'z', 'eigenvalue_sum', 'omnivariance', 'eigenentropy', 'anisotropy',
    'planarity', 'linearity', 'PCA1', 'PCA2', 'surface_variation',
    'sphericity', 'verticality', 'nx', 'ny', 'nz', 'epistemic',
]
X_new = data_new[augmented_columns]

# Target kept as a one-column DataFrame.
y_new = data_new[['label']]

In [None]:
from sklearn.model_selection import train_test_split
# Split the augmented table 80/20. random_state is 45 here, so this is a
# different partition from the earlier split that used 42.
X_train, X_test, y_train, y_test = train_test_split(X_new, y_new, test_size=0.2, random_state=45)

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow import keras
from tensorflow.keras.layers import Dropout

# normalized_uncertainty = (epistemic_uncertainty - epistemic_uncertainty.min()) / (epistemic_uncertainty.max() - epistemic_uncertainty.min())

# weights = 1 - normalized_uncertainty

# Labels 1, 2, 5 and 8 are remapped to contiguous indices 0..3 so they can be
# one-hot encoded.
classes_present = [1, 2, 5, 8]
num_classes = 4
class_mapping = dict(zip(classes_present, range(len(classes_present))))
y_mapped = y_train['label'].map(class_mapping)
y_onehot = tf.one_hot(y_mapped, depth=num_classes)

# Fully connected classifier: 16 inputs (15 features + epistemic score),
# three ReLU hidden layers, 4-way softmax output.
model = keras.Sequential()
model.add(keras.layers.Dense(128, activation='relu', input_shape=(16,)))
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dense(32, activation='relu'))
model.add(keras.layers.Dense(4, activation='softmax'))

# Standard (unweighted) categorical cross-entropy, optimised with Adam.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train for 20 epochs in mini-batches of 32.
model.fit(X_train, y_onehot, epochs=20, batch_size=32)


In [None]:
import numpy as np
from sklearn.metrics import accuracy_score

# Predicted class index is the position of the largest softmax output.
y_pred_probs = model.predict(X_test)
y_pred = y_pred_probs.argmax(axis=1)

# Put the true labels on the same 0..3 index scale before comparing.
y_test_mapped = y_test['label'].map(class_mapping).to_numpy()
accuracy = accuracy_score(y_test_mapped, y_pred)

print(f"Test Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Pull the epistemic column out as an array, then remove it from the
# training features.
epistemic_uncertainty = X_train['epistemic'].to_numpy()
X_train = X_train.drop(columns=['epistemic'])

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow import keras
from tensorflow.keras.layers import Dropout

# Min-max normalise the epistemic uncertainty of the training rows.
uncertainty_min = epistemic_uncertainty.min()
uncertainty_range = epistemic_uncertainty.max() - uncertainty_min
if uncertainty_range > 0:
    normalized_uncertainty = (epistemic_uncertainty - uncertainty_min) / uncertainty_range
else:
    # Every row has the same uncertainty: avoid 0/0 and weight all rows equally.
    normalized_uncertainty = np.zeros_like(epistemic_uncertainty, dtype=float)

# Per-sample weights in [0, 1]: the least uncertain row gets weight 1 and the
# most uncertain row gets weight 0.
weights = 1 - normalized_uncertainty

num_classes = 4
classes_present = [1, 2, 5, 8]
class_mapping = {cls: i for i, cls in enumerate(classes_present)}
y_mapped = y_train['label'].map(class_mapping)
y_onehot = tf.one_hot(y_mapped, depth=num_classes)

# Define the neural network architecture (15 feature inputs, 4-way softmax).
model = keras.Sequential([
    keras.layers.Dense(128, activation='relu', input_shape=(15,)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(4, activation='softmax')
])

# Compile with the standard loss. The per-sample weighting is supplied through
# `sample_weight` at fit time: a loss closure that captures the full-length
# weight vector cannot be aligned with the rows of each mini-batch, and
# multiplying the batch-averaged loss by that vector only rescales it.
model.compile(optimizer='adam', loss='categorical_crossentropy',  metrics=['accuracy'])

# Train the model; Keras slices `weights` per batch together with the inputs.
model.fit(X_train, y_onehot, sample_weight=weights, epochs=20, batch_size=32)


In [None]:
# Remove the epistemic column so the test features match the 15-input model.
X_test = X_test.drop(columns=['epistemic'])

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score

# Take the most probable class for each test row.
y_pred_probs = model.predict(X_test)
y_pred = y_pred_probs.argmax(axis=1)

# Map the true labels through class_mapping so both sides use indices 0..3.
y_test_mapped = y_test['label'].map(class_mapping).to_numpy()
accuracy = accuracy_score(y_test_mapped, y_pred)

print(f"Test Accuracy: {accuracy * 100:.2f}%")

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow import keras
from tensorflow.keras.layers import Dropout

# normalized_uncertainty = (epistemic_uncertainty - epistemic_uncertainty.min()) / (epistemic_uncertainty.max() - epistemic_uncertainty.min())

# weights = 1 - normalized_uncertainty

# Remap labels 1, 2, 5 and 8 to indices 0..3 and one-hot encode them.
classes_present = [1, 2, 5, 8]
num_classes = 4
class_mapping = dict(zip(classes_present, range(len(classes_present))))
y_mapped = y_train['label'].map(class_mapping)
y_onehot = tf.one_hot(y_mapped, depth=num_classes)

# Same architecture as the other runs, built layer by layer: 15 inputs,
# hidden widths 128 / 64 / 32, 4-way softmax output.
layer_stack = [keras.layers.Dense(128, activation='relu', input_shape=(15,))]
layer_stack += [keras.layers.Dense(units, activation='relu') for units in (64, 32)]
layer_stack.append(keras.layers.Dense(4, activation='softmax'))
model = keras.Sequential(layer_stack)

# Standard (unweighted) categorical cross-entropy, optimised with Adam.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train for 20 epochs in mini-batches of 32.
model.fit(X_train, y_onehot, epochs=20, batch_size=32)


In [None]:
import numpy as np
from sklearn.metrics import accuracy_score

# Class prediction is the argmax over the softmax outputs.
y_pred_probs = model.predict(X_test)
y_pred = y_pred_probs.argmax(axis=1)

# Convert the true labels to the 0..3 index scale used for training.
y_test_mapped = y_test['label'].map(class_mapping).to_numpy()
accuracy = accuracy_score(y_test_mapped, y_pred)

print(f"Test Accuracy: {accuracy * 100:.2f}%")