In [None]:
import numpy as np
from matplotlib import pyplot as plt
import os
import cv2 
import tensorflow as tf
import pydicom
from sklearn.model_selection import train_test_split

In [None]:
import os
import pydicom
from PIL import Image
import numpy as np


def dicom_to_jpeg(input_folder, output_folder):
    """Convert every DICOM file in ``input_folder`` to an 8-bit JPEG.

    Each file whose extension is ``.dcm`` (matched case-insensitively) is read
    with pydicom, its pixel data is scaled so that the brightest pixel maps to
    255, and the result is saved in ``output_folder`` under the same base name
    with a ``.jpg`` extension.

    Args:
        input_folder: Directory scanned (non-recursively) for DICOM files.
        output_folder: Directory that receives the JPEGs; created if missing.
    """
    # exist_ok replaces the racy "check, then create" sequence.
    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(input_folder):
        stem, extension = os.path.splitext(filename)
        if extension.lower() != '.dcm':
            continue

        dicom_path = os.path.join(input_folder, filename)
        pixels = pydicom.dcmread(dicom_path).pixel_array

        # Normalize to 0-255 and convert to 8-bit data.
        peak = pixels.max()
        if peak > 0:
            scaled = pixels / peak * 255
            # Negative stored values would wrap around when cast to uint8.
            scaled[scaled < 0] = 0
        else:
            # An all-zero (or all-negative) image cannot be scaled by its
            # maximum; emit a black image instead of dividing by zero.
            scaled = np.zeros(pixels.shape)
        image_8bit = scaled.astype(np.uint8)

        # Swap only the extension; str.replace would also rewrite a '.dcm'
        # that happens to occur in the middle of the file name.
        jpeg_path = os.path.join(output_folder, stem + '.jpg')
        Image.fromarray(image_8bit).save(jpeg_path)


# Convert the DICOM dataset to JPEGs; the image-loading cell further down
# reads from this same 'jpgs' folder.
dicom_to_jpeg('data/dataset_another/DICOM', 'data/dataset_another/jpgs')


In [None]:
def read_csv_data(path_to_file):
    """Read the label CSV and return image names with binary labels.

    The first line is treated as a header and skipped. Blank lines are
    ignored. Every other line is split on ',': the first field is the image
    name and the second field is the label, encoded as 1 when it equals
    ``abnormal`` and 0 otherwise.

    Args:
        path_to_file: Path to the CSV file.

    Returns:
        A tuple ``(X, Y)`` of NumPy arrays in file order: ``X`` holds the
        names (first column) and ``Y`` the 0/1 labels.
    """
    names = []
    labels = []
    with open(path_to_file, 'r') as f:
        next(f, None)  # skip the header row (no-op on an empty file)
        for raw_line in f:
            line = raw_line.rstrip('\r\n')
            if not line.strip():
                continue
            fields = line.split(',')
            names.append(fields[0])
            # Compare the stripped label so the last row is classified
            # correctly even when the file does not end with a newline.
            labels.append(1 if fields[1].strip() == 'abnormal' else 0)
    return np.array(names), np.array(labels)

In [None]:
# csv_info is the (names, labels) tuple of arrays returned by read_csv_data.
csv_info = read_csv_data('data/dataset_another/all.csv')

In [None]:
# Display the loaded (names, labels) tuple for a quick sanity check.
csv_info

In [None]:
def read_all_train_images(folder_path, csv_info, image_shape=(100, 156)):
    """Load the grayscale JPEGs listed in ``csv_info`` and drop unusable ones.

    For every name in ``csv_info[0]`` the file ``<folder_path>/<name>.jpg`` is
    read as grayscale and resized to ``image_shape``. Images that cannot be
    read, or whose pixels are all identical (zero standard deviation), are
    excluded from the result together with their labels.

    Args:
        folder_path: Directory containing the ``<name>.jpg`` files.
        csv_info: Pair ``(names, labels)`` of equal-length sequences.
        image_shape: ``(height, width)`` every image is resized to.

    Returns:
        A tuple ``(X, Y)``: ``X`` is a uint8 array of shape
        ``(n_valid, height, width)`` and ``Y`` holds the matching labels.
    """
    names, labels = csv_info[0], csv_info[1]
    height, width = image_shape
    images_np = np.zeros((len(names), height, width), dtype=np.uint8)
    valid_indices = []
    unreadable = []

    for i, filename in enumerate(names):
        img_path = os.path.join(folder_path, f"{filename}.jpg")
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread reports a missing or corrupt file by returning None
            # rather than raising; passing that to cv2.resize would crash.
            unreadable.append(img_path)
            continue
        # cv2.resize expects the target size as (width, height).
        images_np[i] = cv2.resize(img, (width, height))
        # A constant image has zero standard deviation and is rejected.
        if np.std(images_np[i]) != 0:
            valid_indices.append(i)

    if unreadable:
        print(f"Skipped {len(unreadable)} unreadable images, e.g. {unreadable[0]}")
    print(f"Number of valid images: {len(valid_indices)}")
    print(len(images_np))
    # An explicit integer dtype keeps the fancy index valid when it is empty.
    keep = np.asarray(valid_indices, dtype=np.intp)
    X = images_np[keep]
    Y = np.asarray(labels)[keep]
    return X, Y


In [None]:
# X: images kept by read_all_train_images; Y: the labels of those same images.
X, Y = read_all_train_images('data/dataset_another/jpgs', csv_info)

In [None]:
# 80/20 split with a fixed seed so the partition is identical after a kernel
# restart; without random_state every run shuffles differently.
x_train, x_test, y_train, y_test = train_test_split(X, Y, train_size=0.8, random_state=42)


In [None]:
# Flatten every training image into one row: (n_samples, n_pixels).
# Re-running this cell is harmless because reshaping a 2-D array this way
# leaves it unchanged.
x_train = x_train.reshape(x_train.shape[0], -1)
print(x_train.shape)

In [None]:
# Flatten every test image into one row, matching the layout of x_train.
x_test = x_test.reshape(x_test.shape[0], -1)
print(x_test.shape)

In [None]:
def plot_images(images, h, w, rows=3, cols=4):
    """Show the first ``rows * cols`` images on a grid of grayscale subplots.

    Grid cells for which no image exists are removed from the figure.

    Args:
        images: Sequence or array of images; each one is reshaped to
            ``(h, w)`` before being drawn, so flattened rows are accepted.
        h: Image height in pixels.
        w: Image width in pixels.
        rows: Number of subplot rows.
        cols: Number of subplot columns.
    """
    # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid, so ravel()
    # always works.
    fig, axes = plt.subplots(rows, cols, figsize=(10, 10), squeeze=False)

    for i, ax in enumerate(axes.ravel()):
        if i >= len(images):
            # Remove empty subplots
            fig.delaxes(ax)
            continue
        ax.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
        ax.set_title(i, fontsize=12)
        ax.set_xticks([])
        ax.set_yticks([])

    plt.tight_layout()
    
# x_test rows are flattened images; plot_images reshapes each back to 100x156.
plot_images(x_test, 100, 156)


In [None]:
def normalize_data(data):
    """Center ``data`` by subtracting its mean taken along axis 0.

    With one sample per row this removes the per-column (per-feature) mean.
    No scaling by the standard deviation is applied.

    Args:
        data: NumPy array to center.

    Returns:
        A new array ``data - data.mean(axis=0)``; ``data`` is not modified.
    """
    # axis=0 averages over rows, giving one mean value per column.
    column_means = data.mean(axis=0)
    return data - column_means

In [None]:
def pca(data, h, w, approximation=0.83):
    """Fit PCA via eigen-decomposition of the covariance matrix.

    Args:
        data: 2-D array with one sample per row; columns are treated as the
            variables (``np.cov`` is called with ``rowvar=False``).
        h: Not used by the computation; kept for backward compatibility.
        w: Not used by the computation; kept for backward compatibility.
        approximation: Fraction of the total variance the retained
            components must explain, in the range (0, 1].

    Returns:
        A tuple ``(principal_components, data_normalized,
        number_of_components, cumulative_variance_explained)`` where
        ``principal_components`` holds the leading eigenvectors as columns,
        ``data_normalized`` is the centered input, and
        ``cumulative_variance_explained`` covers all components in
        descending eigenvalue order.

    Raises:
        ValueError: If ``approximation`` is outside (0, 1] or the data has no
            variance at all.
    """
    if not 0 < approximation <= 1:
        raise ValueError(f"approximation must be in (0, 1], got {approximation}")

    # normalize the data
    data_normalized = normalize_data(data)
    #Find the covariance matrix
    print("Step 1")
    covariance_matrix = np.cov(data_normalized, rowvar=False)
    #Find the eigenvalues and eigenvectors of the covariance matrix
    print("Step 2")
    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)
    print("Step 3")
    #Sort the eigenvalues and eigenvectors in descending order
    sorted_indices = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[sorted_indices]
    eigenvectors = eigenvectors[:, sorted_indices]
    # Rounding can leave tiny negative eigenvalues for a covariance matrix;
    # zero them so the cumulative curve below never decreases.
    eigenvalues[eigenvalues < 0] = 0
    print("Step 4")

    #Find the number of principal components that explain the given approximation
    total_variance = np.sum(eigenvalues)
    if not total_variance > 0:
        raise ValueError("data has no variance; PCA is undefined")
    variance_explained = eigenvalues / total_variance
    cumulative_variance_explained = np.cumsum(variance_explained)
    reached = cumulative_variance_explained >= approximation
    if reached.any():
        number_of_components = int(np.argmax(reached)) + 1
    else:
        # Floating-point error can leave the final cumulative value just
        # below the target (e.g. 0.9999999 for approximation=1.0). argmax of
        # an all-False mask is 0, which would silently select a single
        # component, so keep every component instead.
        number_of_components = len(eigenvalues)
    #Find the principal components
    print("Step 6")
    principal_components = eigenvectors[:, :number_of_components]
    return principal_components, data_normalized, number_of_components, cumulative_variance_explained

In [None]:
def transform_data(data, principal_components, mean=None):
    """Center ``data`` and project it onto ``principal_components``.

    Args:
        data: 2-D array with one sample per row.
        principal_components: Matrix whose columns are the components to
            project onto.
        mean: Optional per-column mean to subtract before projecting. Pass
            the mean of the data the components were fitted on when
            projecting held-out samples, so that both sets share one origin.
            When omitted, ``data`` is centered with its own column means.

    Returns:
        The projected data, one row per sample and one column per component.
    """
    if mean is None:
        data_normalized = normalize_data(data)
    else:
        data_normalized = data - mean
    return data_normalized.dot(principal_components)

In [None]:
# Fit PCA on the training split. h and w are passed as 100 and 156 to match
# the size the images were resized to when they were loaded.
principal_components, data_normalized, number_of_components, cumulative_variance_explained = pca(x_train, 100, 156, 0.83)


In [None]:
# Project the training images onto the retained principal components.
final_data_normalized_reduced = transform_data(x_train, principal_components)

In [None]:
# Inspect the PCA-reduced training features.
final_data_normalized_reduced

In [None]:
# Number of components pca() selected for the requested variance fraction.
number_of_components

In [None]:
# Shape of the flattened training data before the PCA projection.
x_train.shape


In [None]:
# Shape after projection: one column per retained principal component.
final_data_normalized_reduced.shape


In [None]:
def plot_variance_explained(cumulative_variance_explained, number_of_components):
    """Plot cumulative explained variance for the leading components.

    Args:
        cumulative_variance_explained: Sequence of cumulative variance
            fractions, one entry per component.
        number_of_components: How many leading entries to draw; the x axis
            runs from 1 to this value with one tick per component.
    """
    component_numbers = np.arange(1, number_of_components + 1)
    leading_curve = cumulative_variance_explained[:number_of_components]

    plt.figure(figsize=(10, 6))
    plt.plot(component_numbers, leading_curve, '-o')
    plt.xticks(component_numbers)
    plt.xlabel('Number of Principal Components', fontsize=12)
    plt.ylabel('Cumulative Explained Variance', fontsize=12)
    plt.title('Variance Explained by Principal Components', fontsize=14)
    plt.show()

# Plot the cumulative explained variance up to the selected component count.
plot_variance_explained(cumulative_variance_explained, number_of_components)


In [None]:
def plot_principal_components(principal_components, im_shape=(100, 100), n_row=10, n_col=None):
    """Draw each principal component as a grayscale image on a grid.

    Args:
        principal_components: Matrix whose columns are the components.
        im_shape: Shape each component column is reshaped to for display; its
            element count must equal the number of rows of
            ``principal_components``.
        n_row: Number of subplot rows.
        n_col: Number of subplot columns. When omitted it is chosen so that
            the grid has room for every component.
    """
    n_components = principal_components.shape[1]
    if n_col is None:
        # Ceiling division: floor division would drop the trailing components
        # and yield zero columns when there are fewer components than rows.
        n_col = max(1, -(-n_components // n_row))

    # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid.
    fig, axes = plt.subplots(n_row, n_col, figsize=(10, 10), squeeze=False)
    axes = axes.ravel()

    for i, axi in enumerate(axes):
        if i < n_components:
            img = principal_components[:, i].reshape(im_shape)
            axi.imshow(img, cmap="gray")
            axi.set_xlabel(f"PC{i+1}")
            axi.set_xticks([])
            axi.set_yticks([])
        else:
            # Hide grid cells that have no component to show.
            axi.axis('off')

    plt.tight_layout()
    plt.show()

# The components have one entry per pixel of the 100x156 images, so the
# default im_shape of (100, 100) cannot reshape them; pass the real shape.
plot_principal_components(principal_components, im_shape=(100, 156))


In [None]:
# Center the test images with the TRAINING mean before projecting: the
# principal components were fitted on training data centered that way, and
# using the test set's own mean would shift the two sets relative to each
# other and leak test statistics into the features.
final_data_normalized_reduced_x_test = (x_test - x_train.mean(axis=0)).dot(principal_components)

In [None]:
# Inspect the PCA-reduced test features.
final_data_normalized_reduced_x_test

In [None]:
#cnn classify
model = tf.keras.models.Sequential([
    tf.keras.layers.Reshape((number_of_components, 1), input_shape=(number_of_components,)),
    tf.keras.layers.Conv1D(32, 3, activation='relu'),
    tf.keras.layers.MaxPooling1D(2),
    tf.keras.layers.Conv1D(64, 3, activation='relu'),
    tf.keras.layers.MaxPooling1D(2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(2, activation='softmax')
])


In [None]:
# The sparse categorical loss takes integer class labels directly, so the
# labels do not need to be one-hot encoded.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train on the PCA-reduced training features for 10 epochs.
model.fit(final_data_normalized_reduced, y_train, epochs=10)

In [None]:
# Loss and accuracy of the CNN on the PCA-reduced test features.
test_loss, test_acc = model.evaluate(final_data_normalized_reduced_x_test, y_test)

In [None]:
def evaluate_model(predicted, true):
    """Return the accuracy: the fraction of predictions equal to the truth.

    Args:
        predicted: Sequence of predicted labels.
        true: Sequence of ground-truth labels, same length as ``predicted``.

    Returns:
        A float in [0, 1].

    Raises:
        ValueError: If the sequences differ in length or are empty.
    """
    if len(predicted) != len(true):
        raise ValueError(
            f"length mismatch: {len(predicted)} predictions vs {len(true)} labels")
    if len(predicted) == 0:
        raise ValueError("cannot compute accuracy of an empty prediction set")
    # A distinct local name keeps the builtin sum() usable.
    correct = sum(1 for guess, label in zip(predicted, true) if guess == label)
    return correct / len(predicted)

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Decision tree trained on the raw flattened pixels. The fixed seed makes the
# fitted tree, and therefore its score, reproducible between runs.
dtc = DecisionTreeClassifier(random_state=42)
dtc.fit(x_train, y_train)

In [None]:
# Decision-tree predictions for the flattened test images.
y_pred = dtc.predict(x_test)

In [None]:
# Accuracy of y_pred; when cells run in order this is the decision tree.
evaluate_model(y_pred, y_test)

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forest trained on the raw flattened pixels. The fixed seed makes the
# bootstrap samples and feature draws, and therefore the score, reproducible.
rfc = RandomForestClassifier(random_state=42)
rfc.fit(x_train, y_train)
y_pred = rfc.predict(x_test)

In [None]:
# Accuracy of y_pred; when cells run in order this is the random forest.
evaluate_model(y_pred, y_test)

In [None]:
from sklearn.svm import SVC
# Support-vector classifier with default settings, trained on the raw
# flattened pixels; y_pred is overwritten with its test predictions.
svc = SVC()
svc.fit(x_train, y_train)
y_pred = svc.predict(x_test)

In [None]:
# Accuracy of y_pred; when cells run in order this is the SVC.
evaluate_model(y_pred, y_test)

In [186]:
# Rewrite dental_base.csv as dental_base1.csv, keeping the first two
# ';'-separated fields of every row and replacing an empty second field
# with '0'.
with open("dental_base.csv", "r") as file, open("dental_base1.csv", "w") as file1:
    for raw_line in file:
        # Strip only the line terminator: slicing off the last character
        # would eat real data when the final row has no trailing newline.
        fields = raw_line.rstrip("\r\n").split(";")
        if fields == ['']:
            continue  # blank line, nothing to copy
        # A row without any ';' is treated like a row with an empty second field.
        second = fields[1] if len(fields) > 1 else ''
        if second == '':
            second = '0'
        file1.write(f"{fields[0]};{second}\n")
    


In [None]:
from sklearn.neighbors import KNeighborsClassifier
# k-nearest-neighbours classifier with default settings, trained on the raw
# flattened pixels; y_pred is overwritten with its test predictions.
knn = KNeighborsClassifier()
knn.fit(x_train, y_train)
y_pred = knn.predict(x_test)

In [None]:
# Accuracy of y_pred; when cells run in order this is the k-NN classifier.
evaluate_model(y_pred, y_test)