In [29]:
import pandas as pd
import joblib
from sklearn.decomposition import PCA
from skimage.feature import hog
from skimage.io import imread
from skimage.transform import rescale
from sklearn.metrics import accuracy_score
import numpy as np
import os
from tqdm import tqdm
# Путь к папке с изображениями
images_folder = "/Users/aleksey/PycharmProjects/MediScan/Aleksey/ISIC2018_Task3_Training_Input"

# Получаем список файлов изображений в папке
image_files = [f for f in os.listdir(images_folder) if f.endswith(('jpg', 'jpeg', 'png'))]

# Инициализация массива для одномерных векторов
image_vectors = []

# Проход по каждому изображению
for image_file in tqdm(image_files):
    # Загрузка изображения
    image_path = os.path.join(images_folder, image_file)
    image = imread(image_path, as_gray=True)
    
    image = rescale(image, 1/3, mode='reflect')
    img_hog, hog_img = hog(
    image, pixels_per_cell=(14,14), 
    cells_per_block=(2, 2), 
    orientations=9, 
    visualize=True, 
    block_norm='L2-Hys')
    flat_vector = np.array(hog_img).flatten()
    image_vectors.append(flat_vector)

image_vectors_array = np.array(image_vectors)

pca = PCA(n_components=100)
pca.fit(image_vectors_array)
image_vectors_array = pca.transform(image_vectors_array)

class_dict = {"MEL": 1, "NV": 2, "BCC": 3, "AKIEC": 4, "BKL":5, "DF": 6, "VASC": 7}
results = pd.read_csv("/Users/aleksey/PycharmProjects/MediScan/Aleksey/ISIC2018_Task3_Training_GroundTruth.csv")
res_labels = results.drop("image", axis=1)
decoded_labels = res_labels.apply(lambda row: row.idxmax(), axis=1)
results['class'] = decoded_labels
results = results[["image","class"]]
results["image"] = results["image"] + ".jpg"
columns = [f"Pixel_{i}" for i in range(image_vectors_array.shape[1])]
df = pd.DataFrame(data=image_vectors_array, columns=columns)
df['image'] = image_files
data = pd.merge(df, results, on="image")
data["class"] = data["class"].replace(class_dict)
X_train = data.drop(["image", "class"], axis=1)
y_train = data["class"]

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)


# Путь к папке с изображениями
images_folder = "/Users/aleksey/PycharmProjects/MediScan/Aleksey/ISIC2018_Task3_Test_Input"

# Получаем список файлов изображений в папке
image_files = [f for f in os.listdir(images_folder) if f.endswith(('jpg', 'jpeg', 'png'))]

# Инициализация массива для одномерных векторов
image_vectors = []

# Проход по каждому изображению
for image_file in tqdm(image_files):
    # Загрузка изображения
    image_path = os.path.join(images_folder, image_file)
    image = imread(image_path, as_gray=True)
    
    image = rescale(image, 1/3, mode='reflect')
    img_hog, hog_img = hog(
    image, pixels_per_cell=(14,14), 
    cells_per_block=(2, 2), 
    orientations=9, 
    visualize=True, 
    block_norm='L2-Hys')
    flat_vector = np.array(hog_img).flatten()
    image_vectors.append(flat_vector)

image_vectors_array = np.array(image_vectors)
image_vectors_array = pca.transform(image_vectors_array)
columns = [f"Pixel_{i}" for i in range(image_vectors_array.shape[1])]
df = pd.DataFrame(data=image_vectors_array, columns=columns)
df['image'] = image_files
results = pd.read_csv("/Users/aleksey/PycharmProjects/MediScan/Aleksey/ISIC2018_Task3_Test_GroundTruth.csv")
res_labels = results.drop("image", axis=1)
decoded_labels = res_labels.apply(lambda row: row.idxmax(), axis=1)
results['class'] = decoded_labels
results = results[["image","class"]]
results["image"] = results["image"] + ".jpg"
data = pd.merge(df, results, on="image")
data["class"] = data["class"].replace(class_dict)
X_test = data.drop(["image", "class"], axis=1)
y_test = data["class"]
X_test = scaler.transform(X_test)

100%|██████████| 10015/10015 [02:48<00:00, 59.28it/s]
100%|██████████| 1512/1512 [00:26<00:00, 56.77it/s]


In [30]:
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV
lr_cls = SGDClassifier()
parameters = {'max_iter': [1000, 2000, 5000, 10000, 20000, 40000, 50000, 100000],
              "tol": [0.004, 0.003, 0.002, 0.001, 0.0005, 0.0004, 0.0003, 0.0002, 0.0001]}
grid_search = GridSearchCV(estimator = lr_cls,  
                           param_grid = parameters,
                           scoring="accuracy",
                           verbose=2,
                           cv = 10)


grd = grid_search.fit(X_train, y_train)  
print("tuned hpyerparameters :(best parameters) ",grd.best_params_)
print("accuracy :",grd.best_score_)

Fitting 10 folds for each of 72 candidates, totalling 720 fits
[CV] END ...........................max_iter=1000, tol=0.004; total time=   0.5s
[CV] END ...........................max_iter=1000, tol=0.004; total time=   0.6s
[CV] END ...........................max_iter=1000, tol=0.004; total time=   0.5s
[CV] END ...........................max_iter=1000, tol=0.004; total time=   0.5s
[CV] END ...........................max_iter=1000, tol=0.004; total time=   0.6s
[CV] END ...........................max_iter=1000, tol=0.004; total time=   0.6s
[CV] END ...........................max_iter=1000, tol=0.004; total time=   0.5s
[CV] END ...........................max_iter=1000, tol=0.004; total time=   0.6s
[CV] END ...........................max_iter=1000, tol=0.004; total time=   0.5s
[CV] END ...........................max_iter=1000, tol=0.004; total time=   0.6s
[CV] END ...........................max_iter=1000, tol=0.003; total time=   0.7s
[CV] END ...........................max_iter=1

In [31]:
lr_cls = SGDClassifier(max_iter=5000, tol=0.0003)
lr_cls.fit(X_train, y_train)  
predictions = lr_cls.predict(X_test)
print("{:.2%}".format(accuracy_score(y_test, predictions)))

57.41%


In [32]:
joblib.dump(lr_cls, '/Users/aleksey/PycharmProjects/MediScan/Aleksey/MVP model/sgd_model.pkl')
joblib.dump(pca, '/Users/aleksey/PycharmProjects/MediScan/Aleksey/MVP model/pca.pkl')
joblib.dump(scaler, '/Users/aleksey/PycharmProjects/MediScan/Aleksey/MVP model/sc.pkl')

['/Users/aleksey/PycharmProjects/MediScan/Aleksey/MVP model/sc.pkl']

In [33]:
model = joblib.load("sgd_model.pkl")
pca = joblib.load("pca.pkl")

In [34]:
predictions = model.predict(X_test)
print("{:.2%}".format(accuracy_score(y_test, predictions)))

57.41%
