In [1]:
# %pip installs into the running kernel's environment; the version is pinned to
# the release the recorded install output resolved to, so re-runs are reproducible.
%pip install mahotas==1.4.13

Collecting mahotas
  Downloading mahotas-1.4.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.4/5.4 MB[0m [31m57.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: mahotas
Successfully installed mahotas-1.4.13


In [2]:
import pandas as pd
import cv2
from skimage import feature
import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import mahotas #to user pftas
import pickle #to save features vectors

In [3]:
class LocalBinaryPatterns:
    """Uniform Local Binary Pattern (LBP) histogram descriptor.

    Attributes:
        numPoints: number of neighbour points forwarded to
            ``skimage.feature.local_binary_pattern``.
        radius: sampling radius forwarded to the same function.
    """

    def __init__(self, numPoints, radius):
        self.numPoints = numPoints
        self.radius = radius

    def describe(self, image):
        """Return the histogram of uniform LBP codes of a colour image.

        Args:
            image: colour image; it is converted to grayscale with
                ``cv2.COLOR_BGR2GRAY`` before the LBP codes are computed.

        Returns:
            1-D array of ``numPoints + 2`` counts, one per LBP code. The length
            is the same for every image, so the vectors can be stacked.
        """
        img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        lbp = feature.local_binary_pattern(img_gray, self.numPoints,
                                           self.radius, method="uniform")

        # The "uniform" method produces codes 0..numPoints+1, i.e. a fixed
        # numPoints + 2 distinct values. Sizing the histogram from lbp.max()
        # would shorten the vector for any image that lacks the highest code.
        n_bins = int(self.numPoints) + 2
        hist, _ = np.histogram(lbp.ravel(), bins=n_bins, range=(0, n_bins))
        return hist

In [4]:
class Pftas:
    """Parameter-Free Threshold Adjacency Statistics (PFTAS) descriptor."""

    def describe(self, img):
        """Return the PFTAS feature vector of an image.

        Args:
            img: image array, already loaded by the caller.

        Returns:
            The value returned by ``mahotas.features.pftas(img)``.
        """
        # mahotas exposes both `tas` and `pftas`; this class (and everything
        # that consumes it) is named PFTAS, so call the parameter-free variant.
        return mahotas.features.pftas(img)

In [5]:
class Dataset():
    """Feature-extraction view over a DataFrame of image records.

    Each row of ``df`` is read through its ``"path"`` column (image file
    location) and its ``"class"`` column (label).
    """

    # Target geometry in ndarray.shape order: (IMG_HEIGHT, IMG_WIDTH, 3).
    IMG_HEIGHT = 460
    IMG_WIDTH = 700

    def __init__(self, df):
        """Store the frame and build the LBP and PFTAS descriptors."""
        self.df = df
        self.lbp = LocalBinaryPatterns(numPoints=8, radius=2)
        self.pftas = Pftas()

    def __len__(self):
        """Return the number of rows in the underlying frame."""
        return len(self.df)

    def get_image(self, img_path):
        """Load a colour image and bring it to the target geometry.

        Args:
            img_path: path of the image file to read.

        Returns:
            Array of shape ``(IMG_HEIGHT, IMG_WIDTH, 3)``.

        Raises:
            FileNotFoundError: if ``cv2.imread`` could not read the file.
        """
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")

        if img.shape != (self.IMG_HEIGHT, self.IMG_WIDTH, 3):
            # cv2.resize takes dsize as (width, height), the reverse of the
            # (rows, cols) order used by ndarray.shape.
            img = cv2.resize(img, (self.IMG_WIDTH, self.IMG_HEIGHT))
        return img

    def _describe(self, idx, descriptor):
        """Apply ``descriptor.describe`` to row ``idx``; return (features, label)."""
        row = self.df.iloc[idx]
        img = self.get_image(row["path"])
        features = descriptor.describe(img)
        return np.array(features, dtype=np.float32), str(row["class"])

    def get_pftas(self, idx):
        """Return ``(float32 PFTAS vector, label string)`` for row ``idx``."""
        return self._describe(idx, self.pftas)

    def get_lbp(self, idx):
        """Return ``(float32 LBP histogram, label string)`` for row ``idx``."""
        return self._describe(idx, self.lbp)


In [6]:
# class SVMClassifier:
#     def __init__(self, C=1.0, kernel='rbf', gamma='scale', coef0=0):
#         self.C = C
#         self.kernel = kernel
#         self.gamma = gamma
#         self.coef0 = coef0
#         self.clf = SVC(C=self.C, kernel=self.kernel, gamma=self.gamma, coef0=self.coef0)

#     def train(self, X_train, y_train):
#         self.clf.fit(X_train, y_train)

#     def predict(self, X_test):
#         return self.clf.predict(X_test)

#     def evaluate(self, X_test, y_test):
#         y_pred = self.predict(X_test)
#         print("Classification Report:")
#         print(classification_report(y_test, y_pred))

In [7]:
def preprocess_data(df, root='/kaggle/input/breakhis/BreaKHis_v1/'):
    """Derive metadata columns from the relative image paths of a folds table.

    The input frame is left untouched; a new frame is returned, so the function
    can safely be called again on the same input.

    Args:
        df: frame whose ``"filename"`` column holds '/'-separated relative
            image paths.
        root: prefix prepended to every relative path to form the final
            ``"path"`` column.

    Returns:
        A new DataFrame in which ``"filename"`` has been renamed to ``"path"``
        (now prefixed with ``root``) and these columns have been appended:
        ``"filename"`` (path component 8), ``"class"`` (path component 3),
        ``"slide_id"`` (third '-'-separated field of the file name) and
        ``"tumor_type"`` (third '_'-separated field of the file name's first
        '-'-separated field).
    """
    # rename() without inplace returns a new frame, so the caller's frame is
    # never mutated by the column assignments below.
    out = df.rename(columns={"filename": "path"})

    # Split the path once and reuse the pieces.
    parts = out["path"].str.split('/')
    file_names = parts.str[8]
    name_fields = file_names.str.split("-")

    out["filename"] = file_names
    out["class"] = parts.str[3]  # string
    out["slide_id"] = name_fields.str[2]
    out["tumor_type"] = name_fields.str[0].str.split("_").str[2]
    out["path"] = root + out["path"]
    return out

In [8]:
# Extract LBP and PFTAS feature vectors for every magnification / split of
# fold 1 and save them as .npy files in the current working directory.
df = preprocess_data(pd.read_csv('/kaggle/input/breakhis/Folds.csv'))

# The fold filter does not depend on the loop variables, so apply it once.
df_fold = df.query("fold == 1")

for zoom in [40, 100, 200, 400]:
    for grp in ["train", "test"]:

        df_subset = df_fold.query(f"mag == {zoom}").query(f"grp == '{grp}'")
        dataset = Dataset(df_subset)

        print(f"Zoom: {zoom} - grp: {grp} - dataset len: {len(dataset)}")

        if len(dataset) == 0:
            # Nothing to extract or save for an empty subset.
            continue

        # Read each image once and feed it to both descriptors, instead of
        # loading and decoding every file separately for LBP and for PFTAS.
        X_lbp, X_pftas, labels = [], [], []
        for idx in range(len(dataset)):
            row = df_subset.iloc[idx]
            img = dataset.get_image(row["path"])
            X_lbp.append(np.array(dataset.lbp.describe(img), dtype=np.float32))
            X_pftas.append(np.array(dataset.pftas.describe(img), dtype=np.float32))
            labels.append(str(row["class"]))

        # Both feature sets are computed from the same rows, so they share labels.
        y_lbp = y_pftas = labels

        np.save(f"lbp_{zoom}_{grp}_X.npy", X_lbp)
        np.save(f"pftas_{zoom}_{grp}_X.npy", X_pftas)
        np.save(f"lbp_{zoom}_{grp}_y.npy", y_lbp)
        np.save(f"pftas_{zoom}_{grp}_y.npy", y_pftas)



Zoom: 40 - grp: train - dataset len: 1250
Zoom: 40 - grp: test - dataset len: 745
Zoom: 100 - grp: train - dataset len: 1321
Zoom: 100 - grp: test - dataset len: 760
Zoom: 200 - grp: train - dataset len: 1269
Zoom: 200 - grp: test - dataset len: 744
Zoom: 400 - grp: train - dataset len: 1165
Zoom: 400 - grp: test - dataset len: 655


In [9]:
# List comprehensions to build the feature and label lists
#X_lbp_train, y_lbp_train = zip(*(train_dataset.get_lbp(idx) for idx in range(len(train_dataset))))
#X_lbp_test, y_lbp_test = zip(*(test_dataset.get_lbp(idx) for idx in range(len(test_dataset))))

In [10]:
# List comprehensions to build the feature and label lists
#X_pftas_train, y_pftas_train = zip(*(train_dataset.get_pftas(idx) for idx in range(len(train_dataset))))
#X_pftas_test, y_pftas_test = zip(*(test_dataset.get_pftas(idx) for idx in range(len(test_dataset))))

In [11]:
#lbp
# ss = StandardScaler()
# X_train = ss.fit_transform(X_lbp_train)
# X_test = ss.transform(X_lbp_test)



# svm_classifier = SVMClassifier()
# svm_classifier.train(X_train, y_lbp_train)
# svm_classifier.evaluate(X_test, y_lbp_test)

In [12]:
#pftas
# ss = StandardScaler()
# X_train = ss.fit_transform(X_pftas_train)
# X_test = ss.transform(X_pftas_test)



# svm_classifier = SVMClassifier()
# svm_classifier.train(X_train, y_pftas_train)
# svm_classifier.evaluate(X_test, y_pftas_test)