In [1]:
!pip install mahotas

Collecting mahotas
  Downloading mahotas-1.4.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.4/5.4 MB[0m [31m38.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: mahotas
Successfully installed mahotas-1.4.13


In [2]:
import pandas as pd
import cv2
from skimage import feature
import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import mahotas  # to use PFTAS

In [3]:
class LocalBinaryPatterns:
    """Uniform Local Binary Pattern (LBP) histogram descriptor.

    ``numPoints`` and ``radius`` are forwarded to
    ``skimage.feature.local_binary_pattern`` as its P and R arguments.
    """

    def __init__(self, numPoints, radius):
        self.numPoints = numPoints
        self.radius = radius

    def describe(self, image):
        """Return the LBP histogram of one colour image.

        Args:
            image: Array accepted by ``cv2.cvtColor`` with
                ``cv2.COLOR_BGR2GRAY`` (a 3-channel BGR image).

        Returns:
            1-D array of ``numPoints + 2`` raw (un-normalised) bin counts.
        """
        img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        lbp = feature.local_binary_pattern(img_gray, self.numPoints,
                                           self.radius, method="uniform")

        # Uniform LBP codes lie in [0, numPoints + 1]. The bin count must be
        # fixed from numPoints rather than from lbp.max(): an image that
        # happens to lack the highest code would otherwise yield a shorter
        # vector and the per-image features could not be stacked.
        n_bins = self.numPoints + 2
        hist, _ = np.histogram(lbp.ravel(),
                               bins=n_bins,
                               range=(0, n_bins))
        return hist

In [4]:
class Pftas:
    """Threshold Adjacency Statistics descriptor backed by mahotas."""

    def describe(self, img):
        """Compute the feature vector for one image.

        Args:
            img: Image array, handed to ``mahotas.features.tas`` unchanged.

        Returns:
            Whatever ``mahotas.features.tas`` returns for ``img``.
        """
        # NOTE(review): despite the class name this calls `tas`, not
        # `pftas` -- confirm which variant is intended.
        tas_vector = mahotas.features.tas(img)
        return tas_vector

In [5]:
class LBPDataset():
    """Extracts LBP or TAS features, one image at a time, from a DataFrame.

    The frame must provide a "path" column (image file location) and a
    "class" column (label). Rows are addressed by position (``iloc``).
    """

    # Array shape (height, width, channels) every returned image must have.
    IMAGE_SHAPE = (460, 700, 3)

    def __init__(self, df):
        self.df = df
        self.lbp = LocalBinaryPatterns(numPoints=8, radius=2)
        self.pftas = Pftas()

    def __len__(self):
        """Number of rows (images) in the underlying frame."""
        return len(self.df)

    def get_image(self, img_path):
        """Load a colour image and force it to ``IMAGE_SHAPE``.

        Args:
            img_path: Path of the image file to read.

        Returns:
            Array of shape ``IMAGE_SHAPE``.

        Raises:
            FileNotFoundError: If OpenCV cannot read ``img_path``.
        """
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")

        if img.shape != self.IMAGE_SHAPE:
            height, width = self.IMAGE_SHAPE[:2]
            # cv2.resize expects dsize as (width, height) -- the reverse of
            # the (height, width) order used by ndarray.shape.
            img = cv2.resize(img, (width, height))
        return img

    def _describe_row(self, idx, descriptor):
        """Run ``descriptor.describe`` on row ``idx``; return (features, label)."""
        row = self.df.iloc[idx]
        img = self.get_image(row["path"])
        features = descriptor.describe(img)
        return np.array(features, dtype=np.float32), str(row["class"])

    def get_pftas(self, idx):
        """Return ``(float32 feature vector, label string)`` from ``self.pftas``."""
        return self._describe_row(idx, self.pftas)

    def get_lbp(self, idx):
        """Return ``(float32 feature vector, label string)`` from ``self.lbp``."""
        return self._describe_row(idx, self.lbp)


In [6]:
class SVMClassifier:
    """Thin wrapper around ``SVC`` offering train / predict / evaluate."""

    def __init__(self, C=1.0, kernel='rbf', gamma='scale', coef0=0):
        """Record the hyper-parameters and build the wrapped ``SVC``."""
        self.C, self.kernel, self.gamma, self.coef0 = C, kernel, gamma, coef0
        svc_params = {
            "C": self.C,
            "kernel": self.kernel,
            "gamma": self.gamma,
            "coef0": self.coef0,
        }
        self.clf = SVC(**svc_params)

    def train(self, X_train, y_train):
        """Fit the wrapped classifier on the training data."""
        self.clf.fit(X_train, y_train)

    def predict(self, X_test):
        """Return the wrapped classifier's predictions for ``X_test``."""
        predictions = self.clf.predict(X_test)
        return predictions

    def evaluate(self, X_test, y_test):
        """Print a classification report of predictions against ``y_test``."""
        predicted = self.predict(X_test)
        print("Classification Report:")
        report = classification_report(y_test, predicted)
        print(report)

In [7]:
# Function that pre-processes the fold metadata
def preprocess_data(df, base_dir='/kaggle/input/breakhis/BreaKHis_v1/'):
    """Derive path, class, slide and tumour-type columns from the fold table.

    The input frame is left untouched; a new frame is returned, so the
    function can safely be called more than once on the same input.

    Args:
        df: Frame with a "filename" column holding '/'-separated relative
            image paths. Component 3 is used as the class and component 8
            as the file name.
        base_dir: Prefix prepended (plain string concatenation) to each
            relative path to build the final "path" column.

    Returns:
        A new DataFrame where the original "filename" column is renamed to
        "path" (prefixed with ``base_dir``) and the columns "filename",
        "class", "slide_id" and "tumor_type" are added.
    """
    # rename() without inplace returns a new frame, so the caller's df is
    # never mutated.
    out = df.rename(columns={"filename": "path"})

    # Split the relative path once and reuse the pieces.
    parts = out["path"].str.split('/')
    basename = parts.str[8]
    name_fields = basename.str.split("-")

    out["filename"] = basename
    out["class"] = parts.str[3]  # string
    out["slide_id"] = name_fields.str[2]
    out["tumor_type"] = name_fields.str[0].str.split("_").str[2]
    out["path"] = base_dir + out["path"]
    return out

In [8]:

# Load the fold table, then keep fold 1 at 40x magnification for each split.
df = preprocess_data(pd.read_csv('/kaggle/input/breakhis/Folds.csv'))
df_train = df.query("fold == 1 and mag == 40 and grp == 'train'")
df_test = df.query("fold == 1 and mag == 40 and grp == 'test'")
train_dataset, test_dataset = LBPDataset(df_train), LBPDataset(df_test)



In [9]:
# Run every dataset index through get_lbp and unzip the (features, label) pairs
features_lbp_train, label_lbp_train = zip(*map(train_dataset.get_lbp, range(len(train_dataset))))
features_lbp_test, label_lbp_test = zip(*map(test_dataset.get_lbp, range(len(test_dataset))))

In [10]:
# LBP: standardise using statistics fitted on the training features only,
# then fit an SVM and print its report on the test split.
ss = StandardScaler().fit(features_lbp_train)
X_train = ss.transform(features_lbp_train)
X_test = ss.transform(features_lbp_test)

svm_classifier = SVMClassifier()
svm_classifier.train(X_train, label_lbp_train)
svm_classifier.evaluate(X_test, label_lbp_test)

Classification Report:
              precision    recall  f1-score   support

      benign       0.71      0.23      0.35       255
   malignant       0.70      0.95      0.81       490

    accuracy                           0.70       745
   macro avg       0.71      0.59      0.58       745
weighted avg       0.71      0.70      0.65       745



In [11]:
# Build the feature and label tuples by running every dataset index through get_pftas.
# The training labels get their own PFTAS-specific name so this cell no longer
# overwrites `label_lbp_train` from the LBP pipeline.
features_pftas_train, label_pftas_train = zip(*(train_dataset.get_pftas(idx) for idx in range(len(train_dataset))))
features_pftas_test, label_pftas_test = zip(*(test_dataset.get_pftas(idx) for idx in range(len(test_dataset))))

In [12]:
# PFTAS features: standardise using statistics fitted on the training
# features only, then fit an SVM and print its report on the test split.
ss = StandardScaler().fit(features_pftas_train)
X_train = ss.transform(features_pftas_train)
X_test = ss.transform(features_pftas_test)

# NOTE(review): the training labels are read from `label_lbp_train`. Every
# extraction walks train_dataset in the same index order, so the labels line
# up with the PFTAS features -- consider a PFTAS-specific name.
svm_classifier = SVMClassifier()
svm_classifier.train(X_train, label_lbp_train)
svm_classifier.evaluate(X_test, label_pftas_test)

Classification Report:
              precision    recall  f1-score   support

      benign       0.89      0.64      0.74       255
   malignant       0.83      0.96      0.89       490

    accuracy                           0.85       745
   macro avg       0.86      0.80      0.82       745
weighted avg       0.85      0.85      0.84       745

