<a href="https://www.kaggle.com/code/geazzy/ic-final-project-2-lbp-svm?scriptVersionId=153885106" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

Etapas:
1. Examinar e entender os dados (Pré-processamento)
2. **Criar um pipeline de entrada (Extração de características)**
3. Criar o modelo de classificação
4. Treinar o modelo
5. Avaliar modelo

In [1]:
import pandas as pd
import cv2

def preprocess_data(df):
    """Derive path-based metadata columns from the raw fold listing.

    The incoming ``filename`` column holds a '/'-separated relative path.
    It is renamed to ``path`` and these columns are derived from it:

    * ``filename``   - component 8 of the path
    * ``class``      - component 3 of the path
    * ``slide_id``   - third '-'-separated token of ``filename``
    * ``tumor_type`` - third '_'-separated token of the first
      '-'-separated token of ``filename``

    ``path`` is then prefixed with the Kaggle dataset root.

    Args:
        df: DataFrame containing a ``filename`` column of relative paths.

    Returns:
        A new DataFrame with the columns described above. The input frame
        is not modified, so the function can be re-run on the same raw
        frame and gives the same result every time.
    """
    # rename() without inplace returns a new frame, leaving the caller's
    # DataFrame untouched.
    out = df.rename(columns={"filename": "path"})

    # Split the path and the file name once and reuse the pieces.
    parts = out["path"].str.split('/')
    file_name = parts.str[8]
    name_tokens = file_name.str.split("-")

    out["filename"] = file_name
    out["class"] = parts.str[3]
    out["slide_id"] = name_tokens.str[2]
    out["tumor_type"] = name_tokens.str[0].str.split("_").str[2]
    out["path"] = '/kaggle/input/breakhis/BreaKHis_v1/' + out["path"]

    return out

def get_data_per_fold(df, x):
    """Return an independent copy of the rows whose ``fold`` column equals ``x``."""
    in_fold = df["fold"] == x
    selected = df.loc[in_fold]
    return selected.copy()


# Read the fold listing and derive the metadata columns in one step
df = preprocess_data(pd.read_csv('/kaggle/input/breakhis/Folds.csv'))
# Independent copy of the rows where fold == 1
df_fold_1 = get_data_per_fold(df, 1)


In [2]:
#https://www.mdpi.com/2313-433X/4/10/114
# n_points (P) is the total number of neighboring pixels I_p, sampled at a distance R from I_c
# Settings for LBP:
# method="uniform" => uniform patterns, which are grayscale invariant and rotation invariant, offering finer quantization of the angular space.
# The LBP(P, R) operator produces 2^P different binary patterns that can be formed by the P pixels in its neighborhood.
# (P, R) denotes a neighborhood of P points evenly distributed at radius R.
# import the necessary packages
from skimage import feature
import numpy as np

class LocalBinaryPatterns:
    """Histogram descriptor built on scikit-image's ``local_binary_pattern``.

    Attributes:
        numPoints: number of neighbor points (P) passed to
            ``local_binary_pattern``.
        radius: neighborhood radius (R) passed to ``local_binary_pattern``.
    """

    def __init__(self, numPoints, radius):
        self.numPoints = numPoints
        self.radius = radius

    def describe(self, image):
        """Return the uniform-LBP code histogram of a BGR image.

        Args:
            image: color image accepted by
                ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.

        Returns:
            1-D array of raw (un-normalized) counts with exactly
            ``numPoints + 2`` entries, one per LBP code.
        """
        img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        lbp = feature.local_binary_pattern(img_gray, self.numPoints,
                                           self.radius, method="uniform")

        # method="uniform" emits codes 0 .. numPoints + 1, so the histogram
        # always needs numPoints + 2 bins. Deriving the bin count from
        # lbp.max() made the vector length depend on the image content: an
        # image that lacks the highest code produced a shorter histogram
        # that no longer lines up with the other samples.
        n_bins = int(self.numPoints) + 2
        (hist, _) = np.histogram(lbp.ravel(),
                                 bins=n_bins,
                                 range=(0, n_bins))
        return hist
    
# LBP neighborhood settings: radius R and P = 8 * R sampling points
radius = 1
n_points = 8 * radius
# Module-level descriptor instance; to_lbp() reads it as a global
lbp = LocalBinaryPatterns(n_points, radius)

In [3]:
def to_lbp(df):
    """Compute one LBP histogram per image listed in ``df``.

    Args:
        df: DataFrame with a ``path`` column of image file paths.

    Returns:
        List holding the result of ``lbp.describe`` for each row, in row
        order.

    Raises:
        OSError: if ``cv2.imread`` cannot load one of the images.
    """
    # Expected ndarray shape: (height, width, channels)
    target_shape = (460, 700, 3)
    target_height, target_width = target_shape[:2]
    lbp_result = []

    for path in df["path"]:
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports failure by returning None rather than
            # raising; fail here with the offending path instead of an
            # AttributeError on ``img.shape``.
            raise OSError(f"Could not read image (missing or unreadable): {path}")

        if img.shape != target_shape:
            # cv2.resize takes dsize as (width, height), the reverse of the
            # (rows, cols) order used by ndarray.shape. Passing (460, 700)
            # produced 700x460 arrays instead of the intended 460x700.
            img = cv2.resize(img, (target_width, target_height))

        lbp_result.append(lbp.describe(img))
    return lbp_result

In [4]:
from sklearn.preprocessing import StandardScaler

# Rows with fold == 1 and mag == 40, then partitioned by the `grp` column
fold1_mag40 = df[(df["fold"] == 1) & (df["mag"] == 40)]
df_train_1 = fold1_mag40[fold1_mag40["grp"] == "train"]
df_test_1 = fold1_mag40[fold1_mag40["grp"] == "test"]

# Class labels, in the same row order as the feature matrices below
y_train = df_train_1["class"].tolist()
y_test = df_test_1["class"].tolist()

# Fit the scaler on the training histograms only, then reuse it for the test set
ss = StandardScaler()
train_histograms = to_lbp(df_train_1)
X_train = ss.fit_transform(train_histograms)
test_histograms = to_lbp(df_test_1)
X_test = ss.transform(test_histograms)


In [5]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

def svm(X_train, y_train, X_test):
    """Fit an SVC on the training data and predict labels for ``X_test``.

    The classifier is always built with C=1.0, kernel='rbf',
    gamma='scale' and coef0=0.

    Returns:
        The value returned by the fitted classifier's ``predict(X_test)``.
    """
    classifier = SVC(C=1.0, kernel='rbf', gamma='scale', coef0=0)
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    return predictions


# Train on the scaled training features and predict the scaled test features
y_pred = svm(X_train, y_train, X_test)

# Per-class precision / recall / F1 of the predictions against y_test
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

      benign       0.65      0.19      0.30       255
   malignant       0.69      0.95      0.80       490

    accuracy                           0.69       745
   macro avg       0.67      0.57      0.55       745
weighted avg       0.68      0.69      0.63       745

