## Required Libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from PIL import Image
import requests
from io import BytesIO
from pathlib import Path
import os
from tqdm import tqdm
import ssl
import os
import socket
from tqdm import tqdm
import urllib.request
import pandas as pd
from lightgbm import LGBMClassifier
from sklearn.svm import SVC

## Data Preparation

In [3]:
# Define and set the project root directory as the current working directory
project_root = Path().resolve().parent
os.chdir(project_root)

In [4]:
# Create the filename and path
folder_path= os.path.join(project_root, 'data/processed')
file_name = "sample_df.csv"
file_path = os.path.join(folder_path, file_name)
df = pd.read_csv(file_path)
df = df.dropna(subset=["KumasDeseni", "NumuneResim"])
df.head()

Unnamed: 0,ModelKod,UrunOptionRef,CepDetayi,KolBoyu,KumasDeseni,YakaTipi,LisansTanim,LisansKarakterTanim,NumuneResim
1,S4GO09Z8,3038464,CEPSİZ,UZUN KOL,YOK,BİSİKLET YAKA,,,https://teknikfoy.lcwaikiki.local/ModelResim.a...
4,S52828Z8,3302747,CEPSİZ,KISA KOL,YOK,BİSİKLET YAKA,,,https://teknikfoy.lcwaikiki.local/ModelResim.a...
7,W53916Z8,3503947,CEPSİZ,,DÜZ,GÖMLEK YAKA+ DÜĞMELİ,,,https://teknikfoy.lcwaikiki.local/ModelResim.a...
9,S42345Z8,2900784,,KISA KOL,DÜZ,HAKİM YAKA,,,https://teknikfoy.lcwaikiki.local/ModelResim.a...
10,W4KG67Z8,3403948,,,MINIMAL DESEN,,,,https://teknikfoy.lcwaikiki.local/ModelResim.a...


In [5]:
df.shape

(6328, 9)

In [6]:
df.UrunOptionRef.nunique()

6328

In [7]:
df.KumasDeseni.nunique()

71

In [None]:
# Proxy ayarları
socket.setdefaulttimeout(10)  # Zaman aşımını uzat
proxy_support = urllib.request.ProxyHandler({
    'http': 'http://10.62.0.168:8080',
    'https': 'http://10.62.0.168:8080'
})
opener = urllib.request.build_opener(proxy_support)
urllib.request.install_opener(opener)

# Proje yolları

file_path = os.path.join(project_root, 'data/raw/RBM_images')
error_log = os.path.join(project_root, 'data/raw/RBM_logs/errors.log')

# Dosya yolu yoksa oluştur
os.makedirs(file_path, exist_ok=True)
os.makedirs(os.path.dirname(error_log), exist_ok=True)

# SSL doğrulamasını devre dışı bırak
context = ssl._create_unverified_context()

# Daha önce indirilen dosyaları atla
for index, row in tqdm(df.iterrows(), total=df.shape[0], desc="Downloading Images", unit="file"):
    image_url = row['NumuneResim'].replace(' ', '%20')  # Boşlukları encode et
    file_name = f"{row['UrunOptionRef']}.jpg"  # UrunOptionRef'i dosya adı olarak kullan
    full_path = os.path.join(file_path, file_name)  # Tam dosya yolu

    # Eğer dosya zaten varsa indirmeden atla
    if os.path.exists(full_path):
        print(f"Skipping {file_name} (already exists)")
        continue

    try:
        # Görseli indir
        response = urllib.request.urlopen(image_url, timeout=20, context=context)
        with open(full_path, 'wb') as out_file:
            out_file.write(response.read())
        print(f"Downloaded: {file_name}")
    except Exception as e:
        error_message = f"Skipping {image_url} due to error: {e}\n"
        print(error_message)
        # Hataları log dosyasına yaz
        with open(error_log, 'a', encoding='utf-8') as log_file:
            log_file.write(error_message)

# Hatalı dosyaların 'UrunOptionRef' değerlerini listeye ekleyin
error_files = [3140005, 3086255, 3220628, 2928716, 3143220, 2993807, 3220105, 2953228, 3062170, 2901937]

# SSL doğrulamasını devre dışı bırak
context = ssl._create_unverified_context()

# Sadece hatalı dosyaları yeniden indir
for ref in tqdm(error_files, desc="Re-downloading Images", unit="file"):
    image_url = df.loc[df['UrunOptionRef'] == ref, 'NumuneResim'].values[0].replace(' ', '%20')
    file_name = f"{ref}.jpg"
    full_path = os.path.join(file_path, file_name)

    try:
        response = urllib.request.urlopen(image_url, timeout=20, context=context)
        with open(full_path, 'wb') as out_file:
            out_file.write(response.read())
        print(f"Successfully re-downloaded: {file_name}")
    except Exception as e:
        error_message = f"Failed to re-download {ref}: {e}\n"
        print(error_message)
        with open(error_log, 'a', encoding='utf-8') as log_file:
            log_file.write(error_message)

# Disable SSL verification
context = ssl._create_unverified_context()

# Başarısız dosyaların listesini manuel olarak oluşturun
failed_files = [
    3140005,
    3086255,
    3220628,
    2928716,
    3143220,
    2993807,
    3220105,
    2953228,
    3062170,
    2901937
]
#['3220628', '3220105', '3062170']#["3220628", "3220105", "3062170"]   
# Kayıtların saklanacağı ana dizin
file_path = "D:\\Projects\\attribute_extraction_from_image\\data\\raw\\RBM_images"

# Log dosyası (hata kayıtları için)
error_log = os.path.join(file_path, "error_log.txt")

# Dosyaları yeniden indir
for file_id in tqdm(failed_files, desc="Re-downloading Images", unit="file"):
    image_url = df.loc[df['UrunOptionRef'] == int(file_id), 'NumuneResim'].values[0].replace(' ', '%20')
    file_name = f"{file_id}.jpg"
    full_path = os.path.join(file_path, file_name)

    try:
        response = urllib.request.urlopen(image_url, timeout=20, context=context)
        with open(full_path, 'wb') as out_file:
            out_file.write(response.read())
        print(f"Successfully re-downloaded: {file_name}")
    except Exception as e:
        error_message = f"Failed to re-download {file_id}: {e}\n"
        print(error_message)
        # Log the error
        with open(error_log, 'a', encoding='utf-8') as log_file:
            log_file.write(error_message)

# Hata alan dosyalar
failed_files = ["3220628", "3220105", "2953228"] #3062170

# Başarısız dosyaları DataFrame'den çıkar
df_filtered = df[~df["UrunOptionRef"].isin(failed_files)]

df_filtered.shape
# Hatalı id'leri liste olarak belirleyelim
error_ids = [
    3140005,
    3086255,
    3220628,
    2928716,
    3143220,
    2993807,
    3220105,
    2953228,
    3062170,
    2901937
]

# Hatalı ID'leri df_filtered'dan çıkaralım
df_filtered_cleaned = df_filtered[~df_filtered['UrunOptionRef'].isin(error_ids)]

In [51]:
df_filtered_cleaned.columns


Index(['ModelKod', 'UrunOptionRef', 'CepDetayi', 'KolBoyu', 'KumasDeseni',
       'YakaTipi', 'LisansTanim', 'LisansKarakterTanim', 'NumuneResim',
       'KumasDeseniEncoded'],
      dtype='object')

In [70]:
df_fabric = df_filtered_cleaned[['ModelKod', 'UrunOptionRef', 'KumasDeseni', 'NumuneResim']]

# KumasDeseni kolonunda sınıf sayımlarını hesaplama
category_counts = df_fabric['KumasDeseni'].value_counts()

# Pareto analizi için kümülatif yüzde hesaplama
category_percentages = (category_counts / category_counts.sum()) * 100
category_cumulative = category_percentages.cumsum()

# Veri setinin %80'ini oluşturan sınıfları bulma
pareto_classes = category_cumulative[category_cumulative <= 85].index
print(pareto_classes)

# Sonuçları birleştirerek görselleştirme
pareto_analysis = pd.DataFrame({
    'Count': category_counts,
    'Percentage': category_percentages,
    'Cumulative_Percentage': category_cumulative
})
pareto_analysis


Index(['DÜZ', 'YOK', 'METRAJ BASKILI', 'BASKILI', 'ÇİZGİLİ'], dtype='object', name='KumasDeseni')


Unnamed: 0_level_0,Count,Percentage,Cumulative_Percentage
KumasDeseni,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
DÜZ,3300,52.231719,52.231719
YOK,638,10.098132,62.329851
METRAJ BASKILI,518,8.198797,70.528648
BASKILI,416,6.584362,77.113010
ÇİZGİLİ,400,6.331117,83.444128
...,...,...,...
MİKRO EKOSE,1,0.015828,99.936689
EMBELLİSHED,1,0.015828,99.952517
KARELİ,1,0.015828,99.968344
BASKILI/DÜZ,1,0.015828,99.984172


In [71]:
# Türkçe sınıflar
turkce_siniflar = ["DÜZ", "EKOSE", "ÇİÇEKLİ", "GEOMETRİK", "HAYVAN DESENLİ",
                   "MONOCROM", "TİPOGRAFİ", "PUANTİYE", "ETNİK", "TROPİK",
                   "ÇİZGİLİ", "ŞAL DESENİ", "BLOK"]

# Büyük harfe çevirme
df_fabric.loc[:, 'KumasDeseni'] = df_fabric['KumasDeseni'].str.upper()

# Türkçe sınıflar dışındakileri 'DİĞER' yapma
df_fabric.loc[:, 'KumasDeseni'] = df_fabric['KumasDeseni'].apply(lambda x: x if x in pareto_classes else 'DİĞER')

In [None]:
# Görselleri işleyen fonksiyon
def process_image_from_id(product_id, img_dir, img_size=(64, 64)):
    """
    Ürün ID'sine göre ilgili görseli işleme.
    
    Args:
        product_id (str): Ürün ID'si.
        img_dir (str): Görsellerin bulunduğu dizin yolu.
        img_size (tuple): Görselin yeniden boyutlandırılacağı boyutlar.

    Returns:
        np.array: Düzleştirilmiş görsel.
    """
    try:
        # Görselin dosya yolunu oluştur
        filepath = os.path.join(img_dir, f"{product_id}.jpg")
        
        # Görseli oku ve işle
        img = Image.open(filepath).convert("L")  # Gri tonlama
        img = img.resize(img_size)  # Yeniden boyutlandır
        return np.array(img).flatten()  # Düzleştir
    except Exception as e:
        print(f"Error processing file {product_id}: {e}")
        # Hata durumunda sıfır dolu bir görsel döndür
        return np.zeros(img_size[0] * img_size[1])

# Görsellerin bulunduğu klasör
image_directory = os.path.join("data", "raw", "RBM_images")

# Görselleri işlemeye başla
X_images = np.array([
    process_image_from_id(product_id, image_directory)
    for product_id in df_fabric["UrunOptionRef"]
])


In [134]:
# Görselleri işlemeye başla
X_images_128 = np.array([
    process_image_from_id(product_id, image_directory, img_size=(128, 128))
    for product_id in df_fabric["UrunOptionRef"]
])

## Label Encoding

In [74]:
# Veriyi kopyalayarak çalışma
df_fabric = df_fabric.copy()

# Kumaş Deseni etiketlerini kodlama
label_encoder = LabelEncoder()
df_fabric["KumasDeseniEncoded"] = label_encoder.fit_transform(df_fabric["KumasDeseni"])

# Etiketleri numpy dizisine dönüştür
y_labels = df_fabric["KumasDeseniEncoded"].values


## Train/Test Split

In [75]:
# Veriyi eğitim ve test setlerine ayırma
X_train, X_test, y_train, y_test = train_test_split(X_images, y_labels, test_size=0.2, random_state=42, stratify=y_labels)

# Veriyi normalize etme
scaler = MinMaxScaler(feature_range=(0, 1))
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [136]:
# Veriyi eğitim ve test setlerine ayırma
X_train_128, X_test_128, y_train, y_test = train_test_split(X_images_128, y_labels, test_size=0.2, random_state=42, stratify=y_labels)

# Veriyi normalize etme
scaler = MinMaxScaler(feature_range=(0, 1))
X_train_128 = scaler.fit_transform(X_train_128)
X_test_128 = scaler.transform(X_test_128)

## Feature Extraction with RBM

In [90]:
rbm = BernoulliRBM(n_components=256, learning_rate=0.01, n_iter=10, random_state=42)

# RBM ile özellik çıkarımı
X_train_rbm = rbm.fit_transform(X_train)
X_test_rbm = rbm.transform(X_test)

In [139]:
rbm_128 = BernoulliRBM(n_components=128, learning_rate=0.01, n_iter=10, random_state=42)

# RBM ile özellik çıkarımı
X_train_128rbm = rbm_128.fit_transform(X_train)
X_test_128rbm = rbm_128.transform(X_test)

In [142]:
rbm = BernoulliRBM(n_components=512, learning_rate=0.01, n_iter=10, random_state=42)

# RBM ile özellik çıkarımı
X_train_512rbm = rbm.fit_transform(X_train)
X_test_512rbm = rbm.transform(X_test)

In [137]:
# RBM ile özellik çıkarımı
X_train_rbm_128 = rbm.fit_transform(X_train_128)
X_test_rbm_128 = rbm.transform(X_test_128)

## Classification

### RBM-Logistic Model

In [91]:
# RBM ve Logistic Regression pipeline'ı oluşturma

model = LogisticRegression(max_iter=1000, random_state=42)

model.fit(X_train_rbm, y_train)

y_pred = model.predict(X_test_rbm)

# Model performansını ölçme
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_, zero_division=0))

Classification Report:
                precision    recall  f1-score   support

       BASKILI       0.40      0.07      0.12        83
           DÜZ       0.53      0.97      0.69       660
         DİĞER       0.43      0.04      0.08       209
METRAJ BASKILI       1.00      0.01      0.02       104
           YOK       0.56      0.11      0.18       128
       ÇİZGİLİ       0.00      0.00      0.00        80

      accuracy                           0.53      1264
     macro avg       0.49      0.20      0.18      1264
  weighted avg       0.52      0.53      0.40      1264



In [138]:
# RBM ve Logistic Regression pipeline'ı oluşturma

model = LogisticRegression(max_iter=1000, random_state=42)

model.fit(X_train_rbm_128, y_train)

y_pred = model.predict(X_test_rbm_128)

# Model performansını ölçme
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_, zero_division=0))

Classification Report:
                precision    recall  f1-score   support

       BASKILI       0.57      0.05      0.09        83
           DÜZ       0.53      0.99      0.69       660
         DİĞER       0.33      0.02      0.04       209
METRAJ BASKILI       0.00      0.00      0.00       104
           YOK       0.44      0.03      0.06       128
       ÇİZGİLİ       0.00      0.00      0.00        80

      accuracy                           0.53      1264
     macro avg       0.31      0.18      0.15      1264
  weighted avg       0.41      0.53      0.38      1264



In [140]:
# RBM ve Logistic Regression pipeline'ı oluşturma

model = LogisticRegression(max_iter=1000, random_state=42)

model.fit(X_train_128rbm, y_train)

y_pred = model.predict(X_test_128rbm)

# Model performansını ölçme
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_, zero_division=0))

Classification Report:
                precision    recall  f1-score   support

       BASKILI       0.00      0.00      0.00        83
           DÜZ       0.52      0.99      0.69       660
         DİĞER       0.30      0.01      0.03       209
METRAJ BASKILI       0.80      0.04      0.07       104
           YOK       0.67      0.02      0.03       128
       ÇİZGİLİ       0.00      0.00      0.00        80

      accuracy                           0.52      1264
     macro avg       0.38      0.18      0.14      1264
  weighted avg       0.46      0.52      0.37      1264



In [143]:
# RBM ve Logistic Regression pipeline'ı oluşturma

model = LogisticRegression(max_iter=1000, random_state=42)

model.fit(X_train_512rbm, y_train)

y_pred = model.predict(X_test_512rbm)

# Model performansını ölçme
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_, zero_division=0))

Classification Report:
                precision    recall  f1-score   support

       BASKILI       0.29      0.02      0.04        83
           DÜZ       0.53      0.97      0.69       660
         DİĞER       0.36      0.06      0.10       209
METRAJ BASKILI       1.00      0.04      0.07       104
           YOK       0.72      0.10      0.18       128
       ÇİZGİLİ       0.00      0.00      0.00        80

      accuracy                           0.53      1264
     macro avg       0.48      0.20      0.18      1264
  weighted avg       0.51      0.53      0.40      1264



### Balanced RBM-Logistic Model

In [92]:
model = LogisticRegression(max_iter=1000, class_weight="balanced", random_state=42)

# Modeli eğitme
model.fit(X_train_rbm, y_train)

# Test seti üzerinde tahmin yapma
y_pred = model.predict(X_test_rbm)

# Model performansını ölçme
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_, zero_division=0))


Classification Report:
                precision    recall  f1-score   support

       BASKILI       0.22      0.17      0.19        83
           DÜZ       0.56      0.04      0.07       660
         DİĞER       0.26      0.10      0.14       209
METRAJ BASKILI       0.22      0.12      0.15       104
           YOK       0.12      0.72      0.20       128
       ÇİZGİLİ       0.10      0.29      0.15        80

      accuracy                           0.15      1264
     macro avg       0.25      0.24      0.15      1264
  weighted avg       0.38      0.15      0.11      1264



### RBM-LightGBM Model:
Best and simple

In [94]:
model = LGBMClassifier(random_state=42, force_col_wise=True)

# Modeli eğitme
model.fit(X_train_rbm, y_train)

# Test seti üzerinde tahmin yapma
y_pred = model.predict(X_test_rbm)

# Model performansını ölçme
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_, zero_division=0))

[LightGBM] [Info] Total Bins 55863
[LightGBM] [Info] Number of data points in the train set: 5054, number of used features: 256
[LightGBM] [Info] Start training from score -2.719793
[LightGBM] [Info] Start training from score -0.649401
[LightGBM] [Info] Start training from score -1.798111
[LightGBM] [Info] Start training from score -2.502069
[LightGBM] [Info] Start training from score -2.293525
[LightGBM] [Info] Start training from score -2.759614
Classification Report:
                precision    recall  f1-score   support

       BASKILI       0.26      0.10      0.14        83
           DÜZ       0.55      0.92      0.69       660
         DİĞER       0.40      0.10      0.16       209
METRAJ BASKILI       0.54      0.12      0.20       104
           YOK       0.67      0.16      0.25       128
       ÇİZGİLİ       0.27      0.05      0.08        80

      accuracy                           0.53      1264
     macro avg       0.45      0.24      0.26      1264
  weighted avg     

### RBM-LightGBM Balanced Classifier

In [96]:
# RBM ve Logistic Regression pipeline'ı oluşturma
model = LGBMClassifier(random_state=42, is_unbalance=True, force_col_wise=True)

# Modeli eğitme
model.fit(X_train_rbm, y_train)

# Test seti üzerinde tahmin yapma
y_pred = model.predict(X_test_rbm)

# Model performansını ölçme
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_, zero_division=0))

[LightGBM] [Info] Total Bins 55863
[LightGBM] [Info] Number of data points in the train set: 5054, number of used features: 256
[LightGBM] [Info] Start training from score -2.719793
[LightGBM] [Info] Start training from score -0.649401
[LightGBM] [Info] Start training from score -1.798111
[LightGBM] [Info] Start training from score -2.502069
[LightGBM] [Info] Start training from score -2.293525
[LightGBM] [Info] Start training from score -2.759614
Classification Report:
                precision    recall  f1-score   support

       BASKILI       0.26      0.10      0.14        83
           DÜZ       0.55      0.92      0.69       660
         DİĞER       0.40      0.10      0.16       209
METRAJ BASKILI       0.54      0.12      0.20       104
           YOK       0.67      0.16      0.25       128
       ÇİZGİLİ       0.27      0.05      0.08        80

      accuracy                           0.53      1264
     macro avg       0.45      0.24      0.26      1264
  weighted avg     

### RBM-LightGBM F1-Tuned Model

In [109]:
import numpy as np
from sklearn.metrics import f1_score

# Custom F1-score metric for LightGBM
def f1_metric_lgbm(y_true, y_pred):
    # Convert LightGBM predictions to predicted class indices
    y_pred_classes = np.argmax(y_pred.reshape(len(np.unique(y_true)), -1), axis=0)
    
    # Calculate F1 score using sklearn's f1_score
    f1 = f1_score(y_true, y_pred_classes, average="macro")
    
    # Return in LightGBM's expected format (name, value, is_higher_better)
    return "f1", f1, True

# Initialize LightGBM with multiclass objective
model = LGBMClassifier(
    objective="multiclass",
    num_class=len(label_encoder.classes_),
    random_state=42,
    is_unbalance=True,
    force_col_wise=True,
)

# Fit model with the custom evaluation metric
model.fit(
    X_train_rbm,
    y_train,
    eval_set=[(X_test_rbm, y_test)],
    eval_metric=f1_metric_lgbm,  # Pass custom F1 metric here
)

# Predict and evaluate
y_pred_probs = model.predict_proba(X_test_rbm)  # Get probabilities for each class
y_pred = y_pred_probs.argmax(axis=1)  # Convert probabilities to class predictions

# Compute F1-score using scikit-learn
f1 = f1_score(y_test, y_pred, average="macro")
print(f"F1-Score: {f1}")

# Model performance report
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_, zero_division=0))


[LightGBM] [Info] Total Bins 55863
[LightGBM] [Info] Number of data points in the train set: 5054, number of used features: 256
[LightGBM] [Info] Start training from score -2.719793
[LightGBM] [Info] Start training from score -0.649401
[LightGBM] [Info] Start training from score -1.798111
[LightGBM] [Info] Start training from score -2.502069
[LightGBM] [Info] Start training from score -2.293525
[LightGBM] [Info] Start training from score -2.759614
F1-Score: 0.25504301422513254
Classification Report:
                precision    recall  f1-score   support

       BASKILI       0.26      0.10      0.14        83
           DÜZ       0.55      0.92      0.69       660
         DİĞER       0.40      0.10      0.16       209
METRAJ BASKILI       0.54      0.12      0.20       104
           YOK       0.67      0.16      0.25       128
       ÇİZGİLİ       0.27      0.05      0.08        80

      accuracy                           0.53      1264
     macro avg       0.45      0.24      0.26

In [113]:
from collections import Counter

import numpy as np
from sklearn.metrics import f1_score
# Sınıf ağırlıklarını hesapla
class_counts = Counter(y_train)
total_samples = len(y_train)
class_weights = {cls: total_samples / count for cls, count in class_counts.items()}

# Initialize LightGBM with multiclass objective
model = LGBMClassifier(
    objective="multiclass",
    num_class=len(label_encoder.classes_),
    random_state=42,
    class_weight=class_weights,
    force_col_wise=True,
)

# Fit model with the custom evaluation metric
model.fit(
    X_train_rbm,
    y_train,
    eval_set=[(X_test_rbm, y_test)]
)

# Predict and evaluate
y_pred_probs = model.predict_proba(X_test_rbm)  # Get probabilities for each class
y_pred = y_pred_probs.argmax(axis=1)  # Convert probabilities to class predictions

# Model performance report
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_, zero_division=0))



[LightGBM] [Info] Total Bins 55863
[LightGBM] [Info] Number of data points in the train set: 5054, number of used features: 256
[LightGBM] [Info] Start training from score -1.791759
[LightGBM] [Info] Start training from score -1.791759
[LightGBM] [Info] Start training from score -1.791759
[LightGBM] [Info] Start training from score -1.791759
[LightGBM] [Info] Start training from score -1.791759
[LightGBM] [Info] Start training from score -1.791759
Classification Report:
                precision    recall  f1-score   support

       BASKILI       0.24      0.11      0.15        83
           DÜZ       0.59      0.28      0.38       660
         DİĞER       0.34      0.12      0.18       209
METRAJ BASKILI       0.38      0.16      0.23       104
           YOK       0.12      0.70      0.20       128
       ÇİZGİLİ       0.16      0.07      0.10        80

      accuracy                           0.26      1264
     macro avg       0.30      0.24      0.21      1264
  weighted avg     

## RBM-SVM Model

In [97]:
model = SVC(kernel="rbf", C=1.0, gamma="scale", random_state=42)

# Model eğitme
model.fit(X_train_rbm, y_train)

# Tahmin yapma
y_pred = model.predict(X_test_rbm)

# Performans raporu
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_, zero_division=0))


Classification Report:
                precision    recall  f1-score   support

       BASKILI       0.00      0.00      0.00        83
           DÜZ       0.52      1.00      0.69       660
         DİĞER       0.00      0.00      0.00       209
METRAJ BASKILI       0.00      0.00      0.00       104
           YOK       0.00      0.00      0.00       128
       ÇİZGİLİ       0.00      0.00      0.00        80

      accuracy                           0.52      1264
     macro avg       0.09      0.17      0.11      1264
  weighted avg       0.27      0.52      0.36      1264



## Simplest Best Model: 

In [114]:
model = LGBMClassifier(random_state=42, force_col_wise=True)

# Modeli eğitme
model.fit(X_train_rbm, y_train)

# Test seti üzerinde tahmin yapma
y_pred = model.predict(X_test_rbm)

# Model performansını ölçme
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_, zero_division=0))

[LightGBM] [Info] Total Bins 55863
[LightGBM] [Info] Number of data points in the train set: 5054, number of used features: 256
[LightGBM] [Info] Start training from score -2.719793
[LightGBM] [Info] Start training from score -0.649401
[LightGBM] [Info] Start training from score -1.798111
[LightGBM] [Info] Start training from score -2.502069
[LightGBM] [Info] Start training from score -2.293525
[LightGBM] [Info] Start training from score -2.759614
Classification Report:
                precision    recall  f1-score   support

       BASKILI       0.26      0.10      0.14        83
           DÜZ       0.55      0.92      0.69       660
         DİĞER       0.40      0.10      0.16       209
METRAJ BASKILI       0.54      0.12      0.20       104
           YOK       0.67      0.16      0.25       128
       ÇİZGİLİ       0.27      0.05      0.08        80

      accuracy                           0.53      1264
     macro avg       0.45      0.24      0.26      1264
  weighted avg     

## Class Based Threshold Tuning

Stick to the default thresholds !!

In [116]:
best_thresholds = {}
best_f1_scores = {}

# Iterate through each class
for class_idx in range(len(label_encoder.classes_)):
    best_threshold = 0.0
    best_f1 = 0.0

    for threshold in np.arange(0.1, 0.9, 0.1):
        # Get the predicted probabilities for the current class
        y_pred_binary = (model.predict_proba(X_test_rbm)[:, class_idx] > threshold).astype(int)
        
        # Create binary labels for the current class
        y_val_binary = (y_test == class_idx).astype(int)
        
        # Calculate F1 score
        current_f1 = f1_score(y_val_binary, y_pred_binary, average="binary")
        
        if current_f1 > best_f1:
            best_f1 = current_f1
            best_threshold = threshold

    # Store the best threshold and F1 score for the current class
    best_thresholds[label_encoder.classes_[class_idx]] = best_threshold
    best_f1_scores[label_encoder.classes_[class_idx]] = best_f1

# Print the results
print("Best Thresholds and F1-Scores for each class:")
for cls in label_encoder.classes_:
    print(f"Class: {cls}, Best Threshold: {best_thresholds[cls]}, Best F1-Score: {best_f1_scores[cls]}")


Best Thresholds and F1-Scores for each class:
Class: BASKILI, Best Threshold: 0.1, Best F1-Score: 0.2097902097902098
Class: DÜZ, Best Threshold: 0.2, Best F1-Score: 0.6975476839237057
Class: DİĞER, Best Threshold: 0.1, Best F1-Score: 0.32493483927019984
Class: METRAJ BASKILI, Best Threshold: 0.2, Best F1-Score: 0.25149700598802394
Class: YOK, Best Threshold: 0.4, Best F1-Score: 0.2641509433962264
Class: ÇİZGİLİ, Best Threshold: 0.8, Best F1-Score: 0.09411764705882353


In [117]:
y_pred_custom = np.zeros_like(y_pred_probs.argmax(axis=1))
for class_idx, threshold in enumerate([0.1, 0.2, 0.1, 0.2, 0.4, 0.8]):
    y_pred_custom[(y_pred_probs[:, class_idx] > threshold)] = class_idx


In [118]:
from sklearn.metrics import classification_report

# Generate classification report
print("Classification Report with Custom Thresholds:")
print(classification_report(y_test, y_pred_custom, target_names=label_encoder.classes_, zero_division=0))


Classification Report with Custom Thresholds:
                precision    recall  f1-score   support

       BASKILI       0.24      0.06      0.10        83
           DÜZ       0.62      0.15      0.24       660
         DİĞER       0.19      0.88      0.32       209
METRAJ BASKILI       0.27      0.24      0.25       104
           YOK       0.58      0.16      0.26       128
       ÇİZGİLİ       0.50      0.05      0.09        80

      accuracy                           0.27      1264
     macro avg       0.40      0.26      0.21      1264
  weighted avg       0.49      0.27      0.24      1264



In [None]:
### FashionClip Model

to be continued

In [121]:
from clip import load

In [133]:
# Load the CLIP model
model, preprocess = load("ViT-B/32")

RecursionError: maximum recursion depth exceeded