In [4]:
import numpy as np
import cv2
from skimage.feature import local_binary_pattern
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
import tensorflow as tf
import keras
import pandas as pd

In [5]:


def extract_color_histogram(img, bins=(8, 8, 8)):

    hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
    hist = cv2.calcHist([hsv], [0, 1, 2], None, bins, [0, 256, 0, 256, 0, 256])
    return cv2.normalize(hist, hist).flatten()

def extract_lbp_features(img, P=24, R=3):

    gray_image = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    lbp = local_binary_pattern(gray_image, P, R, method='uniform')
    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, P + 3), range=(0, P + 2))
    return hist.astype("float").flatten()

def extract_features(img_path):


    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array_expanded = np.expand_dims(img_array, axis=0)
    img_preprocessed = preprocess_input(img_array_expanded)





    color_hist = extract_color_histogram(img_array)
    lbp_features = extract_lbp_features(img_array)


    combined_features = np.concatenate([color_hist, lbp_features])
    return combined_features


classifier = make_pipeline(StandardScaler(), SVC(kernel='linear'))


In [6]:
DATA_DIR = 'preprocessed_data'
IMAGE_SIZE = (240, 180)

In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split




def load_data_and_labels(base_dir):
    data = []
    labels = []

    for folder_name in os.listdir(base_dir):
        print(folder_name)
        if folder_name.startswith("biome_"):
            biome_label = folder_name.split("_")[1]
            folder_path = os.path.join(base_dir, folder_name)

            for image_name in os.listdir(folder_path):
                print(image_name)
                if image_name.endswith(".jpg"):
                    image_path = os.path.join(folder_path, image_name)
                    features = extract_features(image_path)
                    data.append(features)
                    labels.append(biome_label)
    return np.array(data), np.array(labels)


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:

features, labels = load_data_and_labels("drive/MyDrive/"+DATA_DIR)
df1 = pd.DataFrame(features)
df1.to_csv("features.csv")
df2 = pd.DataFrame(labels)
df2.to_csv("labels.csv")

[1;30;43mВыходные данные были обрезаны до нескольких последних строк (5000).[0m
biome_21_195.jpg
biome_21_166.jpg
biome_21_221.jpg
biome_21_198.jpg
biome_21_229.jpg
biome_21_185.jpg
biome_21_158.jpg
biome_21_13.jpg
biome_21_107.jpg
biome_21_202.jpg
biome_21_266.jpg
biome_21_151.jpg
biome_21_139.jpg
biome_21_216.jpg
biome_21_228.jpg
biome_21_118.jpg
biome_21_29.jpg
biome_21_272.jpg
biome_21_220.jpg
biome_21_252.jpg
biome_21_172.jpg
biome_21_24.jpg
biome_21_183.jpg
biome_21_251.jpg
biome_21_194.jpg
biome_21_259.jpg
biome_21_132.jpg
biome_21_85.jpg
biome_21_16.jpg
biome_21_119.jpg
biome_21_11.jpg
biome_21_109.jpg
biome_21_292.jpg
biome_21_164.jpg
biome_21_136.jpg
biome_21_204.jpg
biome_21_256.jpg
biome_21_91.jpg
biome_21_39.jpg
biome_21_112.jpg
biome_21_279.jpg
biome_21_88.jpg
biome_21_264.jpg
biome_21_197.jpg
biome_21_248.jpg
biome_21_238.jpg
biome_21_153.jpg
biome_21_171.jpg
biome_21_236.jpg
biome_21_113.jpg
biome_21_40.jpg
biome_21_57.jpg
biome_21_72.jpg
biome_21_117.jpg
biome_21_69.

In [None]:
BIOMES = {
    37: 'badlands',
    39: 'badlands_plateau',
    16: 'beach',
    27: 'birch_forest',
    28: 'birch_forest_hills',
    46: 'cold_ocean',
    29: 'dark_forest',
    157: 'dark_forest_hills',
    49: 'deep_cold_ocean',
    50: 'deep_frozen_ocean',
    48: 'deep_lukewarm_ocean',
    24: 'deep_ocean',
    47: 'deep_warm_ocean',
    2: 'desert',
    17: 'desert_hills',
    130: 'desert_lakes',
    43: 'end_barrens',
    42: 'end_highlands',
    41: 'end_midlands',
    165: 'eroded_badlands',
    132: 'flower_forest',
    4: 'forest',
    10: 'frozen_ocean',
    11: 'frozen_river',
    160: 'giant_spruce_taiga',
    161: 'giant_spruce_taiga_hills',
    32: 'giant_tree_taiga',
    33: 'giant_tree_taiga_hills',
    131: 'gravelly_mountains',
    140: 'ice_spikes',
    21: 'jungle',
    23: 'jungle_edge',
    22: 'jungle_hills',
    45: 'lukewarm_ocean',
    167: 'modified_badlands_plateau',
    162: 'modified_gravelly_mountains',
    149: 'modified_jungle',
    151: 'modified_jungle_edge',
    166: 'modified_wooded_badlands_plateau',
    3: 'mountains',
    20: 'mountain_edge',
    14: 'mushroom_fields',
    15: 'mushroom_field_shore',
    8: 'nether',
    0: 'ocean',
    1: 'plains',
    7: 'river',
    35: 'savanna',
    36: 'savanna_plateau',
    163: 'shattered_savanna',
    164: 'shattered_savanna_plateau',
    40: 'small_end_islands',
    26: 'snowy_beach',
    13: 'snowy_mountains',
    30: 'snowy_taiga',
    31: 'snowy_taiga_hills',
    158: 'snowy_taiga_mountains',
    12: 'snowy_tundra',
    25: 'stone_shore',
    129: 'sunflower_plains',
    6: 'swamp',
    134: 'swamp_hills',
    5: 'taiga',
    19: 'taiga_hills',
    133: 'taiga_mountains',
    155: 'tall_birch_forest',
    156: 'tall_birch_hills',
    9: 'the_end',
    127: 'the_void',
    44: 'warm_ocean',
    38: 'wooded_badlands_plateau',
    18: 'wooded_hills',
    34: 'wooded_mountains',
}

In [None]:

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report
import numpy as np


target_names = [BIOMES.get(int(label), str(label)) for label in labels]

X_train, X_test, y_train, y_test = train_test_split(features, target_names, test_size=0.2, random_state=42)

classifier = make_pipeline(StandardScaler(), SVC(kernel='linear'))
classifier.fit(X_train, y_train)

predictions = classifier.predict(X_test)



report = classification_report(y_test, predictions)
print("Classification Report:\n", report)



Classification Report:
                              precision    recall  f1-score   support

                   badlands       0.76      0.90      0.83        29
           badlands_plateau       0.58      0.70      0.64        10
                      beach       0.53      0.62      0.57        89
               birch_forest       0.58      0.52      0.55       166
         birch_forest_hills       0.00      0.00      0.00        48
                dark_forest       0.37      0.24      0.29       148
          dark_forest_hills       1.00      0.08      0.14        13
                     desert       0.78      0.93      0.85       334
               desert_hills       0.84      0.32      0.46        85
               desert_lakes       0.00      0.00      0.00        14
              flower_forest       0.47      0.30      0.37        23
                     forest       0.54      0.79      0.64       576
               frozen_ocean       1.00      1.00      1.00        14
         

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
features=pd.read_csv("features.csv",index_col=0).to_numpy()
labels=pd.read_csv("labels.csv",index_col=0).to_numpy()

target_names = [BIOMES.get(int(label), str(label)) for label in labels]


X_train, X_test, y_train, y_test = train_test_split(features, target_names, test_size=0.2, random_state=42)
label_encoder = LabelEncoder()


y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

model = xgb.XGBClassifier(objective='multi:softmax', num_class=len(np.unique(y_train)))
model.fit(X_train, y_train_encoded)

predictions = model.predict(X_test)
predictions = label_encoder.inverse_transform(predictions)
report = classification_report(y_test, predictions)
print("Classification Report:\n", report)


  target_names = [BIOMES.get(int(label), str(label)) for label in labels]


Classification Report:
                              precision    recall  f1-score   support

                   badlands       0.81      0.90      0.85        29
           badlands_plateau       0.75      0.60      0.67        10
                      beach       0.64      0.65      0.65        89
               birch_forest       0.65      0.66      0.66       166
         birch_forest_hills       0.53      0.19      0.28        48
                dark_forest       0.53      0.48      0.50       148
          dark_forest_hills       0.00      0.00      0.00        13
                     desert       0.78      0.90      0.84       334
               desert_hills       0.71      0.42      0.53        85
               desert_lakes       0.50      0.07      0.12        14
              flower_forest       0.87      0.57      0.68        23
                     forest       0.61      0.77      0.68       576
               frozen_ocean       0.93      0.93      0.93        14
         

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.3],
    'max_depth': [3, 5, 7],
    'subsample': [0.7, 0.8, 0.9],
    'colsample_bytree': [0.7, 0.8, 0.9],
}

grid_search = GridSearchCV(xgb.XGBClassifier(objective='multi:softmax', num_class=len(np.unique(y_train_encoded))),
                           param_grid,
                           scoring='accuracy',
                           cv=3,
                           verbose=1,n_jobs=-1)

grid_search.fit(X_train, y_train_encoded)
print("Best parameters:", grid_search.best_params_)

Fitting 3 folds for each of 9 candidates, totalling 27 fits


  pid = os.fork()


KeyboardInterrupt: 