In [6]:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Author: Weijie Zou

import os
import numpy as np
import matplotlib.pyplot as plt
from utils.data_vis import plot_img_and_mask, plot_imgs, plot_mask

# Path to the images and masks
imgs_path = r'F:\Workspace\Projects\气象局技能大赛\基于机器学习的晴空回波识别\Data_adjusted_psahu_400\imgs'
masks_path = r'F:\Workspace\Projects\气象局技能大赛\基于机器学习的晴空回波识别\Data_adjusted_psahu_400\masks'

# List of file stems (names without the extension) found in the masks path.
# - os.path.splitext strips only the final extension, so a stem that itself
#   contains dots (e.g. a timestamp such as "2021.05.01_0300") is kept whole
#   and still matches the image file that is loaded later as stem + '.npy'.
# - Entries that are not .npy files (thumbnail caches, checkpoints, ...) are
#   skipped instead of producing a failing np.load further down.
# - os.listdir returns entries in arbitrary order; sorting makes any later
#   "first N files" subset the same on every run and every machine.
files_sv = sorted(
    stem
    for stem, ext in map(os.path.splitext, os.listdir(masks_path))
    if ext.lower() == '.npy'
)

# Per-class feature accumulators. Every name is a list that receives one 1-D
# array per processed file. The numeric suffix is the echo class assigned in
# the loop below: 0 = no echo, 1 = clear-air echo, 2 = meteorological echo.
r_0, v_0, w_0, ldr_0, text_r_0, text_v_0, text_w_0, height_0 = ([] for _ in range(8))
r_1, v_1, w_1, ldr_1, text_r_1, text_v_1, text_w_1, height_1 = ([] for _ in range(8))
r_2, v_2, w_2, ldr_2, text_r_2, text_v_2, text_w_2, height_2 = ([] for _ in range(8))

MAX_FILES = 10         # Limiting to the first 10 files for testing
FAR_RANGE_GATE = 150   # from this column outward every detected echo becomes class 2
GATE_SPACING = 0.03    # height step per column (units not stated here; presumably km - TODO confirm)
SMOOTH_HALF_WIDTH = 2  # smoothing window is (2 * 2 + 1) = 5 pixels wide in each direction


def _row_difference_sq(field):
    """Return the squared difference between each pixel and the pixel one row above.

    Row 0 has no predecessor and stays 0.
    NOTE(review): column 0 is also left at 0 because the loop this replaces
    started both indices at 1. That looks unintentional (the difference is
    taken along rows only), but it is kept so the features stay identical to
    what any existing model or inference code was built on.
    """
    out = np.zeros_like(field)
    out[1:, 1:] = np.abs(field[1:, 1:] - field[:-1, 1:]) ** 2
    return out


def _box_mean(field, half_width=2):
    """Return the mean of `field` over a square window centred on each pixel.

    The window is clipped at the array border and the sum is divided by the
    number of pixels that actually fall inside the array, so border pixels
    are not biased toward zero. A NaN only affects the windows that contain
    it. The result has the dtype of `field` and equals the per-pixel
    `np.sum(window) / window_size` up to floating-point rounding (the terms
    are added in a different order).
    """
    n_rows, n_cols = field.shape
    # Zero padding lets every shifted slice have the full array shape; the
    # padded zeros add nothing to the sums.
    padded = np.pad(field, half_width, mode='constant', constant_values=0)
    window_sum = np.zeros_like(field)
    for d_row in range(2 * half_width + 1):
        for d_col in range(2 * half_width + 1):
            window_sum += padded[d_row:d_row + n_rows, d_col:d_col + n_cols]

    # Number of in-bounds rows / columns covered by the window at each index.
    rows, cols = np.arange(n_rows), np.arange(n_cols)
    row_count = np.minimum(rows + half_width, n_rows - 1) - np.maximum(rows - half_width, 0) + 1
    col_count = np.minimum(cols + half_width, n_cols - 1) - np.maximum(cols - half_width, 0) + 1
    return (window_sum / np.outer(row_count, col_count)).astype(field.dtype)


# Loop through the files and process each image and corresponding mask
for f in files_sv[:MAX_FILES]:
    # Load the image and mask; the four image channels are unpacked as
    # r, v, w and ldr (presumably reflectivity, velocity, spectrum width and
    # linear depolarisation ratio - TODO confirm against the data producer).
    img = np.load(os.path.join(imgs_path, f + '.npy'))
    r, v, w, ldr = img[:, :, 0], img[:, :, 1], img[:, :, 2], img[:, :, 3]
    mask = np.load(os.path.join(masks_path, f + '.npy'))

    # Build the per-pixel class map:
    #   0 - none of the three thresholds is met (NaN inputs compare False)
    #   1 - at least one threshold is met            -> clear-air echo
    #   2 - threshold met AND the mask marks the pixel -> meteorological echo
    # A pixel marked in the mask but failing every threshold stays 0.
    has_echo = (r >= -50) | (v >= -15) | (w >= 0)
    echo_mask = np.zeros(mask.shape)
    echo_mask[has_echo] = 1
    echo_mask[has_echo & (mask == 1)] = 2

    # From FAR_RANGE_GATE outward every remaining class-1 pixel is promoted
    # to class 2. The slice is a view, so this writes into echo_mask.
    far_range = echo_mask[:, FAR_RANGE_GATE:]
    far_range[far_range == 1] = 2

    # Keep the loaded mask consistent with the promoted pixels. It is not
    # used for the features below.
    mask[echo_mask == 2] = 1
    echomask = echo_mask  # same array; name kept for code that expects it

    # Texture: squared row-to-row difference, then a clipped box mean.
    Text_r_m, Text_v_m, Text_w_m = (_row_difference_sq(ch) for ch in (r, v, w))
    Text_r, Text_v, Text_w = (
        _box_mean(raw, SMOOTH_HALF_WIDTH) for raw in (Text_r_m, Text_v_m, Text_w_m)
    )

    # Height depends only on the column index; broadcast it down all rows.
    Height = np.arange(r.shape[1]) * GATE_SPACING  # Height based on range
    H = np.zeros_like(r)
    H[:] = Height

    # Append the pixels of each class, feature by feature. The order of the
    # feature tuple must match the order of the accumulator tuples.
    features = (r, v, w, ldr, Text_r, Text_v, Text_w, H)
    accumulators = {
        0: (r_0, v_0, w_0, ldr_0, text_r_0, text_v_0, text_w_0, height_0),
        1: (r_1, v_1, w_1, ldr_1, text_r_1, text_v_1, text_w_1, height_1),
        2: (r_2, v_2, w_2, ldr_2, text_r_2, text_v_2, text_w_2, height_2),
    }
    for label, buckets in accumulators.items():
        selected = echomask == label
        for bucket, feature in zip(buckets, features):
            # Boolean indexing already yields a new 1-D array.
            bucket.append(feature[selected])

# Merge the per-file pieces of every feature into one 1-D array per feature
# and class. The names are rebound from "list of arrays" to "single array".
_class_0 = [np.concatenate(pieces) for pieces in (r_0, v_0, w_0, ldr_0, text_r_0, text_v_0, text_w_0, height_0)]
_class_1 = [np.concatenate(pieces) for pieces in (r_1, v_1, w_1, ldr_1, text_r_1, text_v_1, text_w_1, height_1)]
_class_2 = [np.concatenate(pieces) for pieces in (r_2, v_2, w_2, ldr_2, text_r_2, text_v_2, text_w_2, height_2)]

r_0, v_0, w_0, ldr_0, text_r_0, text_v_0, text_w_0, height_0 = _class_0
r_1, v_1, w_1, ldr_1, text_r_1, text_v_1, text_w_1, height_1 = _class_1
r_2, v_2, w_2, ldr_2, text_r_2, text_v_2, text_w_2, height_2 = _class_2

# One (n_pixels, 8) matrix per class: each feature becomes a column, in the
# order r, v, w, ldr, text_r, text_v, text_w, height.
train_0 = np.column_stack(_class_0)
train_1 = np.column_stack(_class_1)
train_2 = np.column_stack(_class_2)

# Define sample size (upper bound on the number of rows drawn per class)
sample_size = 4000

# Seeded generator: the subsample and the shuffle below are then identical on
# every Restart & Run All, so the trained model can be reproduced.
SEED = 42
rng = np.random.default_rng(SEED)

# Value written in place of NaN features so the classifier receives finite input.
NAN_FILL = -999

# A class with fewer than `sample_size` rows contributes all of its rows.
sample_size_0 = min(sample_size, train_0.shape[0])
sample_size_1 = min(sample_size, train_1.shape[0])
sample_size_2 = min(sample_size, train_2.shape[0])

print("Sample numbers:", sample_size_0, sample_size_1, sample_size_2)

# Randomly select row indices without replacement (no row is drawn twice)
indices_0 = rng.choice(train_0.shape[0], sample_size_0, replace=False)
indices_1 = rng.choice(train_1.shape[0], sample_size_1, replace=False)
indices_2 = rng.choice(train_2.shape[0], sample_size_2, replace=False)

train_0_sampled = train_0[indices_0]
train_1_sampled = train_1[indices_1]
train_2_sampled = train_2[indices_2]

# Generate corresponding labels for each class. After concatenation the
# labels are floating point (0., 1., 2.), which is what the classifier in the
# next cell stores as its classes.
labels_0 = np.zeros(sample_size_0)
labels_1 = np.ones(sample_size_1)
labels_2 = np.full(sample_size_2, 2)

# Combine training data and labels (row i of train_set belongs to labels_set[i])
train_set = np.concatenate([train_0_sampled, train_1_sampled, train_2_sampled], axis=0)
train_set[np.isnan(train_set)] = NAN_FILL  # Handle NaN values by replacing with -999
labels_set = np.concatenate([labels_0, labels_1, labels_2], axis=0)

# Shuffle the training set and labels with one shared permutation so the
# feature rows and their labels stay aligned.
indices = rng.permutation(train_set.shape[0])
train_set = train_set[indices]
labels_set = labels_set[indices]

# Output shapes
print("Train set shape:", train_set.shape)
print("Label set shape:", labels_set.shape)
print("Unique labels distribution:", np.unique(labels_set, return_counts=True))


Sample numbers: 4000 4000 4000
Train set shape: (12000, 8)
Label set shape: (12000,)
Unique labels distribution: (array([0., 1., 2.]), array([4000, 4000, 4000], dtype=int64))


In [7]:
# Train model
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# The whole sampled set is used for fitting; nothing is held out here.
X_train, y_train = train_set, labels_set

# Four hidden layers of 20, 50, 50 and 20 units. random_state fixes the
# weight initialisation and batch shuffling so that re-running the cell on
# the same data yields the same model.
# NOTE(review): the features are passed in unscaled and missing values were
# replaced by -999 earlier in the notebook; consider standardising the
# inputs if convergence is slow or unstable.
clf = MLPClassifier(hidden_layer_sizes=(20, 50, 50, 20), max_iter=10000, random_state=42)
clf.fit(X_train, y_train)

# Save model (dump returns the list of written file names, which the
# notebook displays as the cell output)
from joblib import dump, load
dump(clf, 'model_BP.joblib')





['model_BP.joblib']