# 安装库

In [None]:
# Kaggle datasets / notebooks that provide the offline packages installed below

# https://www.kaggle.com/datasets/bladerunner2022/mmcls0250
# https://www.kaggle.com/datasets/isps737/mmdetection-2-26-0
# https://www.kaggle.com/datasets/ermak9/pycocotools
# https://www.kaggle.com/code/atom1231/mmdet3-wheels

In [None]:
# # Install pycocotools package
import os
!mkdir /kaggle/working/packages
!cp -r /kaggle/input/pycocotools/* /kaggle/working/packages
os.chdir("/kaggle/working/packages/pycocotools-2.0.6/")
!python setup.py install -q
!pip install . --no-index --find-links /kaggle/working/packages/ -q

!pip install /kaggle/input/mmdetection-2-26-0/mmdetection-2-26-0/addict-2.4.0-py3-none-any.whl
!pip install /kaggle/input/mmdetection-2-26-0/mmdetection-2-26-0/yapf-0.32.0-py2.py3-none-any.whl
!pip install /kaggle/input/mmdetection-2-26-0/mmdetection-2-26-0/terminal-0.4.0-py3-none-any.whl
!pip install /kaggle/input/mmdetection-2-26-0/mmdetection-2-26-0/terminaltables-3.1.10-py2.py3-none-any.whl
#ytt
!pip install /kaggle/input/mmdet3-wheels/mmcv_full-1.7.1-cp310-cp310-linux_x86_64.whl

!cp -r /kaggle/input/mmdetection-2-26-0/mmdetection-2-26-0/mmdetection/ /kaggle/working/
%cd /kaggle/working/mmdetection
!pip install -e . --no-deps
%cd /kaggle/working/

!pip install /kaggle/input/mmdetection-2-26-0/mmdetection-2-26-0/mmdet-2.26.0-py3-none-any.whl

!pip install /kaggle/input/mmcls0250/mmcls-0.25.0-py2.py3-none-any.whl

In [None]:
import cv2
import numpy as np
import pandas as pd
import os
from glob import glob
from tqdm.notebook import tqdm
import sys
import gc
sys.path.append('/kaggle/input/mmsegmentation-030/mmsegmentation-030') # 将我们的mmsegmentation包上传到kaggle
from mmseg.apis import init_segmentor, inference_segmentor
from mmcv.utils import config
import torch
import matplotlib.pyplot as plt
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


# 配置

In [None]:
# One config path per ensemble member: five entries using the ConvNeXt-L config,
# followed by five entries using the ConvNeXt-B config.
configs = (
    ['/kaggle/input/upernet-convnext/upernet_convnext_l_config.py'] * 5
    + ['/kaggle/input/upernet-convnext/upernet_convnext_b_config.py'] * 5
)

# Checkpoint files, positionally paired with `configs` (folds 0-4 for each group).
ckpts = [
    '/kaggle/input/upernet-convnext/upernet_convnext_l_fold0_epoch_24_0.85_0.7034.pth',
    '/kaggle/input/upernet-convnext/upernet_convnext_l_fold1_epoch_22_0.80_0.7088.pth',
    '/kaggle/input/upernet-convnext/upernet_convnext_l_fold2_epoch_25_0.85_0.7012.pth',
    '/kaggle/input/upernet-convnext/upernet_convnext_l_fold3_epoch_23_0.85_0.7055.pth',
    '/kaggle/input/upernet-convnext/upernet_convnext_l_fold4_epoch_16_0.85_0.7000.pth',

    # NOTE(review): these five are paired with the ConvNeXt-B config but their file
    # names contain "_l_" — presumably just how the files were named when uploaded;
    # confirm they really are the B-model weights.
    '/kaggle/input/upernet-convnext/upernet_convnext_l_fold0_epoch_19_0.85_0.6813.pth',
    '/kaggle/input/upernet-convnext/upernet_convnext_l_fold1_epoch_21_0.85_0.6945.pth',
    '/kaggle/input/upernet-convnext/upernet_convnext_l_fold2_epoch_23_0.85_0.6866.pth',
    '/kaggle/input/upernet-convnext/upernet_convnext_l_fold3_epoch_26_0.80_0.6885.pth',
    '/kaggle/input/upernet-convnext/upernet_convnext_l_fold4_epoch_23_0.85_0.7043.pth',
]

# zip() would silently drop unmatched entries, so fail loudly on a length mismatch.
if len(configs) != len(ckpts):
    raise ValueError(
        f'configs ({len(configs)}) and ckpts ({len(ckpts)}) must have the same length'
    )

models = []
for cfg_path, ckpt in zip(configs, ckpts):
    cfg = config.Config.fromfile(cfg_path)  # parse the config file
    # Flag consumed by the bundled mmsegmentation copy; the original comment says it
    # makes the model return logits — exact output format is not visible here (TODO confirm).
    cfg.model.test_cfg.return_logits = True
    # Use the device chosen in the import cell (cuda:0 when available, otherwise cpu)
    # instead of a hard-coded 'cuda:0'.
    model = init_segmentor(cfg, ckpt, device=str(device))
    models.append(model)
    # One short line per model instead of dumping the whole config each time.
    print(f'loaded {ckpt}')
print(len(models))

In [None]:
# Root folder of the competition data.
base_dir = '/kaggle/input/google-research-identify-contrails-reduce-global-warming'
# The sample submission supplies the record ids to predict.
test_df = pd.read_csv(f'{base_dir}/sample_submission.csv')
# Add the folder of each test record as a "path" column.
test_df["path"] = test_df["record_id"].map(lambda rid: f"{base_dir}/test/{rid}/")
test_df

# datasets

In [None]:
# Load the bands that the false-color composite is built from.
def read_record(directory):
    """Load the band_11, band_14 and band_15 arrays of one record.

    Args:
        directory: Folder containing ``band_11.npy``, ``band_14.npy`` and
            ``band_15.npy``.

    Returns:
        dict mapping each band name to the array returned by ``np.load``.
    """
    band_names = ("band_11", "band_14", "band_15")
    return {
        name: np.load(os.path.join(directory, name + ".npy"))
        for name in band_names
    }

# Linear rescaling: bounds[0] maps to 0 and bounds[1] maps to 1.
def normalize_range(data, bounds):
    """Linearly rescale ``data`` so that ``bounds`` maps onto [0, 1].

    Values outside ``bounds`` are not clipped here and land outside [0, 1].

    Args:
        data: Number or array to rescale.
        bounds: Pair ``(lower, upper)``.

    Returns:
        ``(data - lower) / (upper - lower)``.
    """
    lower, upper = bounds[0], bounds[1]
    span = upper - lower
    return (data - lower) / span

# Build the false-color image for one record.
def get_false_color(record_data):
    """Combine band_11 / band_14 / band_15 into a 3-channel image clipped to [0, 1].

    Channels (stacked on axis 2):
        0: band_15 - band_14, rescaled with bounds (-4, 2)
        1: band_14 - band_11, rescaled with bounds (-4, 5)
        2: band_14,           rescaled with bounds (243, 303)

    Args:
        record_data: Mapping with keys "band_11", "band_14" and "band_15".
            Assumes each array is (height, width, time) — TODO confirm.

    Returns:
        The stacked, clipped array indexed at position 4 on its last axis.
    """
    t11_bounds = (243, 303)
    cloud_top_tdiff_bounds = (-4, 5)
    tdiff_bounds = (-4, 2)

    # Index taken on the last axis after stacking (named N_TIMES_BEFORE originally;
    # presumably the labelled time step — verify against the dataset description).
    frame_index = 4

    band_15 = record_data["band_15"]
    band_14 = record_data["band_14"]
    band_11 = record_data["band_11"]

    channels = [
        normalize_range(band_15 - band_14, tdiff_bounds),
        normalize_range(band_14 - band_11, cloud_top_tdiff_bounds),
        normalize_range(band_14, t11_bounds),
    ]
    stacked = np.stack(channels, axis=2)
    clipped = np.clip(stacked, 0, 1)
    return clipped[..., frame_index]

In [None]:
def rle_encode(x, fg_val=1):
    """Run-length encode a binary mask in column-major order.

    The mask is transposed and flattened, so pixels are numbered down each
    column first. Positions are 1-based.

    Args:
        x: 2-D array-like of shape (height, width); entries equal to
            ``fg_val`` are foreground, everything else is background.
        fg_val: Value that marks foreground pixels. Defaults to 1.

    Returns:
        Flat list ``[start_1, length_1, start_2, length_2, ...]`` of plain
        Python ints; an empty list when the mask has no foreground pixel.
    """
    # Transpose before flattening so the pixel order is column-major.
    flat = np.asarray(x).T.flatten()
    dots = np.flatnonzero(flat == fg_val)
    if dots.size == 0:
        return []

    # A new run begins wherever the gap to the previous foreground index exceeds 1.
    breaks = np.flatnonzero(np.diff(dots) > 1) + 1
    first_of_run = np.concatenate(([0], breaks))
    end_of_run = np.concatenate((breaks, [dots.size]))

    starts = dots[first_of_run] + 1  # convert to 1-based positions
    lengths = end_of_run - first_of_run

    # .tolist() yields plain Python ints, so later string formatting does not
    # depend on how NumPy scalars are printed.
    run_lengths = []
    for start, length in zip(starts.tolist(), lengths.tolist()):
        run_lengths.extend((start, length))
    return run_lengths

def list_to_string(x):
    """Format a run-length list as a space-separated string.

    Args:
        x: Sequence of numbers (list or NumPy array); may be empty or None.

    Returns:
        The items joined by single spaces, or ``'-'`` when ``x`` is empty.
    """
    if x is None or len(x) == 0:
        return '-'
    # Format each item with str() rather than slicing up str(list): str(list)
    # uses the items' repr, which for NumPy >= 2 scalars looks like
    # "np.int64(6)" and would corrupt the output.
    return ' '.join(str(v) for v in x)


# Inference

In [None]:
# Cut-off applied to the ensemble-averaged model output to obtain the binary mask.
PRED_THRESHOLD = 0.75

# One {'record_id', 'encoded_pixels'} dict per test record.
all_preds = []

# Access columns by name so the loop does not depend on the column order of test_df;
# tqdm shows progress instead of printing one line per record.
for row in tqdm(test_df.itertuples(index=False), total=len(test_df)):
    # Load the bands of this record and build the float32 false-color input image.
    data = read_record(row.path)
    img = get_false_color(data).astype(np.float32)

    # Run every ensemble member on the image; [0] takes the first (only) result.
    models_res = [inference_segmentor(model, img)[0] for model in models]

    # Average the per-model outputs element-wise.
    img_pred = np.mean(np.array(models_res), axis=0)

    # NOTE(review): rle_encode expects a 2-D (height, width) mask; the shape returned
    # with return_logits=True comes from the bundled mmsegmentation copy — confirm.
    img_pred = (img_pred > PRED_THRESHOLD).astype(np.uint8)

    # Encode the binary mask as a run-length string ('-' when empty).
    rle = list_to_string(rle_encode(img_pred))

    all_preds.append({'record_id': row.record_id, 'encoded_pixels': rle})

In [None]:
# 保存submission.csv 用于提交
!rm -rf /kaggle/working/*
df = pd.DataFrame(all_preds)
df.to_csv('submission.csv', index=False)
df