In [1]:
import geopandas as gpd
from glob import glob
import os 
import pandas as pd
import numpy as np

def gen_buffer_dict(buffer:float) -> dict:
    import warnings
    lower_threshold = 3
    upper_threshold = 8
    upper_limit = 10
    if(buffer <= lower_threshold):
        raise ValueError(f"Buffer can not be less than {lower_threshold}. {buffer=}")
    elif(buffer > upper_threshold):
        raise ValueError(f"Buffer should not be grater or equal than {upper_threshold}. {buffer=}")

    buffer_dict = {
        "circle_s": 2,
        "circle_m": buffer - 2,
        "circle_l": buffer,
        "donut_o": (buffer, buffer - 2),
        "donut_i": (buffer - 2, 2),
    }
    return buffer_dict

buffer = 5.5
shape_path = "/root/data/SHP/CR11_status.shp"

In [2]:
geo_shape = gpd.read_file(shape_path)
buffer_dict = gen_buffer_dict(buffer)

In [3]:
def get_histogram(path:str) -> list[float]:
    import rasterio
    import numpy as np
    src = rasterio.open(path)
    arr = src.read()
    arr = np.array(arr)
    arr.flatten()
    arr = arr[(arr > -3.4028235e+38)]
    arr = arr[(arr > arr.min())]
    total = len(arr)

    hists = []

    hists.append(arr[(arr < 0)])
    hists.append(arr[(arr >= 0) & (arr <= 0.1)])
    hists.append(arr[(arr > 0.1) & (arr <= 0.2)])
    hists.append(arr[(arr > 0.2) & (arr <= 0.3)])
    hists.append(arr[(arr > 0.3) & (arr <= 0.4)])
    hists.append(arr[(arr > 0.4) & (arr <= 0.5)])
    hists.append(arr[(arr > 0.5) & (arr <= 0.6)])
    hists.append(arr[(arr > 0.6) & (arr <= 0.7)])
    hists.append(arr[(arr > 0.7) & (arr <= 0.8)])
    hists.append(arr[(arr > 0.8) & (arr <= 0.9)])
    hists.append(arr[(arr > 0.9)])

    for idx, hist in enumerate(hists):
        hists[idx] = round( 100*len(hist)/total , 2)

    return hists

In [4]:
for idx, geo in geo_shape.iterrows():
    index = geo['MainID'] if 'MainID' in geo.keys() else idx
    buffer_name = list(buffer_dict.items())[0][0]
    filename = f"{index}_{buffer_name}.tiff"
    path = os.path.join('data','buffer',filename)
    print(path)
    print(os.path.exists(path))
    break

data/buffer/CR11-BMP-200_circle_s.tiff
True


In [5]:
hists = []
columns = []
temp_names = ['mi', '00', '01', 
             '02', '03', '04', 
             '05', '06', '07', 
             '08', '09']
for buffer_name,_ in buffer_dict.items():
    filename = f"{index}_{buffer_name}.tiff"
    path = os.path.join('data','buffer',filename)
    if(os.path.exists(path) == False):
        raise FileExistsError(f"filename={path} not exist.")
    print(filename)
    for temp_name in temp_names:
        columns.append(f"{buffer_name}_{temp_name}")
    hists.extend(get_histogram(path))

CR11-BMP-200_circle_s.tiff
CR11-BMP-200_circle_m.tiff
CR11-BMP-200_circle_l.tiff
CR11-BMP-200_donut_o.tiff
CR11-BMP-200_donut_i.tiff


In [6]:
X = pd.DataFrame(columns=columns,data=np.array(hists).reshape(1,-1))

In [7]:
import pickle
with open('model/RandomForestClassifier', 'rb') as handle:
    model = pickle.load(handle)

In [36]:
model.predict(X[model.feature_names_in_])[0] == 1

False

In [17]:
'MainID' in geo_shape.keys()

True

In [29]:
geo_shape.set_index('MainID', inplace=True)


KeyError: "None of ['MainID'] are in the columns"

In [30]:
geo_shape['health'] = '0'



In [33]:
geo_shape.loc['CR11-BMP-200', 'health'] = 'healthy'