In [1]:
import shapefile
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def get_df(path="fires_growth/fires"):
    with shapefile.Reader(path) as sf:
        fields = sf.fields
        columns = [x[0] for x in sf.fields][1:]
        records = sf.records()
        np_arr = np.array(records)
        df = pd.DataFrame(columns=columns, data=np_arr)
    
    with shapefile.Reader(path) as sf:
        df['burned'] = sf.shapes()

    df['id'] = df['year'].astype(str) + '_' + df['fireid'].astype(str)
    
    df = df.sort_values(by=['id', 'dt'])
    
    return df

In [2]:
def load_model(name):
    model = tf.keras.models.load_model(name + '.h5')
    return model

In [3]:
def get_fire_id(fire_ids):
    idx = np.random.randint(len(fire_ids))
    return(fire_ids[idx])

In [4]:
def toBitArr(shape):
    from PIL import Image, ImageDraw

    bb = shape.bbox
    # print(bb)
    x_min = bb[0]
    x_max = bb[2]
    y_min = bb[1]
    y_max = bb[3]

    polygon = []
    for point in shape.points:
        a = (point[0] - x_min) / (x_max - x_min) * 16 + 8
        b = (point[1] - y_min) / (y_max - y_min) * 16 + 8
        polygon.append((a, b))

    img = Image.new('L', (32, 32), 0)
    ImageDraw.Draw(img).polygon(polygon, outline=1, fill=1)
    mask = np.array(img)
    return mask

In [5]:
def simulate(df, fire_id, model, visualize=False, deep=True):
    # Отобрать записи
    rows = df.loc[df['id'] == fire_id].sort_values(by=['dt'])
    
    # Промоделировать
    state = toBitArr(rows.iloc[0]['burned'])
    for i in range(len(rows)-1):
        state = model(state)
    
    if deep:
        state = state.numpy() > 0.5
    
    # Сравнить результаты моделирования с реальным процессом
    real_state = toBitArr(rows.iloc[-1]['burned'])
    iou = np.sum(state & real_state) / np.sum(state | real_state)
    
    return iou

In [6]:
def evaluate(model_name, fire_count=100):

    model = load_model(model_name)

    ious = ()
    
    df = get_df()
    
    for i in range(fire_count):
        fire_id = get_fire_id(df['id'].unique())
        iou = simulate(df, fire_id, model)
        ious += (iou,)

    print('Среднее значение IoU:', np.mean(ious))

In [7]:
from PIL import Image

def draw(arr):
    a = arr * 250
    a += 5
    a = a.repeat(10,axis=0).repeat(10,axis=1)
    img = Image.fromarray(a)

    img.show()

In [8]:
import shapefile
import numpy as np
from datetime import datetime
import pickle

ALL = "./fires_growth/fires.shp"
PRIVATE = "./fires_growth/private_fires.shp"
PUBLIC = "./fires_growth/public_fires.shp"

class Raw:
    def __init__(self, fireid, x, y):
        self.fireid: str = fireid
        self.x = x
        self.y = y

def prepare_dataset_v1(path="fires_growth/fires", dataset_name='data_v1'):
    
    x_data = []
    y_data = []
    data = []
    
    df = get_df(path)
    
    print('Всего элементов в исходном датасете:', df.shape[0])
    
    for i in range(df.shape[0]):
        try:
            x = toBitArr(df.iloc[i]['burned'])

            if df.iloc[i]['id'] == df.iloc[i+1]['id']:
                y = toBitArr(df.iloc[i+1]['burned'])
            else:
                y=x
            
            x_data.append(x)
            y_data.append(y)
            data.append(df.iloc[i]['id'])
            
            if i % 1000 == 0:
                print('Обработано {} элементов датасета'.format(i))
        except:
            pass

    dataset = {'x': x_data, 'y': y_data, 'data': None}
    with open(dataset_name + '.pickle', 'wb') as f:
        pickle.dump(dataset, f)

def prepare_dataset_v1_short(path="fires_growth/fires", dataset_name='data_v1_short', count=2000):
    
    x_data = []
    y_data = []
    data = []
    
    df = get_df(path)
    
    print('Всего элементов в исходном датасете:', df.shape[0])
    
    for i in range(df.shape[0]):
        try:
            x = toBitArr(df.iloc[i]['burned'])

            if df.iloc[i]['id'] == df.iloc[i+1]['id']:
                y = toBitArr(df.iloc[i+1]['burned'])
            else:
                y=x
            
            x_data.append(x)
            y_data.append(y)
            data.append(df.iloc[i]['id'])
            
            if i % 1000 == 0:
                print('Обработано {} элементов датасета'.format(i))
        except:
            pass
    
    x_result = []
    y_result = []
    data_result = []
    
    idx = np.random.randint(0, len(x_data), count)
    for i in idx:
        x_result.append(x_data[i])
        y_result.append(x_data[i])
        data_result.append(x_data[i])
        
    dataset = {'x': x_result, 'y': y_result, 'data': data_result}
    with open(dataset_name + '.pickle', 'wb') as f:
        pickle.dump(dataset, f)

In [9]:
def get_dataset(dataset_name):
    with open(dataset_name + '.pickle', 'rb') as f:
        dataset = pickle.load(f)
    
    return dataset

In [10]:
def prepare_dataset_v0():
    
    x_data = []
    y_data = []
    
    for i in range(500):
        x = np.random.random((1, 32, 32))
        y = np.zeros((1, 32, 32))
        x_data.append(x)
        y_data.append(y)
    
    dataset = {'x': x_data, 'y': y_data, 'data': None}
    with open('data_v0.pickle', 'wb') as f:
        pickle.dump(dataset, f)

In [11]:
def create_model():
    inputs = tf.keras.Input(shape=[32, 32])
    outputs = tf.keras.layers.Dense(32, activation='sigmoid')(inputs)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

In [12]:
import tensorflow as tf
import numpy as np

def train_model(dataset_name, model_name, epochs=300):
    print('Чтение датасета')
    dataset = get_dataset(dataset_name)
    
    x = dataset['x']
    y = dataset['y']
    
    print('Создание модели')
    model = create_model()
    model.compile(optimizer='adam', loss='binary_crossentropy')
    
    
    print('Старт обучения модели')
    x = np.array(x)
    x.resize((500, 32, 32))
    print(len(x))
    print(x[0].shape)
    y = np.array(y)
    y.resize((500, 32, 32))
    print(y.shape)
    print(y[0].shape)
    print('epochs', epochs)
    model.fit(x, y, epochs)
    
    print('Сохранение модели')
    model.save(model_name + '.h5')
    
    return model
    

In [13]:
def train(model, x, y, learning_rate, loss_fn):
    with tf.GradientTape() as t:
        current_loss = loss_fn(y, model(x))

In [14]:
def training_loop(model, x, y, epoch_num, loss_fn):
    for epoch in range(epoch_num):
        # Update the model with the single giant batch
        train(model, x, y, learning_rate=0.1, loss_fn=loss_fn)

In [15]:
def read_and_display_result():
    with shapefile.Reader("validate_public/validate_public") as sf:
        fields = sf.fields
        columns = [x[0] for x in sf.fields][1:]
        records = sf.records()
        np_arr = np.array(records)
        df1 = pd.DataFrame(columns=columns, data=np_arr)

    with shapefile.Reader("validate_public/validate_public") as sf:
        df1['burned'] = sf.shapes()

    with shapefile.Reader("result_format/result_format") as sf:
        fields = sf.fields
        columns = [x[0] for x in sf.fields][1:]
        records = sf.records()
        np_arr = np.array(records)
        df2 = pd.DataFrame(columns=columns, data=np_arr)

    with shapefile.Reader("result_format/result_format") as sf:
        df2['burned'] = sf.shapes()
        
    return df1, df2

In [16]:
def evaluate_model_v0():
    prepare_dataset_v0()
    train_model(dataset_name='data_v0', model_name='model_v0')
    print('Эффективность нулевой модели: ')
    evaluate('model_v0')

In [17]:
def evaluate_model_v1_full():
    prepare_dataset_v1()

In [18]:
# def evaluate_model_v1_private():
#     prepare_dataset_v1(path="fires_growth/private_fires", dataset_name='data_v1_private')
#     print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
#     train_model(dataset_name='data_v1_private', model_name='model_v1_private', epochs=2)
    
#     print('Эффективность первой модели: ')
#     evaluate('model_v1')


In [19]:
def evaluate_model_v1_short():
    prepare_dataset_v1_short(path="fires_growth/private_fires", dataset_name='data_v1_short')
    train_model(dataset_name='data_v1_short', model_name='model_v1_short', epochs=300)
    
    print('Эффективность первой модели(short): ')
    evaluate('model_v1_short')

In [20]:
evaluate_model_v1_short()

Всего элементов в исходном датасете: 22848
Обработано 0 элементов датасета
Обработано 1000 элементов датасета
Обработано 2000 элементов датасета
Обработано 3000 элементов датасета
Обработано 4000 элементов датасета
Обработано 5000 элементов датасета
Обработано 6000 элементов датасета
Обработано 7000 элементов датасета
Обработано 8000 элементов датасета
Обработано 9000 элементов датасета
Обработано 10000 элементов датасета
Обработано 11000 элементов датасета
Обработано 12000 элементов датасета
Обработано 13000 элементов датасета
Обработано 14000 элементов датасета
Обработано 15000 элементов датасета
Обработано 16000 элементов датасета
Обработано 17000 элементов датасета
Обработано 18000 элементов датасета
Обработано 19000 элементов датасета
Обработано 20000 элементов датасета
Обработано 21000 элементов датасета
Обработано 22000 элементов датасета
Чтение датасета
Создание модели
Старт обучения модели
500
(32, 32)
(500, 32, 32)
(32, 32)
epochs 300
Сохранение модели
Эффективность первой мо

In [21]:
train_model(dataset_name='data_v1_short', model_name='model_v1_short', epochs=300)

Чтение датасета
Создание модели
Старт обучения модели
500
(32, 32)
(500, 32, 32)
(32, 32)
epochs 300
Сохранение модели


<tensorflow.python.keras.engine.functional.Functional at 0x1e1e22fd6d0>

In [22]:
evaluate('model_v1_short')

Среднее значение IoU: 0.14904981506555934


In [23]:
def baseline():
    ious = ()

    df = get_df()
    model = lambda x: x
    for i in range(100):
        fire_id = get_fire_id(df['id'].unique())
        iou = simulate(df, fire_id, model, deep=False)
        ious += (iou,)

    print('Среднее значение IoU:', np.mean(ious))

In [24]:
import requests

def get_neighbor(bool_arr, x, y, prev_dir):
    '''
    :param x: x pos
    :param y: y pos
    :param prev_dir: 7 - bottom, 6 - bottom right, 5 - right, 4 - up right, 3 - up, 2 - up left, 1 - left, 0 - bottom left
    :return:
    '''

    FIRE = 1

    n_x = 0
    n_y = 0
    dir = 0
    c_dir = abs((prev_dir + 3) % 8)

    for i in range(8):
        c_dir = abs((c_dir - 1) % 8)

        if c_dir == 7 and bool_arr[y + 1][x] == FIRE:
            dir = c_dir
            n_x = x
            n_y = y + 1
            break
        elif c_dir == 6 and bool_arr[y + 1][x + 1] == FIRE:
            dir = c_dir
            n_x = x + 1
            n_y = y + 1
            break
        elif c_dir == 5 and bool_arr[y][x + 1] == FIRE:
            dir = c_dir
            n_x = x + 1
            n_y = y
            break
        elif c_dir == 4 and bool_arr[y - 1][x + 1] == FIRE:
            dir = c_dir
            n_x = x + 1
            n_y = y - 1
            break
        elif c_dir == 3 and bool_arr[y - 1][x] == FIRE:
            dir = c_dir
            n_x = x
            n_y = y - 1
            break
        elif c_dir == 2 and bool_arr[y - 1][x - 1] == FIRE:
            dir = c_dir
            n_x = x - 1
            n_y = y - 1
            break
        elif c_dir == 1 and bool_arr[y][x - 1] == FIRE:
            dir = c_dir
            n_x = x - 1
            n_y = y
            break
        elif c_dir == 0 and bool_arr[y + 1][x - 1] == FIRE:
            dir = c_dir
            n_x = x - 1
            n_y = y + 1
            break
    return n_x, n_y, dir


def to_polygon(bool_arr, bbox):

    x_min = bbox[0]
    x_max = bbox[2]
    y_min = bbox[1]
    y_max = bbox[3]
    x_diff = (x_max - x_min) / 16
    y_diff = (y_max - y_min) / 16
    x_bias = x_min - x_diff * 8
    y_bias = y_min - y_diff * 8

    start_x = 16
    start_y = 0
    for y in range(31, 0, -1):
        if bool_arr[y][16] == 1:
            start_y = y
            break
    # print(f"{start_x}, {start_y}")
    points = [(start_x * x_diff + x_bias, start_y * y_diff + y_bias)]

    x, y, dir = get_neighbor(bool_arr, start_x, start_y, 3)
    # print(f"{x}, {y}, {dir}")
    points.append((x * x_diff + x_bias, y * y_diff + y_bias))

    while x != start_x or y != start_y:
        prev_dir = dir
        x, y, dir = get_neighbor(bool_arr, x, y, dir)

        if dir != prev_dir or (x == start_x and y == start_y):
            # print(f"{x}, {y}, {dir}")
            points.append((x * x_diff + x_bias, y * y_diff + y_bias))

    return points

In [25]:
def test_validation():
    df1, df2 = read_and_display_result()

    w = shapefile.Writer('result/result')
    w.field('fireid', 'C')

    for i in range(df1.shape[0]):
        source_polygon = df1.iloc[i]['burned']
        arr = toBitArr(source_polygon)
        new_polygon = to_polygon(arr, source_polygon.bbox)
        w.poly([new_polygon])
        w.record(df1.iloc[i]['fireid'])

    w.close()
    


In [26]:
def visualize_validation_result():
    with shapefile.Reader("result/result") as sf:
            fields = sf.fields
            columns = [x[0] for x in sf.fields][1:]
            records = sf.records()
            np_arr = np.array(records)
            df3 = pd.DataFrame(columns=columns, data=np_arr)

    with shapefile.Reader("result/result") as sf:
        df3['burned'] = sf.shapes()

    result_polygon = df3.iloc[0]['burned']
    arr2 = toBitArr(result_polygon)
    draw(arr2)
    return df3

In [27]:
test_validation()
df3 = visualize_validation_result()
df3.head()
draw(toBitArr(df3.iloc[1]['burned']))