<a href="https://colab.research.google.com/github/curiosity806/2020_dacon_satellite_precipitation/blob/hotfix%2Fbaseline/gmi_preci_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import

In [0]:
# Mount Google Drive into the Colab runtime at /content/drive; the data
# paths used further down all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
import numpy as np
import sys

## Load data

In [0]:
# Load the training array from Drive. Later cells index it as
# (image, row, col, channel) and read channels 10:12, 12:14 and 14.
train = np.load('/content/drive/My Drive/2020 Kaggle Study/data/train.npy')

## Compensate precipitation

In [0]:
# (row, col) offsets of the 3x3 neighbourhood around a pixel, centre
# included, listed in row-major order.
dr = [(d_row, d_col) for d_row in (-1, 0, 1) for d_col in (-1, 0, 1)]

def get_dist(p1, p2):
    """Return the great-circle (haversine) distance between paired points.

    Args:
        p1, p2: arrays whose last axis has length 2 and holds coordinates
            in degrees, typically shape (-1, 2). Index 0 of the last axis
            is used as longitude and index 1 as latitude. The leading
            shapes must match or be broadcastable.

    Returns:
        Distances in km, computed with a sphere radius of 6373.0 km; one
        value per point pair (shape (-1) for (-1, 2) inputs).
    """
    p1 = np.asarray(p1)
    p2 = np.asarray(p2)

    lon1 = np.deg2rad(p1[..., 0])
    lat1 = np.deg2rad(p1[..., 1])
    lon2 = np.deg2rad(p2[..., 0])
    lat2 = np.deg2rad(p2[..., 1])

    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1] (e.g. for nearly
    # antipodal points), which would turn sqrt(1 - a) into NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return 6373.0 * c  # km

def compen_ll(value, ori_ll, tgt_ll):
    """Resample ``value`` from the ``ori_ll`` grid onto the ``tgt_ll`` grid.

    For every target pixel (i, j) the candidates are the source pixels in
    the 3x3 index neighbourhood of (i, j) (offsets taken from ``dr``) that
    lie inside the image. The 4 candidates closest to ``tgt_ll[i, j]``
    according to ``get_dist`` are blended with inverse-squared-distance
    weights.

    Args:
        value: array of shape (n, m) or (n, m, ...), e.g. (40, 40) or
            (40, 40, -1). Trailing axes are interpolated independently
            using the same weights.
        ori_ll: array of shape (n, m, 2); the coordinates at which
            ``value`` was originally sampled.
        tgt_ll: array of shape (n, m, 2); the coordinates for which the
            converted values are wanted.
            Both coordinate arrays are handed to ``get_dist`` unchanged, so
            their last axis must follow its convention (index 0 is treated
            as longitude, index 1 as latitude, in degrees).

    Returns:
        Array with the same shape and dtype as ``value``.
    """
    ret = np.empty_like(value)

    n, m = value.shape[:2]
    cell_shape = value.shape[2:]
    offsets = np.asarray(dr)  # (9, 2) row/col offsets, row-major order
    upper = np.array([n, m])

    for i in range(n):
        for j in range(m):
            # Neighbour indices that fall inside the image.
            cand = offsets + (i, j)
            inside = ((cand >= 0) & (cand < upper)).all(axis=1)
            rows, cols = cand[inside].T

            src_ll = ori_ll[rows, cols]  # shape=(k, 2)
            dst_ll = np.broadcast_to(tgt_ll[i, j], src_ll.shape)
            dists = get_dist(src_ll, dst_ll)  # shape=(k,)

            # Consider only the 4 nearest points.
            nearest = np.argsort(dists)[:4]

            # Epsilon keeps the weight finite when a source point coincides
            # with the target point (distance 0).
            weights = 1 / (dists[nearest] ** 2 + sys.float_info.epsilon)
            samples = value[rows, cols][nearest].reshape(len(nearest), -1)
            blended = (weights[:, None] * samples).sum(axis=0) / weights.sum()
            ret[i, j] = blended.reshape(cell_shape)
    return ret

In [0]:
from multiprocessing import Process, Manager

# Number of worker processes; each one handles one contiguous chunk of train.
n_procs = 4
procs = []
manager = Manager()
# Shared list with one result slot per worker.
gmi_preci = manager.list([None] * n_procs)

# Split train into exactly n_procs contiguous chunks along the image axis.
# np.array_split handles image counts that are not a multiple of n_procs
# (the leading chunks get one extra image) and counts smaller than n_procs
# (the trailing chunks are empty), so no image is ever left out.
n_imgs = train.shape[0]
splitted = np.array_split(train, n_procs)

def proc_func(proc_id):
    """Worker body: resample channel 14 for every image of one chunk.

    Takes the chunk ``splitted[proc_id]`` and, for each image, calls
    ``compen_ll`` with channel 14 as the values, channels 12:14 as the
    source coordinates and channels 10:12 as the target coordinates. The
    stacked result is stored in ``gmi_preci[proc_id]``. Prints ``proc_id``
    and the running image count every 1000 images.
    """
    chunk = splitted[proc_id]
    out = np.empty_like(chunk[:, :, :, 14])  # shape=(-1, 40, 40)
    for count, img in enumerate(chunk, start=1):
        out[count - 1, :, :] = compen_ll(img[:, :, 14],
                                         img[:, :, 12:14],
                                         img[:, :, 10:12])
        if count % 1000 == 0:
            print(proc_id, count)
    gmi_preci[proc_id] = out

# Launch one worker per chunk.
for proc_id in range(n_procs):
    proc = Process(target=proc_func, args=(proc_id, ))
    proc.start()
    procs.append(proc)

# Wait for every worker to finish.
for proc in procs:
    proc.join()

# A worker that died never fills its result slot; report that directly
# instead of letting np.concatenate fail on a leftover None.
failed = [(idx, p.exitcode) for idx, p in enumerate(procs) if p.exitcode != 0]
if failed:
    raise RuntimeError(
        'worker process(es) failed, (index, exitcode): %s' % failed)

# Stitch the per-worker results back together in chunk order and append a
# trailing channel axis: (N, H, W) -> (N, H, W, 1).
gmi_preci = np.concatenate(list(gmi_preci))
gmi_preci = gmi_preci[..., np.newaxis]
gmi_preci = gmi_preci.astype(np.float32)

## Save GMI precipitation data

In [0]:
# Write the assembled gmi_preci array to Drive, in the same data directory
# that train.npy was loaded from.
np.save('/content/drive/My Drive/2020 Kaggle Study/data/gmi_preci.npy', gmi_preci)