In [1]:
import pandas as pd
import numpy as np
import math
from datetime import datetime, timedelta
from PIL import Image
from pathlib import Path

In [2]:
size = 256
kernel_size = 64
weight = 10

In [3]:
class Patient:
    def age_category(age):
        age = int(age[:-1])
        if age == 0: return 0
        elif age == 100: return 10
        return age // 10
            
    def sex_category(sex):
        if sex == 'male': return 0
        return 1

    def infection_case_category(infection_case, causes):
        return causes.index(infection_case)
    
    def type_category(visit_type, move_types):
        return move_types.index(visit_type)

    def day_category(day):
        day = datetime.strptime(day, "%Y-%m-%d")
        return day.weekday()

In [4]:
def unique_value(array, property):
    array = array[property]
    array = array.drop_duplicates(keep='last')
    array = array.tolist()
    array.sort()
    return array

def combine_places(places, counts, causes, visit_types):
    indices = []
    for i in range(len(places)):
        one_visit = places.iloc[i]
        indices.append(df_to_grid_index(one_visit, counts, causes, visit_types))
    return indices

def df_to_grid_index(one_visit, counts, causes, visit_types):
    index = 0
    p_age = Patient.age_category(one_visit['age'])
    index += counts[0]
    p_sex = Patient.sex_category(one_visit['sex']) + index
    index += counts[1]
    p_infection_case = Patient.infection_case_category(one_visit['infection_case'], causes) + index
    index += counts[2]
    p_type = Patient.type_category(one_visit['type'], visit_types) + index
    index += counts[3]
    p_date = Patient.day_category(one_visit['date']) + index
    row = one_visit['row']
    col = one_visit['col']
    
    return [p_age, p_sex, p_infection_case, p_type, p_date, row, col]

def put_triangular_kernel(array, row, col, value, depth):
    stride = int((depth - 1) / 2)
    ratio = 1 / (stride + 1)
    
    c = 1
    
    for i in range(stride, 0, -1):
        if row - i >=0 and row - i < array.shape[0] and col-i>=0 and col-i<array.shape[1]:
            array[row - i][col-i] += (c * ratio * value)

        if row - i >= 0 and row-i<array.shape[0] and col+i <array.shape[1]:
            array[row - i][col+i] += (c * ratio * value)
            
        if row + i < array.shape[0] and row + i >= 0 and col-i >= 0 :
            array[row + i][col-i] += (c * ratio * value)
            
        if row + i < array.shape[0] and row + i >= 0 and col+i <array.shape[1]:
            array[row + i][col+i] += (c * ratio * value)
            
        for j in range(col-i + 1, col+i):
            if row-i >= 0 and j >= 0 and row-i<array.shape[0] and j<array.shape[1]:
                array[row-i][j] += (c * ratio * value)
            if j >= 0 and j < array.shape[1] and row+i<array.shape[0]:
                array[row+i][j] += (c * ratio * value)

        for j in range(row+i - 1, row-i, -1):
            if col + i >= 0 and col+i<array.shape[1] and j >=0 and j<array.shape[1]:
                array[j][col + i] += (c * ratio * value)
            if col - i >= 0 and col-i<array.shape[1] and j<array.shape[1] and j>=0:
                array[j][col - i] += (c * ratio * value)
        c += 1
    
    array[row][col] = value
    
    return array

def overlay_kernel(array):
    new_image = np.zeros((array.shape[0],array.shape[1]))
    for row in range(array.shape[0]):
        for col in range(array.shape[1]):
            if array[row][col] == 0: continue
            new_image += put_triangular_kernel(np.zeros((array.shape[0], array.shape[1])), row, col, array[row][col], kernel_size)
    image_array = new_image
        
    return new_image

def indices_save_image(path, place_indices):
    all_counts = sum(count for count in counts)
    visit_grid = np.zeros((all_counts, size, size))
    
    for index in place_indices:
        row = index[5]
        col = index[6]
        for feature in range(5):
            visit_grid[index[feature]][row][col] += weight
    
    for channel in range(visit_grid.shape[0]):
        save_grid(path + str(channel) + ".png", visit_grid[channel])
    
def save_grid(path, grid):
    grid = overlay_kernel(grid)
    img = Image.fromarray(grid.astype('uint8'), 'L')
    img.save(path)

In [5]:
routes = pd.read_csv('covid19/MergedRoute.csv')
dates = unique_value(routes, 'date')
patients = unique_value(routes, 'patient_id')

In [6]:
names = ['age', 'sex', 'infection_case', 'type', 'date']
counts = [11, 2, 4, 21, 7]
visit_types = ['karaoke', 'gas_station', 'gym', 'bakery', 'pc_cafe',
              'beauty_salon', 'school', 'church', 'bank', 'cafe',
              'bar', 'post_office', 'real_estate_agency', 'lodging',
              'public_transportation', 'restaurant', 'etc', 'store',
              'hospital', 'pharmacy', 'airport']
causes = ['community infection', 'etc', 'contact with patient', 'overseas inflow']

### Raw Data 저장

In [7]:
def save_patient_route(path, patient, routes):
    patient_places = routes[routes['patient_id']==patient]
    patient_dates = unique_value(patient_places, 'date')
    first_day = datetime.strptime(patient_dates[0], "%Y-%m-%d")
    last_day = datetime.strptime(patient_dates[-1], "%Y-%m-%d") # + timedelta(days=3)
    delta = last_day - first_day
    duration = delta.days + 1

    # 저장
    patient_path = path + str(patient)
    Path(patient_path).mkdir(parents=True, exist_ok=True)

    today = first_day
    while(True):
        today_str = datetime.strftime(today, "%Y-%m-%d")
        patient_day_places = patient_places[patient_places['date']==today_str]
        places_indices = combine_places(patient_day_places, counts, causes, visit_types)
        patient_date_path = patient_path + "/" + today_str + '/'
        Path(patient_date_path).mkdir(parents=True, exist_ok=True)
        indices_save_image(patient_date_path, places_indices)
        if today == last_day: break
        today += timedelta(days=1)

def get_patient_route(patient, routes):
    patient_places = routes[routes['patient_id']==patient]
    patient_dates = unique_value(patient_places, 'date')
    first_day = datetime.strptime(patient_dates[0], "%Y-%m-%d")
    last_day = datetime.strptime(patient_dates[-1], "%Y-%m-%d") # + timedelta(days=3)
    delta = last_day - first_day
    duration = delta.days + 1

    patient_routes = []
    today = first_day
    while(True):
        today_str = datetime.strftime(today, "%Y-%m-%d")
        patient_day_places = patient_places[patient_places['date']==today_str]
        places_indices = combine_places(patient_day_places, counts, causes, visit_types)
        patient_routes.append([today_str, places_indices]) 

        if today == last_day: break
        today += timedelta(days=1)
        
    return patient_routes

#### 확진자 기준으로 raw data 저장
```
path = 'covid_images/patient_figure_raw_64_30_3/'
for patient in patients:
    save_patient_route(path, patient, routes)
```

### 3일 누적 데이터 저장

In [8]:
def accumulate_two_days(day1, day2):
    day2[1].extend(day1[1])
    return day2
    
def accumulate_patient(patient, routes):
    patient_route = get_patient_route(patient, routes)
    patient_days = len(patient_route)

    second = patient_days - 1
    first = second - 1
    for i in range(2 * patient_days - 3):
        patient_route[second] = accumulate_two_days(patient_route[first], patient_route[second])
        if second - first == 2: second -=1
        else: first -= 1
    
    return patient_route

def save_patient_routes(path, patient, patient_routes):
    path += str(patient) + '/'
    Path(path).mkdir(parents=True, exist_ok=True)
    for routes in patient_routes:
        patient_date_path = path + routes[0] + '/'
        Path(patient_date_path).mkdir(parents=True, exist_ok=True)
        indices_save_image(patient_date_path, routes[1])

#### 누적 경로 저장
```
path = 'covid_images/patient_figure_accumulated_64_30_3/'
for patient in patients:
    accumulated_routes = accumulate_patient(patient, routes)
    save_patient_routes(path, patient, accumulated_routes)
```

### 각 날짜별 취합

In [9]:
def get_complete_routes(routes, dates, patients):
    first_day = datetime.strptime(dates[0], "%Y-%m-%d")
    last_day = datetime.strptime(dates[-1], "%Y-%m-%d") # + timedelta(days=3)

    # 날짜별 경로 배열 생성
    today = first_day
    complete_routes = []
    while(True):
        today_str = datetime.strftime(today, "%Y-%m-%d")
        places = []
        complete_routes.append([today_str, places])

        if today == last_day: break
        today += timedelta(days=1)

    # 환자 경로 가져온 다음 날짜대로 배치
    for patient in patients:
        accumulated_routes = accumulate_patient(patient, routes)
        for each_route in accumulated_routes:
            route_day = datetime.strptime(each_route[0], "%Y-%m-%d")
            route_places = each_route[1]
            index = (route_day - first_day).days
            complete_routes[index][1].extend(route_places)
    return complete_routes

#### 취합한 경로 저장

complete_routes = get_complete_routes(routes, dates, patients)

path = 'covid_images/complete_figure_kernel_64_10_4/'
Path(path).mkdir(parents=True, exist_ok=True)

for days in complete_routes:
    date_path = path + days[0] + '/'
    Path(date_path).mkdir(parents=True, exist_ok=True)
    indices_save_image(date_path, days[1])


#### 아직 하는 중
```
def get_accumulated_routes_by_date(routes, dates, patients):
    first_day = datetime.strptime(dates[0], "%Y-%m-%d")
    last_day = datetime.strptime(dates[-1], "%Y-%m-%d")

    # 날짜별 경로 배열 생성
    today = first_day
    complete_routes = []
    while(True):
        today_str = datetime.strftime(today, "%Y-%m-%d")
        ids = []
        places = []
        complete_routes.append([today_str, []])
        if today == last_day: break
        today += timedelta(days=1)

    for patient in patients:
        accumulated_routes = accumulate_patient(patient, routes)
        for each_route in accumulated_routes:
            route_day = datetime.strptime(each_route[0], "%Y-%m-%d")
            route_places = each_route[1]
            index = (route_day - first_day).days
            complete_routes[index][1].append([patient, route_places])
    return complete_routes

accumulated_routes = get_accumulated_routes_by_date(routes, dates, patients)

path = 'covid_images/patient_accumulated_by_date/'
Path(path).mkdir(parents=True, exist_ok=True)

for days in accumulated_routes:
    date_path = path + days[0] + '/'
    Path(date_path).mkdir(parents=True, exist_ok=True)
    for day in days[1]:
        patient_id = day[0]
        routes = day[1]
        day_path = date_path + str(patient_id) + '/'
        Path(day_path).mkdir(parents=True, exist_ok=True)
        indices_save_image(day_path, routes)
```

### ConvLSTM

In [10]:
from keras.models import Sequential, load_model
from keras.layers.convolutional import Conv3D
from keras.layers.convolutional_recurrent import ConvLSTM2D
from keras.layers.normalization import BatchNormalization
import keras.backend.tensorflow_backend as K
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
import keras.backend.tensorflow_backend as tfback

Using TensorFlow backend.


In [32]:
rs = 256
channel = 45
n_step = 3
n_test = 3
n_train = 60
epochs = 200
batch_size = 32

In [33]:
# We create a layer which take as input movies of shape
# (n_frames, width, height, channels) and returns a movie
# of identical shape.
def get_model(channel):
    with K.tf_ops.device('/device:GPU:0'):
        seq = Sequential()
        seq.add(ConvLSTM2D(filters=channel, kernel_size=(3, 3), data_format='channels_first',
                           input_shape=(n_step, channel, rs, rs),
                           padding='same', return_sequences=True))
        seq.add(BatchNormalization())

        seq.add(ConvLSTM2D(filters=channel, kernel_size=(3, 3), data_format='channels_first',
                           padding='same', return_sequences=True))
        seq.add(BatchNormalization())

        seq.add(ConvLSTM2D(filters=channel, kernel_size=(3, 3), data_format='channels_first',
                           padding='same', return_sequences=True))
        seq.add(BatchNormalization())

        seq.add(ConvLSTM2D(filters=channel, kernel_size=(3, 3), data_format='channels_first',
                           padding='same', return_sequences=True))
        seq.add(BatchNormalization())

        seq.add(Conv3D(filters=n_step, kernel_size=(3, 3, 3),
                       activation='sigmoid',
                       padding='same', data_format='channels_first'))

        seq.compile(optimizer='adadelta', loss='binary_crossentropy')
                
        seq.summary()

    return seq

#### train

In [34]:
def get_array_image(place_indices, data_array):
    all_counts = sum(count for count in counts)
    
    for index in place_indices[1]:
        row = index[5]
        col = index[6]
        for feature in range(5):
            data_array[index[feature]][row][col] += weight
    
    for channel in range(data_array.shape[0]):
        data_array[channel] = overlay_kernel(data_array[channel])
    
    return data_array

def get_dataset(routes, dates, patients):
    complete_routes = get_complete_routes(routes, dates, patients)
    all_counts = sum(count for count in counts)
    dataset = np.zeros((len(complete_routes), all_counts, size, size))

    for i, days in enumerate(complete_routes):
        sub_routes = get_array_image(days, dataset[i,:,:,:])
    
    return dataset

def train(dataset):
    n = dataset.shape[0]
    channel = dataset.shape[1]

    n_test = n - n_train

    train = dataset[:n_train,:,:]
    test = dataset[n_train:,:,:]

    X_train = []
    y_train = []
    for i in range(n_step,n_train-n_step):
        X_train.append(train[i-n_step:i, :,:])
        y_train.append(train[i:i+n_step, :,:])
    X_train, y_train = np.array(X_train), np.array(y_train)
    print(X_train.shape, y_train.shape)

    seq = get_model(channel)
    seq.fit(X_train, y_train, epochs = 50, batch_size = 32)
    seq.save('model/0517_train_1.h5')

In [16]:
dataset = get_dataset(routes, dates, patients)

In [35]:
train(dataset)

(54, 3, 45, 256, 256) (54, 3, 45, 256, 256)
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv_lst_m2d_21 (ConvLSTM2D) (None, 3, 45, 256, 256)   145980    
_________________________________________________________________
batch_normalization_21 (Batc (None, 3, 45, 256, 256)   1024      
_________________________________________________________________
conv_lst_m2d_22 (ConvLSTM2D) (None, 3, 45, 256, 256)   145980    
_________________________________________________________________
batch_normalization_22 (Batc (None, 3, 45, 256, 256)   1024      
_________________________________________________________________
conv_lst_m2d_23 (ConvLSTM2D) (None, 3, 45, 256, 256)   145980    
_________________________________________________________________
batch_normalization_23 (Batc (None, 3, 45, 256, 256)   1024      
_________________________________________________________________
conv_lst_m

ResourceExhaustedError:  OOM when allocating tensor with shape[32,45,256,256] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node conv_lst_m2d_21/while/body/_1/convolution_2}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_keras_scratch_graph_30890]

Function call stack:
keras_scratch_graph
