### Import Packages

In [None]:
import os
import re
import numpy as np

import rasterio
import geopandas as gpd
from osgeo import gdal
import pickle

from PIL import Image
import matplotlib.pyplot as plt
from skimage.io import imread

from functions5 import gdf_to_array, convert_to_8Bit
# Current user's home directory; every data path in this notebook is built
# by appending a '/new_project/...' suffix to it.
HOME = os.path.expanduser("~")

In [None]:
# Sample tile (image id 1017) used by the exploration cells below:
# the raster image and the GeoJSON file with the same image id.
data_string = ''.join(
    (HOME, '/new_project/data/AOI_2_Vegas_Train/RGB-PanSharpen/RGB-PanSharpen_AOI_2_Vegas_img1017.tif')
)
shape_string = ''.join(
    (HOME, '/new_project/data/AOI_2_Vegas_Train/geojson/buildings_AOI_2_Vegas_img1017.geojson')
)

### Explore TIF Files

In [None]:
# Open the sample raster; the handle stays open because the following cells
# keep querying it (it is not closed anywhere in this section).
dataset = rasterio.open(data_string)

In [None]:
# Name the dataset was opened under
dataset.name

In [None]:
# Band count reported by rasterio for this file
dataset.count

In [None]:
# Raster dimensions as reported by the dataset
print(dataset.width)
print(dataset.height)

In [None]:
# Metadata summary exposed by the dataset object
dataset.meta

In [None]:
# Bounds of the raster as reported by the dataset
# NOTE(review): units depend on the file's CRS -- confirm against dataset.meta
dataset.bounds

In [None]:
# Read band index 1 and display the resulting array
band1 = dataset.read(1)
band1

### Convert TIF Files to 8-bit

In [None]:
# Convert every file in the source directory with convert_to_8Bit, writing the
# result under the same file name in the "-8bit" sibling directory.
# Files that already have an output are skipped, so the cell can be re-run
# after an interruption without redoing finished conversions.
filepath = HOME + '/new_project/data/test_data/RGB-PanSharpen/'
new_dir = HOME + '/new_project/data/test_data/RGB-PanSharpen-8bit/'

# Make sure the output directory exists before converting; convert_to_8Bit is
# defined in another module, so whether it creates the directory itself is not
# visible from here.
os.makedirs(new_dir, exist_ok=True)

for file in os.listdir(filepath):
    target = os.path.join(new_dir, file)
    if not os.path.exists(target):
        convert_to_8Bit(os.path.join(filepath, file), target)

### Explore GeoJSON Files

In [None]:
# Load the sample GeoJSON file with geopandas
shapes = gpd.read_file(shape_string)

In [None]:
# (rows, columns) of the loaded table
shapes.shape

In [None]:
# Coordinate reference system attached to the geometries
shapes.crs

In [None]:
# Per-row bounds of the loaded geometries
shapes.bounds

### Create Image Masks

In [None]:
# Show the contents of the training image directory in sorted order.
train_image_dir = HOME + '/new_project/data/AOI_2_Vegas_Train/RGB-PanSharpen/'
i = os.listdir(train_image_dir)
sorted(i)

In [None]:
# Build one mask per GeoJSON file by passing its geometries and the matching
# RGB-PanSharpen TIF to gdf_to_array. Each mask is written to
# data/masks/img_<id>, which is where the later cells of this notebook read
# the masks from.
geojson_dir = HOME + '/new_project/data/AOI_2_Vegas_Train/geojson/'
tif_prefix = HOME + '/new_project/data/AOI_2_Vegas_Train/RGB-PanSharpen/RGB-PanSharpen_AOI_2_Vegas_img'
new_dir = HOME + '/new_project/data/masks/'
os.makedirs(new_dir, exist_ok=True)

# Numeric image id immediately preceding the ".geojson" extension.
image_id_pattern = re.compile(r'(\d+)(?=\.geojson)')

for file in os.listdir(geojson_dir):
    match = image_id_pattern.search(file)
    if match is None:
        # Not a "<...><id>.geojson" file; nothing to rasterize.
        continue
    image_id = match.group(1)
    tif_path = tif_prefix + image_id + '.tif'
    try:
        # os.listdir yields bare names, so the directory must be prepended.
        shape = gpd.read_file(os.path.join(geojson_dir, file))
        gdf_to_array(shape, tif_path, new_dir + 'img_' + image_id, burnValue=150)
    except Exception as err:
        # Best effort: keep processing the remaining files, but report the
        # failure instead of hiding it.
        print('Skipping {}: {!r}'.format(file, err))

### Explore Image Masks

In [None]:
# Open the mask file for image id 1017 with PIL
im = Image.open(HOME + '/new_project/data/masks/img_1017')

In [None]:
# PIL reports size as (width, height)
width, height = im.size

In [None]:
width

In [None]:
height

In [None]:
# Display the mask inline in the notebook
im

### Prepare Image Masks

In [None]:
# Y - Target
# Collect the path of every entry in the masks directory, sort the paths, then
# read band 1 of each one into a (650, 650, 1) array.
mask_filepath = HOME + '/new_project/data/masks/'
mask_list = [str(mask_filepath) + str(entry) for entry in os.listdir(mask_filepath)]

mask_list_sorted = sorted(mask_list)

mask_train = []
for mask_path in mask_list_sorted:
    # Keep the dataset bound to a name while its band is being read.
    raster = gdal.Open(mask_path, gdal.GA_ReadOnly)
    band_values = raster.GetRasterBand(1).ReadAsArray()
    mask_train.append(band_values.reshape(650, 650, 1))

len(mask_train)

In [None]:
# Persist the list of mask arrays so later steps can load it without
# re-reading the mask files.
filepath = HOME + '/new_project/data/pickles/mask_train.pkl'
with open(filepath, mode='wb') as mask_pickle:
    pickle.dump(mask_train, mask_pickle)

### Prepare Source Images

In [None]:
# X - Predictors
# Load the 8-bit TIF that corresponds to each mask, in the same order as
# mask_list_sorted, so that tif_train[k] lines up with the k-th matched mask.
tif_filepath = HOME + '/new_project/data/AOI_2_Vegas_Train/RGB-PanSharpen-8bit/'
tif_list = [
    str(tif_filepath) + str(name)
    for name in os.listdir(tif_filepath)
    if name.endswith('.tif')
]
tif_list_sorted = sorted(tif_list)

# Set for O(1) membership tests inside the matching loop below.
tif_available = set(tif_list)

# Image id embedded in a mask path, e.g. ".../masks/img_1017" -> "1017".
mask_id_pattern = re.compile(r'img_(\d+)')

tif_matched = []
unmatched_masks = []
for mask in mask_list_sorted:
    match = mask_id_pattern.search(mask)
    if match is None:
        # Entry in the masks directory that does not carry an image id.
        unmatched_masks.append(mask)
        continue
    tif_path = tif_filepath + 'RGB-PanSharpen_AOI_2_Vegas_img' + match.group(1) + '.tif'
    if tif_path in tif_available:
        tif_matched.append(tif_path)
    else:
        unmatched_masks.append(mask)

if unmatched_masks:
    # Masks without an image are left out of tif_train, so tif_train would be
    # shorter than (and misaligned with) a list built from every mask.
    print('WARNING: {} mask(s) have no matching 8-bit TIF, e.g. {}'.format(
        len(unmatched_masks), unmatched_masks[0]))

tif_train = [imread(tif, plugin='tifffile') for tif in tif_matched]

len(tif_train)

In [None]:
# Persist the list of loaded source images alongside the mask pickle.
filepath = HOME + '/new_project/data/pickles/tif_train.pkl'
with open(filepath, mode='wb') as tif_pickle:
    pickle.dump(tif_train, tif_pickle)