**Packages**

In [1]:
import math
import matplotlib
import matplotlib.pyplot as plt

import numpy as np

import os

import pandas as pd

import scipy
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelBinarizer

import tensorflow as tf
import tensorflow_decision_forests as tfdf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import concatenate
from keras.layers import Flatten, Dense, Dropout
import tifffile

2023-03-30 22:36:57.086576: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


**Data Pre-Processing**

In [2]:
tiles = pd.read_csv(r'./Data/Tiles_binned_zipcode.csv')
tiles.head(5)

Unnamed: 0,Tile_ID,Long2,Lat2,Long1,Lat1,Mid_lat,Mid_long,Stop_Signs,Paving_historical,Paving_future,...,94129,94130,94131,94132,94133,94134,94141,94143,94158,94188
0,36,-122.514446,37.779636,-122.513306,37.778732,37.779184,-122.513876,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
1,37,-122.514446,37.778732,-122.513306,37.777829,37.77828,-122.513876,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
2,151,-122.513306,37.779636,-122.512166,37.778732,37.779184,-122.512736,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
3,152,-122.513306,37.778732,-122.512166,37.777829,37.77828,-122.512736,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
4,153,-122.513306,37.777829,-122.512166,37.776925,37.777377,-122.512736,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0


Split into training and test

In [3]:
x_train, x_test, y_train, y_test = train_test_split(tiles[['Tile_ID', 'Long2', 'Lat2', 'Long1', 'Lat1', 'Mid_lat', 'Mid_long',
       'Stop_Signs', 'Paving_historical', 'Paving_future', 'Bus_stop',
       'Collisions_Future', 'Collisions_Historical', 'RTTYP_I',
       'RTTYP_M', 'RTTYP_O', 'RTTYP_S', 'RTTYP_U', 'Collisions_Future_binary',
       'Collisions_Historical_binary', 'bins_numeric', 'zip_code', '94101',
       '94102', '94104', '94105', '94107', '94108', '94109', '94110', '94111',
       '94112', '94114', '94115', '94116', '94117', '94118', '94121', '94122',
       '94123', '94124', '94127', '94129', '94130', '94131', '94132', '94133',
       '94134', '94141', '94143', '94158', '94188']], 
                                   tiles['bin'],
                                   random_state=104, 
                                   test_size=0.20, 
                                   shuffle=True)

Image Pre-Processing

In [4]:
x_train_len = len(x_train)
x_test_len = len(x_test)
print('x_train_len', x_train_len)
print('x_test_len', x_test_len)

x_train_len 8376
x_test_len 2095


In [5]:
int(x_train_len/4)

2094

In [6]:
2*int(x_train_len/4)

4188

In [7]:
3*int(x_train_len/4)

6282

In [8]:
4*int(x_train_len/4)

8376

In [9]:
IMAGE_PATH = './Satellite Imagery/Satellite Images Tiled/' 

def preprocess_data_part1(IMAGE_PATH):
    """ Generate lists of images and labelsbased on temp_no_refer and temp_refer lists
    
    Params:
    -------
    IMAGE_PATH (str): path to directory with images.
    
    Returns:
    --------
    images_mini  (np.ndarray): Images of shape (N, 149 3)
    """
    
    data_mini1 = []
    for id in x_train['Tile_ID'][0:int(x_train_len/4)]:    
                    
        # read image and store as matrix            
        # Index at the end makes all images the same size (they sometimes differ by 1 pixel)
        image = tifffile.imread(IMAGE_PATH + str(id) + '.tif')[0:148, 0:188, :]
        
        #grayscale
        for i in image:
            for j in i:
                data_mini1.append([np.mean(j[0:3]), j[3]])
 
    # stack images and trasnform to array
    images_mini1 = np.stack(data_mini1)
    
    return images_mini1

In [10]:
images_mini1 = preprocess_data_part1(IMAGE_PATH)

In [32]:
print('train ', np.shape(images_mini1))

train  (58263456, 2)


In [12]:
IMAGE_PATH = './Satellite Imagery/Satellite Images Tiled/' 

def preprocess_data_part2(IMAGE_PATH):
    """ Generate lists of images and labelsbased on temp_no_refer and temp_refer lists
    
    Params:
    -------
    IMAGE_PATH (str): path to directory with images.
    
    Returns:
    --------
    images_mini  (np.ndarray): Images of shape (N, 149 3)
    """
    
    data_mini2 = []
    for id in x_train['Tile_ID'][int(x_train_len/4):2*int(x_train_len/4)]:    
                    
        # read image and store as matrix            
        # Index at the end makes all images the same size (they sometimes differ by 1 pixel)
        image = tifffile.imread(IMAGE_PATH + str(id) + '.tif')[0:148, 0:188, :]
        
        #grayscale
        for i in image:
            for j in i:
                data_mini2.append([np.mean(j[0:3]), j[3]])
    
    # stack images and trasnform to array
    images_mini2 = np.stack(data_mini2)
    
    return images_mini2

In [14]:
images_mini2 = preprocess_data_part2(IMAGE_PATH)

In [15]:
print('train ', np.shape(images_mini2))

train  (58263456, 2)


In [16]:
IMAGE_PATH = './Satellite Imagery/Satellite Images Tiled/' 

def preprocess_data_part3(IMAGE_PATH):
    """ Generate lists of images and labelsbased on temp_no_refer and temp_refer lists
    
    Params:
    -------
    IMAGE_PATH (str): path to directory with images.
    
    Returns:
    --------
    images_mini  (np.ndarray): Images of shape (N, 149 3)
    """
        
    data_mini3 = []
    for id in x_train['Tile_ID'][2*int(x_train_len/4):3*int(x_train_len/4)]:    
                    
        # read image and store as matrix            
        # Index at the end makes all images the same size (they sometimes differ by 1 pixel)
        image = tifffile.imread(IMAGE_PATH + str(id) + '.tif')[0:148, 0:188, :]
        
        #grayscale
        for i in image:
            for j in i:
                data_mini3.append([np.mean(j[0:3]), j[3]])
    
    # stack images and trasnform to array
    images_mini3 = np.stack(data_mini3)
    
    return images_mini3

In [17]:
images_mini3 = preprocess_data_part3(IMAGE_PATH)

In [18]:
print('train ', np.shape(images_mini3))

train  (58263456, 2)


In [20]:
IMAGE_PATH = './Satellite Imagery/Satellite Images Tiled/' 

def preprocess_data_part4(IMAGE_PATH):
    """ Generate lists of images and labelsbased on temp_no_refer and temp_refer lists
    
    Params:
    -------
    IMAGE_PATH (str): path to directory with images.
    
    Returns:
    --------
    images_mini  (np.ndarray): Images of shape (N, 149 3)
    """
    
    data_mini4 = []
    for id in x_train['Tile_ID'][3*int(x_train_len/4):4*int(x_train_len/4)]:    
                    
        # read image and store as matrix            
        # Index at the end makes all images the same size (they sometimes differ by 1 pixel)
        image = tifffile.imread(IMAGE_PATH + str(id) + '.tif')[0:148, 0:188, :]
        
        #grayscale
        for i in image:
            for j in i:
                data_mini4.append([np.mean(j[0:3]), j[3]])
 
    # stack images and trasnform to array
    images_mini4 = np.stack(data_mini4)
    
    return images_mini4

In [21]:
images_mini4 = preprocess_data_part4(IMAGE_PATH)

In [22]:
print('train ', np.shape(images_mini4))

train  (58263456, 2)


In [23]:
images_mini_half1 = np.concatenate((images_mini1, images_mini2), axis=0)

In [24]:
images_mini_half2 = np.concatenate((images_mini3, images_mini4), axis=0)

In [25]:
images_mini = np.concatenate((images_mini_half1, images_mini_half2), axis=0)

In [29]:
print('train ', np.shape(images_mini))

train  (233053824, 2)


In [30]:
images_mini

array([[162.      ,  91.      ],
       [160.66667 , 103.      ],
       [139.      ,  70.      ],
       ...,
       [121.      ,  40.      ],
       [109.      ,  35.      ],
       [109.333336,  34.      ]], dtype=float32)

In [27]:
IMAGE_PATH = './Satellite Imagery/Satellite Images Tiled/' 

def preprocess_data_part5(IMAGE_PATH):
    """ Generate lists of images and labelsbased on temp_no_refer and temp_refer lists
    
    Params:
    -------
    IMAGE_PATH (str): path to directory with images.
    
    Returns:
    --------
    images_mini  (np.ndarray): Images of shape (N, 149 3)
    """
    
    data_mini_test = []
    for id in x_test['Tile_ID']:    
                    
        # read image and store as matrix            
        # Index at the end makes all images the same size (they sometimes differ by 1 pixel)
        image = tifffile.imread(IMAGE_PATH + str(id) + '.tif')[0:148, 0:188, :]
        
        #grayscale
        for i in image:
            for j in i:
                data_mini_test.append([np.mean(j[0:3]), j[3]])
        
        # append to images
        data_mini_test.append(image)
 
    #stack images and trasnform to array
    images_mini_test = np.stack(data_mini_test)
    
    return images_mini_test

In [28]:
images_mini_test = preprocess_data_part5(IMAGE_PATH)

ValueError: all input arrays must have the same shape

In [None]:
print('train ', np.shape(images_mini_test))

Street Data Pre-Processing

In [31]:
street = np.asarray(x_train[['Tile_ID', 'Long2', 'Lat2', 'Long1', 'Lat1', 'Mid_lat', 'Mid_long',
       'Stop_Signs', 'Paving_historical', 'Paving_future', 'Bus_stop',
       'Collisions_Future', 'Collisions_Historical', 'RTTYP_I', 'RTTYP_M',
       'RTTYP_O', 'RTTYP_S', 'RTTYP_U', 'Collisions_Future_binary',
       'Collisions_Historical_binary', 'bins_numeric', 'zip_code', '94101',
       '94102', '94104', '94105', '94107', '94108', '94109', '94110', '94111',
       '94112', '94114', '94115', '94116', '94117', '94118', '94121', '94122',
       '94123', '94124', '94127', '94129', '94130', '94131', '94132', '94133',
       '94134', '94141', '94143', '94158', '94188']]).astype('float32')
street_mini = []
for row in range(len(street)):
    street_mini.append([[street[row]]])
street_mini = np.stack(street_mini)
print('train ', np.shape(street_mini))

street_test = np.asarray(x_test[['Tile_ID', 'Long2', 'Lat2', 'Long1', 'Lat1', 'Mid_lat', 'Mid_long',
       'Stop_Signs', 'Paving_historical', 'Paving_future', 'Bus_stop',
       'Collisions_Future', 'Collisions_Historical', 'RTTYP_I', 'RTTYP_M',
       'RTTYP_O', 'RTTYP_S', 'RTTYP_U', 'Collisions_Future_binary',
       'Collisions_Historical_binary', 'bins_numeric', 'zip_code', '94101',
       '94102', '94104', '94105', '94107', '94108', '94109', '94110', '94111',
       '94112', '94114', '94115', '94116', '94117', '94118', '94121', '94122',
       '94123', '94124', '94127', '94129', '94130', '94131', '94132', '94133',
       '94134', '94141', '94143', '94158', '94188']]).astype('float32')
street_mini_test = []
for row in range(len(street_test)):
    street_mini_test.append([[street_test[row]]])
street_mini_test = np.stack(street_mini_test)
print('test ',np.shape(street_mini_test))

train  (8376, 1, 1, 52)
test  (2095, 1, 1, 52)


In [None]:
233053824/8376

In [34]:
input_image_street = np.hstack(
    (street_mini.reshape((8376,52)),
     images_mini.reshape(8376,27824*2))
)
np.shape(input_image_street)

(8376, 55700)

In [None]:
input_image_street_test = np.hstack(
    (street_mini_test.reshape((2095,52)),
     images_mini_test.reshape(2095,2))
)
np.shape(input_image_street_test)

In [None]:
# from pathlib import Path
# file_path = Path('/home/ubuntu/noriel/210_Capstone_Aditya_Arisa_Noriel/Data/images_mini_greyscale.csv')
# pd.DataFrame(input_image_street).to_csv(file_path,index=False)

In [None]:
# from pathlib import Path
# file_path = Path('/home/ubuntu/noriel/210_Capstone_Aditya_Arisa_Noriel/Data/images_mini_test_greyscale.csv')
# pd.DataFrame(input_image_street_test).to_csv(file_path,index=False)

In [35]:
gbt_model = tfdf.keras.GradientBoostedTreesModel() #categorical_algorithm = 'CART'

gbt_model.fit(input_image_street,
    y_train,
    #validation_data=[x_test[['Collisions_Historical', 'Mid_lat','Mid_long', 'Stop_Signs', 'Paving_historical', 'Bus_stop']], y_test],     
    epochs=1,
    verbose=1)

2023-03-31 00:47:43.550625: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355


Use /tmp/tmpemyvxi1b as temporary training directory
Reading training dataset...


2023-03-31 00:47:43.948258: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-03-31 00:47:43.948574: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-03-31 00:47:43.949509: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

Location:
  File "/usr/local/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,

  File "/usr/local/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)

  File "/opt/tensorflow/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()

  File "/opt/tensorflow/lib/python3.10/site-packages/traitlets/config/application.py", line 1043, in launch_instance
    app.start()

  File "/opt/tensorflow/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 725, in start
    self.io_loop.start()

  File "/opt/tensorflow/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 215, in start
    self.asyncio_loop.run_forever()

  File "/usr/local/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
    self._run_once()

  File "/usr/local/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
    handle._run()

  File "/usr/local/lib/python3.10/a

Location:
  File "/usr/local/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,

  File "/usr/local/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)

  File "/opt/tensorflow/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()

  File "/opt/tensorflow/lib/python3.10/site-packages/traitlets/config/application.py", line 1043, in launch_instance
    app.start()

  File "/opt/tensorflow/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 725, in start
    self.io_loop.start()

  File "/opt/tensorflow/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 215, in start
    self.asyncio_loop.run_forever()

  File "/usr/local/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
    self._run_once()

  File "/usr/local/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
    handle._run()

  File "/usr/local/lib/python3.10/a

Location:
  File "/usr/local/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,

  File "/usr/local/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)

  File "/opt/tensorflow/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()

  File "/opt/tensorflow/lib/python3.10/site-packages/traitlets/config/application.py", line 1043, in launch_instance
    app.start()

  File "/opt/tensorflow/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 725, in start
    self.io_loop.start()

  File "/opt/tensorflow/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 215, in start
    self.asyncio_loop.run_forever()

  File "/usr/local/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
    self._run_once()

  File "/usr/local/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
    handle._run()

  File "/usr/local/lib/python3.10/a

Location:
  File "/usr/local/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,

  File "/usr/local/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)

  File "/opt/tensorflow/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()

  File "/opt/tensorflow/lib/python3.10/site-packages/traitlets/config/application.py", line 1043, in launch_instance
    app.start()

  File "/opt/tensorflow/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 725, in start
    self.io_loop.start()

  File "/opt/tensorflow/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 215, in start
    self.asyncio_loop.run_forever()

  File "/usr/local/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
    self._run_once()

  File "/usr/local/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
    handle._run()

  File "/usr/local/lib/python3.10/a

Location:
  File "/usr/local/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,

  File "/usr/local/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)

  File "/opt/tensorflow/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()

  File "/opt/tensorflow/lib/python3.10/site-packages/traitlets/config/application.py", line 1043, in launch_instance
    app.start()

  File "/opt/tensorflow/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 725, in start
    self.io_loop.start()

  File "/opt/tensorflow/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 215, in start
    self.asyncio_loop.run_forever()

  File "/usr/local/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
    self._run_once()

  File "/usr/local/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
    handle._run()

  File "/usr/local/lib/python3.10/a

2023-03-31 01:07:51.805204: W tensorflow/core/framework/op_kernel.cc:1807] OP_REQUIRES failed at cast_op.cc:121 : UNIMPLEMENTED: Cast string to int32 is not supported


UnimplementedError: Graph execution error:

2 root error(s) found.
  (0) UNIMPLEMENTED:  Cast string to int32 is not supported
	 [[{{node Cast}}]]
  (1) CANCELLED:  Function was cancelled before it was started
0 successful operations.
0 derived errors ignored. [Op:__inference__consumes_training_examples_until_eof_334405]

In [None]:
print(gbt_model.summary())

In [None]:
##uncomment to show training log
gbt_model.make_inspector().training_logs()

In [None]:
gbt_model.make_inspector().evaluation()

In [None]:
import matplotlib.pyplot as plt

logs = gbt_model.make_inspector().training_logs()

plt.plot([log.num_trees for log in logs], [log.evaluation.loss for log in logs], label="training data")
plt.xlabel("Number of trees")
plt.ylabel("Loss")
plt.legend()

plt.show()

In [None]:
gbt_model.evaluate(input_image_street_test)

In [None]:
gbt_model.predict(input_image_street_test)

In [None]:
predicted_result = gbt_model.predict(input_image_street_test)
test_values = []
for i in predicted_result: 
    input_list = i
    max_value = max(input_list)
    index = [index for index, item in enumerate(input_list) if item == max_value]
    test_values.append(index[0])
print('macro f1: ', f1_score(y_test, test_values, average = 'macro' ))
print('f1 by class: ', f1_score(y_test, test_values, average = None ))