In [78]:
from PIL import Image
import kagglehub
import numpy as np
import os
import random

def train_test_split(path, split=0.75, max=None, seed=None):
    """Shuffle the PNG files found in ``path`` and split them into train/test lists.

    Args:
        path: Directory to scan (non-recursive).
        split: Fraction of the selected files that goes to the train list.
        max: Optional cap on how many files are used. The cap is applied to
            the name-sorted listing *before* shuffling. (The name shadows the
            builtin but is kept so keyword callers keep working.)
        seed: Optional seed for a private RNG so the split is reproducible.
            When omitted, the module-level ``random`` state is used.

    Returns:
        ``(train, test)`` lists of ``"{path}/{filename}"`` strings.
    """
    # Match on the extension rather than a substring, so that files such as
    # "png_notes.txt" are excluded. os.listdir returns names in arbitrary
    # order, so sort them to make the `max` subset stable between runs.
    names = sorted(
        name for name in os.listdir(path) if name.lower().endswith(".png")
    )
    items = [f"{path}/{name}" for name in names]
    if max is not None:
        items = items[:max]

    shuffler = random.Random(seed) if seed is not None else random
    shuffler.shuffle(items)

    split_index = int(len(items) * split)
    return items[:split_index], items[split_index:]


def csv_loader(path):
    """Read a headerless two-column CSV of ``lat,lng`` pairs.

    Args:
        path: Path to the CSV file.

    Returns:
        A list of ``[lat, lng]`` float pairs in file order. Blank lines are
        skipped and therefore do not occupy a position in the list.

    Raises:
        ValueError: If a non-blank line does not hold exactly two
            comma-separated numbers.
    """
    coords = []
    with open(path) as f:
        # Iterate the file lazily instead of materialising every line first.
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            lat, lng = line.split(",")
            coords.append([float(lat), float(lng)])
    return coords


# The dataset holds 10,000 images, each named by its zero-based index (<index>.png)
dataset_root = kagglehub.dataset_download("paulchambaz/google-street-view")
path = f"{dataset_root}/dataset"

# coords.csv sits next to the images inside the dataset folder
coords_file = f"{path}/coords.csv"
coords = csv_loader(coords_file)
print(f"{len(coords)} coordinates loaded (lat, lng)")

# Work on a 100-image subset, 75% of which goes to training
train, test = train_test_split(path, 0.75, 100)
print(f"{len(train)} images for train, {len(test)} images for test")

10000 coordinates loaded (lat, lng)
75 images for train, 25 images for test


In [79]:
from numpy.typing import NDArray

class Neuron:
    """A single unit holding a weight vector and a scalar bias."""

    def __init__(self, inputs: int):
        """Create a neuron that expects ``inputs`` incoming values.

        Weights are drawn with ``np.random.randn``; the bias comes from the
        standard-library ``random.random`` and so lies in [0, 1).
        """
        initial_weights = np.random.randn(inputs)
        initial_bias = random.random()
        self.weights = initial_weights
        self.bias = initial_bias

    def forward(self, inputs: NDArray[np.float64]):
        """Return the weighted sum of ``inputs`` plus the bias."""
        weighted_sum = self.weights.T @ inputs
        return weighted_sum + self.bias

In [80]:
# I call the inputs (incoming nodes) 'x', and outgoing (nodes that I affect) 'o'

class Layer():
    """Common base for network layers.

    ``len`` holds the layer size (``None`` until it is known) and
    ``compiled`` records whether ``compile`` has run.
    """

    len = None
    compiled = False

    def compile(self, inputs: int = None):
        """Fix the layer size and mark the layer as compiled.

        When no size has been set yet, ``inputs`` is adopted as the size; a
        size that is already set is left untouched.

        Raises:
            Exception: If neither the layer nor ``inputs`` provides a size.
        """
        size_unknown = self.len is None
        if size_unknown and inputs is None:
            raise Exception("The layer has an undefined size")
        if size_unknown:
            self.len = inputs
        self.compiled = True

    def forward(self, x):
        """Forward-pass hook; the base implementation does nothing."""
        return None

    def backward(self, o):
        """Backward-pass hook; the base implementation does nothing."""
        return None

    def __len__(self):
        """Return the size stored in ``len``."""
        return self.len
        
# ReLU and LeakyReLU together
class ReLU(Layer):
    """Rectifier activation; a non-zero ``c`` makes it a leaky ReLU."""

    def __init__(self, c=0):
        """Store ``c``, the slope applied to non-positive inputs."""
        self.c = c

    def forward(self, x):
        """Keep the positive entries of ``x`` and scale the rest by ``c``."""
        is_positive = x > 0
        scaled = self.c * x
        return np.where(is_positive, x, scaled)


class Sigmoid(Layer):
    """Element-wise logistic activation."""

    def __init__(self):
        """Nothing to configure."""

    def forward(self, x):
        """Return ``1 / (1 + exp(-x))`` evaluated element-wise."""
        denominator = 1 + np.exp(-x)
        return 1 / denominator

class FC(Layer):
    """Fully connected layer built from independent ``Neuron`` objects."""

    def __init__(self, outputs, wpn):
        """
        Args:
            outputs: Number of neurons, i.e. the layer's output size.
            wpn: Weights per neuron, or ``None`` to take that number from
                the ``inputs`` value handed to ``compile``.
        """
        self.len = outputs
        self.inputs = wpn

    def compile(self, inputs=None):
        """Create the neurons and mark the layer as compiled.

        Args:
            inputs: Size of the preceding layer. Ignored when ``wpn`` was
                given at construction time. Defaults to ``None`` so the
                signature matches the base-class ``compile``.

        Raises:
            Exception: If neither ``wpn`` nor ``inputs`` provides a size.
        """
        if self.inputs is None:
            if inputs is None:
                raise Exception("This layer has an undefined size")
            self.inputs = inputs
        self.neurons = [Neuron(self.inputs) for _ in range(self.len)]
        # The base-class compile sets this flag; this override must too,
        # otherwise a compiled FC layer still reports compiled == False.
        self.compiled = True

    def forward(self, x):
        """Evaluate every neuron and return the outputs as a 1-D array.

        A 2-D ``x`` supplies a separate input row per neuron (row ``i``
        goes to neuron ``i``); a 1-D ``x`` is shared by all neurons.

        Raises:
            Exception: If ``x`` is neither 1-D nor 2-D.
        """
        if len(x.shape) == 2:  # different input for each neuron
            return np.array(
                [neuron.forward(x[index]) for index, neuron in enumerate(self.neurons)]
            )
        if len(x.shape) == 1:  # same input for each neuron
            return np.array([neuron.forward(x) for neuron in self.neurons])
        raise Exception("This layer is defined incorrectly.")

    def __repr__(self):
        return f"FC layer, with {self.inputs} inputs and {self.len} outputs"

class Softmax(Layer):
    """Softmax activation normalised over all elements of the input."""

    def __init__(self):
        """Nothing to configure."""

    def forward(self, x):
        """Return ``exp(x) / sum(exp(x))``, summing over every element.

        The maximum is subtracted before exponentiating. This leaves the
        result mathematically unchanged but keeps ``np.exp`` from
        overflowing to ``inf`` (and the quotient from becoming ``nan``)
        when the inputs are large.
        """
        x = np.asarray(x)
        if x.size == 0:
            # np.max rejects empty input; an empty input maps to an empty output.
            return np.exp(x)
        exps = np.exp(x - np.max(x))  # computed once and reused below
        return exps / exps.sum()


In [81]:
from typing import List

class Model:
    """Sequential container of layers with a chainable builder API."""

    def __init__(self, lr=0.0001, epochs=100):
        """Store the hyper-parameters and start with an empty layer list."""
        self.lr = lr
        self.epochs = epochs
        self.layers: List[Layer] = []

    def sigmoid(self):
        """Append a Sigmoid layer and return the model for chaining."""
        self.layers.append(Sigmoid())
        return self

    def relu(self):
        """Append a ReLU layer and return the model for chaining."""
        self.layers.append(ReLU())
        return self

    def leaky_relu(self, c: float):
        """Append a ReLU layer with slope ``c`` for non-positive inputs."""
        self.layers.append(ReLU(c=c))
        return self

    def dense(self, outputs, wpn=None):
        """Append a fully connected layer with ``outputs`` neurons.

        ``wpn`` (weights per neuron) is passed straight through to ``FC``.
        """
        self.layers.append(FC(outputs, wpn))
        return self

    def softmax(self):
        """Append a Softmax layer and return the model for chaining."""
        self.layers.append(Softmax())
        return self

    def compile(self):
        """Compile the layers in order.

        Each layer is given the length of the previous layer as its input
        size; the first layer is given ``None``.
        """
        inputs = None
        for layer in self.layers:
            layer.compile(inputs)
            inputs = len(layer)

    def _forward(self, inputs):
        """Feed ``inputs`` through every layer in order and return the result."""
        for layer in self.layers:
            inputs = layer.forward(inputs)
        return inputs

    def train(self, data, correct):
        """Run every sample through the network and validate the shapes.

        Only the forward pass is performed: no loss is computed, no weights
        are updated, and ``lr`` / ``epochs`` are not consulted.

        Args:
            data: Sequence of input samples.
            correct: Sequence of targets, one per sample.

        Raises:
            Exception: If the model has no layers, if ``data`` and
                ``correct`` differ in length, or if a sample or an output
                has the wrong shape.
        """
        # Check once up front so the error is also raised for empty data.
        if len(self.layers) == 0:
            raise Exception("No layers to train")

        # zip() would silently drop the unmatched tail; fail loudly instead.
        sized = hasattr(data, "__len__") and hasattr(correct, "__len__")
        if sized and len(data) != len(correct):
            raise Exception("Data/target length mismatch")

        for inputs, target in zip(data, correct):
            if inputs.shape[0] != len(self.layers[0]):
                raise Exception("Data/input-layer shape mismatch")

            outputs = self._forward(inputs)

            if outputs.shape != target.shape:
                raise Exception("Output-layer/target shape mismatch")

    def log(self):
        """Print every layer, one per line."""
        for layer in self.layers:
            print(layer)



In [82]:
# Heatmap resolution: OUT_H rows by OUT_W columns
OUT_H = 16
OUT_W = 32

# Every builder method returns the model itself, so the layers can be
# appended one statement at a time instead of in one chained expression.
model = Model(lr=0.0001, epochs=100)
model.dense(400, wpn=3072)  # 20 x 20 patches, 3072 weights per neuron
model.sigmoid()
model.dense(320)
model.sigmoid()
model.dense(OUT_H * OUT_W)  # broadcast back up to heatmap coordinates
model.sigmoid()
model.softmax()

model.compile()
# model.log()

In [83]:
def load_imgs(images):
    """Load image files into one stacked array.

    Args:
        images: Iterable of image file paths.

    Returns:
        ``np.array`` built from the per-image pixel arrays, in input order.
        NOTE(review): stacking into one regular array presumably requires all
        images to share size and mode — confirm for the dataset in use.
    """
    frames = []
    for img in images:
        # Open through a context manager so the file handle is released
        # deterministically; the pixel data is converted to an array while
        # the file is still open.
        with Image.open(img) as handle:
            frames.append(np.asarray(handle))
    return np.array(frames)


def load_correct(images, height, width, sigma=1.0, coord_table=None):
    """Build one flattened Gaussian heatmap target per image.

    Each image path must end in ``<index>.<ext>``; ``index`` selects a
    ``(lat, lng)`` pair from the coordinate table. The pair is mapped onto a
    ``height x width`` grid (latitude -90..90 to rows, longitude -180..180 to
    columns) and an unnormalised Gaussian with peak value 1 is centred on
    that cell.

    Args:
        images: Iterable of image paths using ``/`` as separator.
        height: Number of heatmap rows.
        width: Number of heatmap columns.
        sigma: Standard deviation of the Gaussian, in grid cells.
        coord_table: Optional indexable of ``(lat, lng)`` pairs. Defaults to
            the module-level ``coords`` so existing callers keep working.

    Returns:
        Array of shape ``(len(images), height * width)``.

    Raises:
        ValueError: If ``sigma`` is zero.
    """
    if sigma == 0:
        # Array division would otherwise produce inf/nan silently.
        raise ValueError("sigma must be non-zero")

    table = coords if coord_table is None else coord_table
    images = list(images)

    # Column/row index grids, built once and broadcast against each other so
    # every heatmap is computed in one vectorised expression.
    rows = np.arange(height).reshape(-1, 1)
    cols = np.arange(width).reshape(1, -1)

    correct = np.empty((len(images), height * width))
    for slot, image in enumerate(images):
        filename = image.split("/")[-1]
        lat, lng = table[int(filename.split(".")[0])]
        # Clamp so that lat == 90 / lng == 180 land on the last cell.
        lat_index = min(height - 1, int((lat + 90) / 180 * height))
        lon_index = min(width - 1, int((lng + 180) / 360 * width))
        # gaussian distribution for heatmap
        dist = (cols - lon_index) ** 2 + (rows - lat_index) ** 2
        correct[slot] = np.exp(-dist / (2 * sigma**2)).ravel()
    return correct

# Need data in the right shape
# Incoming: [N, H, W, C] image batch (e.g. [N, 640, 640, 3])
# Returns:  [N, num_patches, C * patch_size * patch_size]
def format_frames(frames, patch_size=32):
    """Cut every image into square patches and flatten each patch.

    Patches are numbered row-major (left to right, then top to bottom).
    Within a patch the values are laid out channel-first, i.e. in
    ``(C, patch_size, patch_size)`` order, and scaled by ``1 / 255``.

    Args:
        frames: Array of shape ``(N, H, W, C)``.
        patch_size: Edge length of the square patches; must divide both
            ``H`` and ``W``.

    Returns:
        Array of shape
        ``(N, (H // patch_size) * (W // patch_size), C * patch_size**2)``.

    Raises:
        ValueError: If ``H`` or ``W`` is not a multiple of ``patch_size``.
    """
    N, H, W, C = frames.shape
    if H % patch_size or W % patch_size:
        raise ValueError(
            f"Image size {H}x{W} is not divisible by patch_size={patch_size}"
        )
    num_patches_h = H // patch_size
    num_patches_w = W // patch_size

    # Split both spatial axes into (patch index, offset inside the patch):
    # (N, patch_row, y, patch_col, x, C)
    patches = frames.reshape(
        N, num_patches_h, patch_size, num_patches_w, patch_size, C
    )
    # Bring the two patch indices together and move the channel axis in front
    # of the in-patch pixel axes: (N, patch_row, patch_col, C, y, x).
    # The channel axis must stay *behind* the patch axes; placing it ahead of
    # them would make each flattened row span several patches.
    patches = patches.transpose(0, 1, 3, 5, 2, 4)
    patches = patches.reshape(N, num_patches_h * num_patches_w, -1)
    return patches / 255.0  # normalize to 0-1


# Targets and pixel data do not depend on each other, so the heatmap targets
# are built first; the images are then loaded and cut into per-patch vectors.
train_correct = load_correct(train, height=OUT_H, width=OUT_W, sigma=5)
train_imgs = load_imgs(train)
f_frames = format_frames(frames=train_imgs)
print(f_frames.shape)
model.train(f_frames, train_correct)

(75, 400, 3072)
