## Convert latitude/longitude traces into heatmap images using the frequency-matrix transformation

In [2]:
import os
import math
import skmob
import matplotlib
import numpy as np
import pandas as pd
from PIL import Image
import seaborn as sns
from tqdm import tqdm
import geopy.distance
import matplotlib.pyplot as plt
from geopy.geocoders import Nominatim
from haversine import haversine, Unit
from skmob.preprocessing import detection

In [3]:
# Most of the heatmap-creation code below is taken or adapted from jeffmur's geoLife repository
def logBase(max, val):
    """Return the logarithm of ``val`` in base ``max``, mapping 0 to 0.

    Args:
        max: Base of the logarithm.
        val: Value to take the logarithm of.

    Returns:
        ``0`` when ``val`` is 0 (``math.log`` is never called for it),
        otherwise ``math.log(val, max)``.
    """
    return 0 if val == 0 else math.log(val, max)

In [4]:
def takeLog(maxVal, freq_heat):
    """Log-scale a frequency matrix using ``maxVal`` as the logarithm base.

    Every non-zero cell ``v`` becomes ``math.log(v, maxVal)``; zero cells
    stay 0. A cell equal to ``maxVal`` therefore maps to 1.

    Args:
        maxVal: Base of the logarithm (the caller passes the largest count
            found in ``freq_heat``).
        freq_heat: DataFrame of counts.

    Returns:
        A DataFrame with the same shape as ``freq_heat`` whose index and
        columns are the positions ``0..n-1``. When ``maxVal <= 1`` the
        logarithm base is unusable and an all-zero frame is returned.
    """
    n_rows, n_cols = freq_heat.shape
    row_labels = range(n_rows)
    col_labels = range(n_cols)
    if maxVal <= 1:
        return pd.DataFrame(0, index=row_labels, columns=col_labels)

    # Build the whole float matrix first instead of writing floats one cell
    # at a time into an integer-typed frame. Cells are addressed by position,
    # so the labels of freq_heat's own index do not matter.
    scaled = [
        [0.0 if count == 0 else math.log(count, maxVal) for count in row]
        for row in freq_heat.to_numpy().tolist()
    ]
    values = np.array(scaled, dtype=float).reshape(n_rows, n_cols)
    return pd.DataFrame(values, index=row_labels, columns=col_labels)

In [5]:
def createMap(location, cell_size=5):
    """Derive the grid geometry for a bounding box.

    Args:
        location: Sequence whose first four items are, in order, the south
            latitude, north latitude, west longitude and east longitude
            (anything ``float()`` accepts).
        cell_size: Distance covered by one pixel, in miles (the edge
            distances it divides are measured in miles).

    Returns:
        A ``(bounds, step, pix)`` tuple:
          * ``bounds`` - the corners ``"SE"``, ``"SW"``, ``"NE"``, ``"NW"``,
            each as ``[latitude, longitude]``;
          * ``step`` - cell size in degrees under ``"width"`` and ``"length"``;
          * ``pix`` - number of cells under ``"length"`` and ``"width"``.

    NOTE(review): "width" is the SW->NW distance (measured along latitude)
    but its cell count divides the longitude span, while "length" is the
    NW->NE distance and its cell count divides the latitude span. Confirm
    this pairing is intended.
    """
    south, north = float(location[0]), float(location[1])
    west, east = float(location[2]), float(location[3])

    # Corner coordinates, each stored as [latitude, longitude].
    bounds = {
        "SE": [south, east],
        "SW": [south, west],
        "NE": [north, east],
        "NW": [north, west],
    }

    # Edge distances in whole miles, rounded up.
    width = math.ceil(geopy.distance.geodesic(bounds["SW"], bounds["NW"]).miles)
    length = math.ceil(geopy.distance.geodesic(bounds["NW"], bounds["NE"]).miles)

    # Number of cells along each edge, rounded up.
    pix = {
        "length": int(math.ceil(length / cell_size)),
        "width": int(math.ceil(width / cell_size)),
    }

    # Size of one cell in degrees: coordinate span / number of cells.
    step_length = (north - south) / pix["length"]
    step_width = (east - west) / pix["width"]
    step = {"width": step_width, "length": step_length}

    return bounds, step, pix

In [6]:
def frequencyHeatmap(bounds, pix, step, stdf):
    """Count how many points of ``stdf`` fall into each grid cell.

    Args:
        bounds: Mapping with a ``"NE"`` entry ``[north latitude, east
            longitude]``; cell indices are measured from that corner.
        pix: Mapping with the number of cells under ``"width"`` (columns)
            and ``"length"`` (rows).
        step: Mapping with the cell size in degrees under ``"width"``
            (longitude) and ``"length"`` (latitude).
        stdf: DataFrame whose first two columns are read, by position, as
            latitude and longitude.

    Returns:
        ``(maxVal, freq_heat)`` where ``freq_heat`` is an integer DataFrame
        of shape ``(rows + 1, columns + 1)`` holding the per-cell counts and
        ``maxVal`` is its largest entry (0 when no point hits the grid).

    NOTE(review): the column index is measured from the east edge, so
    column 0 is the easternmost cell - confirm this orientation is intended.
    """
    nLat = bounds["NE"][0]
    eLon = bounds["NE"][1]

    columns = pix["width"]
    rows = pix["length"]

    coords = stdf[stdf.columns[0:2]].to_numpy(dtype=float)
    counts = np.zeros((rows + 1, columns + 1), dtype=np.int64)

    if len(coords):
        # np.rint rounds halves to even, the same rule as the built-in round().
        r = np.rint((nLat - coords[:, 0]) / step["length"])
        c = np.rint((eLon - coords[:, 1]) / step["width"])

        # Points that land outside the grid are ignored.
        inside = (c <= columns) & (c >= 0) & (r <= rows) & (r >= 0)

        # add.at accumulates repeated (row, column) pairs; plain fancy-index
        # assignment would count each distinct cell only once.
        np.add.at(
            counts,
            (r[inside].astype(np.intp), c[inside].astype(np.intp)),
            1,
        )

    maxVal = int(counts.max())
    freq_heat = pd.DataFrame(
        counts, index=range(rows + 1), columns=range(columns + 1)
    )
    return maxVal, freq_heat

In [7]:
def genFMprime(log_df):
    """Render a scaled frequency matrix as a grey-level RGB image.

    Args:
        log_df: DataFrame of values; each one is multiplied by 255 and
            truncated with ``int()`` to give the grey level. The row labels
            are used directly as pixel coordinates, so they are presumably
            the integer positions 0..n-1 - confirm against the caller.

    Returns:
        A PIL ``RGB`` image created with size ``(rows, columns)``. Pixels are
        addressed as ``[row label, column position]``, i.e. matrix rows run
        along the first image coordinate.
    """
    n_rows, n_cols = log_df.shape

    # Red background: any pixel the loop below does not overwrite stays red.
    img = Image.new("RGB", (n_rows, n_cols), color="red")
    canvas = img.load()

    for record in log_df.itertuples():
        # record[0] is the row label, the remaining items are the cell values.
        x = record[0]
        for y, value in enumerate(record[1:]):
            level = int(255 * value)
            canvas[x, y] = (level, level, level)

    return img

In [8]:
def dropOutlyingData(df, boundingbox):
    """Keep only the rows of ``df`` that lie inside ``boundingbox``.

    Args:
        df: DataFrame with ``lat`` and ``lng`` columns.
        boundingbox: Sequence ordered as [min latitude, max latitude,
            min longitude, max longitude]; items are passed through
            ``float()``.

    Returns:
        A new DataFrame with the rows inside the box (edges included) and a
        fresh 0..n-1 index.
    """
    lat_span = boundingbox[0:2]
    lon_span = boundingbox[2:4]

    west, east = float(lon_span[0]), float(lon_span[1])
    south, north = float(lat_span[0]), float(lat_span[1])

    lng_inside = (df.lng >= west) & (df.lng <= east)
    lat_inside = (df.lat >= south) & (df.lat <= north)

    kept = df.loc[lng_inside & lat_inside]
    return kept.reset_index(drop=True)

In [9]:
# Load the trajectory table and discard the 'Unnamed: 0' column that
# comes along with the CSV.
df = pd.read_csv('/data/fiona123/GeolifeTrajectories/Data.csv')
df = df.drop(columns='Unnamed: 0')
df.head(5)

Unnamed: 0,lat,lng,alt,subfolder,datetime,ym
0,39.975347,116.324503,0.0,93,2008-08-15 19:32:28,2008_8
1,39.975347,116.324503,0.0,93,2008-08-15 19:32:30,2008_8
2,39.975353,116.324497,0.0,93,2008-08-15 19:32:32,2008_8
3,39.975382,116.324448,0.0,93,2008-08-15 19:32:34,2008_8
4,39.97544,116.324373,0.0,93,2008-08-15 19:32:36,2008_8


In [10]:
# Bounding box of Beijing, kept as strings and ordered
# [south latitude, north latitude, west longitude, east longitude].
#
# The same values can also be obtained from geopy:
# app = Nominatim(user_agent="geoLife")
# location = app.geocode(cityCountry).raw
# print(location["boundingbox"])

boundingBox = ["39.746217", "40.066217", "116.2312757", "116.5512757"]

In [11]:
# Copy of the loaded frame; the cells below operate on this copy.
df_copy = df.copy()

In [12]:
# Edge length of the ground area represented by a single pixel.
# NOTE(review): 0.00062137 is the metres-to-miles factor, so CELL_SIZE is
# presumably given in metres - confirm.
CELL_SIZE = 500
SQ_CELL = float(CELL_SIZE) * 0.00062137 # miles: a linear edge length (passed to createMap as cell_size), not an area

In [None]:
# Root directory for the generated images. For output without resizing the
# directory "/data/fiona123/GeolifeTrajectories/OutputHeatMap/" was used.
output_root = "/data/fiona123/GeolifeTrajectories/ResizedHeatMap_28/"

uid_list = df_copy.subfolder.unique()

# Create one output directory per user.
for i in tqdm(uid_list):
    path = output_root + str(i)
    print(i)
    # makedirs(exist_ok=True) lets this cell be re-run without raising
    # FileExistsError and also creates a missing parent directory.
    os.makedirs(path, exist_ok=True)


ym_list = df_copy.ym.unique()

# The grid depends only on the bounding box and the cell size, so it is
# computed once instead of once per image.
bounds, step, pix = createMap(boundingBox, cell_size=SQ_CELL)

# Drop the points outside the bounding box once, then visit only the
# (user, year-month) combinations that still contain data.
in_bounds = dropOutlyingData(df_copy, boundingBox)

# Write one heatmap image per user and year-month.
for (i, j), df_user_month in tqdm(
    in_bounds.groupby(["subfolder", "ym"], sort=False)
):
    maxVal, freq_heat = frequencyHeatmap(bounds, pix, step, df_user_month)
    log_df = takeLog(maxVal, freq_heat)
    img = genFMprime(log_df)
    image_name = output_root + str(i) + "/" + str(j)
    # Greyscale, then shrink to the fixed 28x28 output size.
    img = img.convert("L")
    img = img.resize((28, 28))
    img.save(image_name + '.png', "PNG")