# Combination of white- and blacklists

In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os
import time
from shapely.geometry import shape, mapping as shapely_mapping
from shapely import wkt
import re

In [2]:
# Save a DataFrame as CSV, refusing to overwrite a file that already exists
def save_file(df, csv_name, PATH):
    """Write ``df`` to ``{PATH}/{csv_name}.csv`` unless that file is already there.

    Parameters
    ----------
    df : object with a ``to_csv`` method (a pandas DataFrame in this notebook)
    csv_name : str
        File name without the ``.csv`` extension.
    PATH : str
        Directory the file is written to.

    Returns
    -------
    str
        A status message: either the file was saved, or it already existed
        and nothing was written.
    """
    target = f"{PATH}/{csv_name}.csv"

    # Guard clause: only write when no regular file occupies the target path
    if not os.path.isfile(target):
        df.to_csv(target)
        return f"File \"{csv_name}\" successfully saved!"

    return f"File \"{csv_name}\" already created! Change filename!"

In [3]:
# Specify saving path of white- and blacklists for truck or car parking lots.
# The list pieces are read from this directory and the combined lists are
# written back to it; the trailing comment holds the alternative directory.
PATH = "data/trucks_ls" #"data/cars_ls"

## 1) Create Combined White- and Blacklists

In [4]:
# Names of the white- and blacklist pieces to combine.
# Each entry is a CSV file name without the ".csv" extension; the files are
# read from the directory given by PATH.
white_ls = ["whitelist1", "whitelist2", "whitelist3"] # adapt to your needs
black_ls = ["blacklist1", "blacklist2", "blacklist3"] # adapt to your needs

In [5]:
def _load_lists(list_names, path):
    """Read every ``./{path}/{name}.csv`` and stack the frames into one DataFrame.

    Parameters
    ----------
    list_names : iterable of str
        CSV file names without the ``.csv`` extension.
    path : str
        Directory (relative to the working directory) holding the files.

    Returns
    -------
    pandas.DataFrame
        All pieces concatenated row-wise, in the order of ``list_names``.

    Raises
    ------
    ValueError
        From ``pd.concat`` when ``list_names`` is empty. This is deliberate:
        an empty list must fail loudly instead of silently reusing whatever
        frame a previous loop left behind.
    """
    # Read all pieces first and concatenate once, instead of growing a frame
    # inside the loop.
    pieces = [
        pd.read_csv(f"./{path}/{name}.csv", index_col=0)
        for name in list_names
    ]
    return pd.concat(pieces)


# Concatenate whitelists
whitelists = _load_lists(white_ls, PATH)
print("Whitelists successfully loaded")

# Concatenate blacklists
blacklists = _load_lists(black_ls, PATH)
print("Blacklists successfully loaded")

Whitelists successfully loaded
Blacklists successfully loaded


In [6]:
# Drop rows that are exact repeats (identical in every column) within each list
whitelists, blacklists = (
    combined.drop_duplicates() for combined in (whitelists, blacklists)
)

In [7]:
# Find images that accidentally ended up in both the white- and the blacklist.
# The join key is named once; listing 'id' twice added nothing to the join.
dups = pd.merge(whitelists, blacklists, how="inner", on="id")
dups.shape[0] # Number of ids present in both lists

1

In [8]:
# Remove images that are in both lists from both lists.
# `~mask` is the idiomatic negation of a boolean Series (instead of `== False`).
whitelists = whitelists[~whitelists["id"].isin(dups["id"])]
blacklists = blacklists[~blacklists["id"].isin(dups["id"])]

# Check if all duplicates are removed (the displayed count should be 0)
pd.merge(whitelists, blacklists, how="inner", on="id").shape[0]

0

In [9]:
# Renumber the rows of both lists from 0, discarding the old index values
whitelists, blacklists = (
    whitelists.reset_index(drop=True),
    blacklists.reset_index(drop=True),
)

In [10]:
# Display the combined whitelist for a visual check
whitelists

Unnamed: 0,id
0,45.4727254_3.2479538_ts1
1,43.4387796_-112.1136612_ts1
2,49.7030926_12.9882907_ts1
3,38.0495663_-102.0508654_ts1
4,35.8807974_-97.3903998_ts1
5,51.0867363_6.8169281_ts1
6,35.4300652_-84.6883_ts1
7,37.9882682_58.3524541_ts1
8,43.7569066_-89.9685034_ts1
9,45.4727254_3.2479538_ts2


In [11]:
# Manually check if a certain id is in a list
id1 = "46.419_4.863_ts1"
for label, entries in (("Whitelist:", whitelists), ("Blacklist:", blacklists)):
    # Prints the matching rows; an empty frame means the id is not in that list
    print(label, entries[entries["id"] == id1])

Whitelist: Empty DataFrame
Columns: [id]
Index: []
Blacklist: Empty DataFrame
Columns: [id]
Index: []


In [12]:
# Saving full lists (saved in same path as list pieces)
for full_list, full_name in (
    (whitelists, "whitelist_full"),
    (blacklists, "blacklist_full"),
):
    # save_file returns a status message and never overwrites an existing file
    print(save_file(full_list, full_name, PATH))

File "whitelist_full" successfully saved!
File "blacklist_full" successfully saved!


## 2) Creating sample of coordinate data according to white- and blacklists

In [13]:
# CHECKPOINT
# Reload the combined lists from the "*_full.csv" files in PATH, so the cells
# below work from the saved files rather than from the in-memory frames.
# index_col=0 reads the first CSV column back as the index.
whitelists = pd.read_csv(f"./{PATH}/whitelist_full.csv", index_col=0)
blacklists = pd.read_csv(f"./{PATH}/blacklist_full.csv", index_col=0)

In [15]:
# Create a unique id based on position of parking lot
def parking_name(row_or_pt, ts):
    """Build a parking-lot id of the form ``"<a>_<b>_ts<ts>"``.

    Parameters
    ----------
    row_or_pt : indexable
        Object whose elements ``[0]`` and ``[1]`` are numbers, e.g. the tuple
        returned by ``get_point``.
    ts : any
        Value appended after ``_ts`` (formatted with ``str``).

    Returns
    -------
    str or float
        The id string, with both values rounded to 7 decimals, or ``np.nan``
        when the input cannot be indexed or rounded (for example a missing
        value, a too-short tuple, or non-numeric elements).
    """
    try:
        a = round(row_or_pt[0], 7)
        b = round(row_or_pt[1], 7)
    except (TypeError, IndexError, KeyError):
        # Only "this is not a usable point" errors become NaN. A bare except
        # would also swallow KeyboardInterrupt/SystemExit and make a long run
        # impossible to interrupt.
        return np.nan

    return f"{a}_{b}_ts{ts}"


# Get the first point of polygon list as tuple of floats
def get_point(geom):
    """Extract the first coordinate pair from a geometry string, swapped.

    The first 9 characters are skipped (the length of the ``"POLYGON(("``
    prefix), the text up to the first comma is split on single spaces and
    converted to floats, and the first two values are returned in reversed
    order.
    NOTE(review): presumably this turns the "x y" (lon lat) text order into
    (lat, lon) - confirm against the data source.

    Parameters
    ----------
    geom : str
        Geometry text such as ``"POLYGON((3.24 45.47, 3.25 45.48, ...))"``.

    Returns
    -------
    tuple of float or float
        ``(second_value, first_value)`` of the first point, or ``np.nan`` when
        ``geom`` is not sliceable text or the first point cannot be parsed
        into at least two floats.
    """
    try:
        first_pair = re.split(r",", geom[9:])[0]
        values = tuple(map(float, first_pair.split(' ')))
        swapped = (values[1], values[0])
    except (TypeError, ValueError, IndexError, KeyError):
        # Non-string input (e.g. NaN/None), unparsable numbers or a point with
        # fewer than two values all mean "no usable point". Anything else,
        # KeyboardInterrupt included, is allowed to propagate.
        return np.nan

    return swapped

In [16]:
# Loading data
# This part needs to be adapted according to database used

# THIS CAN TAKE UP SOME TIME DEPENDING ON DATA SIZE

start_time = time.localtime()
start_t = time.time()
print("Started at:", time.asctime(start_time))

file_nr = list(range(2)) # Depending on the number of files with raw data

# Columns that identify a raw record; "all_tags" is left out of the subset
# (the preview shows it holds lists of dicts, which cannot be hashed for
# duplicate detection).
dedup_cols = ['osm_version', 'osm_way_id', 'osm_timestamp', 'geometry', 'key', 'value', 'pt']

# Collect one filtered frame per file and concatenate once after the loop.
# Growing a frame with pd.concat inside the loop copies all previous rows on
# every iteration, and the old `first` flag left a stale `temp` from earlier
# cells in place whenever no file was processed.
frames = []

for i in file_nr:
    # NOTE(review): the "0{i}" pattern yields e.g. "..._010_..." for i >= 10;
    # adapt if the files are numbered with two digits.
    df_r = pd.read_json(f"data/osm_parking_polygons_0{i}_test.json", lines=True)
    df_r["pt"] = df_r["geometry"].map(get_point)
    df_r = df_r.drop_duplicates(subset=dedup_cols)

    # Every record is listed twice, once with a "_ts1" and once with a "_ts2" id
    df = pd.concat([
        df_r.assign(id=[parking_name(pt, ts) for pt in df_r["pt"]])
        for ts in (1, 2)
    ])

    # Keep only the records whose id is on the whitelist
    df = pd.merge(df, whitelists, how="inner", on="id")
    df = df[["id", "pt", "geometry", "all_tags"]].dropna()

    frames.append(df)
    print(f"File nr. {i} successfully loaded")

# Raises ValueError if no file was processed, instead of silently reusing an
# older `temp`.
temp = pd.concat(frames)

end_t = time.time()
print("Computation time (Min.):", (end_t-start_t)/60)

Started at: Sun Jul  3 15:32:01 2022
File nr. 0 successfully loaded
File nr. 1 successfully loaded
Computation time (Min.): 0.00033362706502278644


In [17]:
# Keep the first row for every (id, pt) combination, then renumber the rows from 0
temp = (
    temp
    .drop_duplicates(subset=["id", "pt"], keep="first")
    .reset_index(drop=True)
)

In [18]:
# Display the whitelisted records (id, pt, geometry, all_tags) for a visual check
temp

Unnamed: 0,id,pt,geometry,all_tags
0,45.4727254_3.2479538_ts1,"(45.4727254, 3.2479538)","POLYGON((3.2479538 45.4727254, 3.2479409 45.47...","[{'key': 'access', 'value': 'yes'}, {'key': 'a..."
1,43.4387796_-112.1136612_ts1,"(43.4387796, -112.1136612)","POLYGON((-112.1136612 43.4387796, -112.1137506...","[{'key': 'amenity', 'value': 'parking'}, {'key..."
2,49.7030926_12.9882907_ts1,"(49.7030926, 12.9882907)","POLYGON((12.9882907 49.7030926, 12.9884926 49....","[{'key': 'amenity', 'value': 'parking'}, {'key..."
3,38.0495663_-102.0508654_ts1,"(38.0495663, -102.0508654)","POLYGON((-102.0508654 38.0495663, -102.0509062...","[{'key': 'amenity', 'value': 'parking'}, {'key..."
4,35.8807974_-97.3903998_ts1,"(35.8807974, -97.3903998)","POLYGON((-97.3903998 35.8807974, -97.3905618 3...","[{'key': 'amenity', 'value': 'parking'}, {'key..."
5,51.0867363_6.8169281_ts1,"(51.0867363, 6.8169281)","POLYGON((6.8169281 51.0867363, 6.8181226 51.08...","[{'key': 'access', 'value': 'private'}, {'key'..."
6,45.4727254_3.2479538_ts2,"(45.4727254, 3.2479538)","POLYGON((3.2479538 45.4727254, 3.2479409 45.47...","[{'key': 'access', 'value': 'yes'}, {'key': 'a..."
7,35.4300652_-84.6883_ts1,"(35.4300652, -84.6883)","POLYGON((-84.6883 35.4300652, -84.6885669 35.4...","[{'key': 'amenity', 'value': 'parking'}, {'key..."
8,37.9882682_58.3524541_ts1,"(37.9882682, 58.3524541)","POLYGON((58.3524541 37.9882682, 58.3529479 37....","[{'key': 'amenity', 'value': 'parking'}, {'key..."
9,43.7569066_-89.9685034_ts1,"(43.7569066, -89.9685034)","POLYGON((-89.9685034 43.7569066, -89.9685579 4...","[{'key': 'amenity', 'value': 'parking'}, {'key..."


In [19]:
# Save TRUCKS
# Writes the filtered records to "data/osm_data_trucks_test.csv"; save_file
# refuses to overwrite an existing file and reports that in the printed message.
print(save_file(temp, "osm_data_trucks_test", "data"))

File "osm_data_trucks_test" successfully saved!


In [20]:
# Save CARS
#print(save_file(temp, "osm_data_cars_test", "data"))