In [None]:
#!pip install geopandas
#!pip install python-dotenv
#!pip install google_streetview

In [None]:
#pd.read_csv("../data/raw/infer_coordinates_quentin_large.csv")

__Note:__ For the code below to work, you need a developer - here randomly named Quentin - to give you a CSV file with coordinates in WKT format. In the example below, this field is called `representative_point`. You could also do this in geopandas starting from two separate `lat` and `lng` fields.

In [1]:
import pandas as pd
import geopandas as gpd
from shapely import wkt

# Read the coordinate export and parse its WKT column into shapely geometries.
quentin = pd.read_csv("../data/raw/infer_coordinates_quentin.csv")
quentin["geometry"] = quentin["representative_point"].map(wkt.loads)
quentin = quentin.drop(columns="representative_point")

# Wrap the frame in a GeoDataFrame and expose the point coordinates as plain columns
# (y is the latitude, x the longitude).
quentin_gpd = gpd.GeoDataFrame(quentin, geometry="geometry")
quentin_gpd["lat"] = quentin_gpd.geometry.y
quentin_gpd["long"] = quentin_gpd.geometry.x
quentin_gpd.head()



Unnamed: 0,WS_OIDN,street_segment,geometry,lat,long
0,74369,MULTILINESTRING ((4.386223617528176 51.2033170...,POINT (4.38618 51.20346),51.203455,4.386177
1,74392,MULTILINESTRING ((4.386223617528176 51.2033170...,POINT (4.38600 51.20327),51.203271,4.386005
2,74395,MULTILINESTRING ((4.385785324288156 51.2033175...,POINT (4.38579 51.20346),51.203463,4.385793
3,74397,MULTILINESTRING ((4.420672518147644 51.2393070...,POINT (4.42063 51.23889),51.238891,4.420625
4,74399,MULTILINESTRING ((4.442336207146947 51.2068766...,POINT (4.44154 51.20601),51.206009,4.441545


In [2]:
# (rows, columns) of the GeoDataFrame with the coordinates to look up.
quentin_gpd.shape

(6179, 5)

In [None]:
from dotenv import load_dotenv
import os
# Read variables from a local .env file into the process environment.
load_dotenv() 
# Indexing os.environ raises KeyError when GOOGLE_API_KEY is not set.
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
# Directory that the download cell lists for existing images and moves new ones into.
STREETVIEW_DOWNLOAD_DIR = "../data/raw/google_streetview/"
# Do not display GOOGLE_API_KEY in a cell: outputs are saved inside the notebook file.


In [4]:
# Interactive map of all points, drawn as circle markers.
# NOTE(review): the "Stamen Toner" tile name may no longer resolve in recent
# folium / xyzservices releases - confirm against the installed versions.
quentin_gpd.explore(tiles="Stamen Toner", marker_type="circle")

In [None]:
import google_streetview.api
import google_streetview.helpers
import os
import time

# Panorama ids that already have at least one image in the download directory.
# Images are stored as "<pano_id>_<index>_<direction>_<degrees>.jpg", so dropping the
# last three "_"-separated fields of a file name leaves the id. A set is used because
# the download loop tests membership once per returned image.
_all_pano_ids = {
    '_'.join(file_name.split("_")[:-3])
    for file_name in os.listdir(STREETVIEW_DOWNLOAD_DIR)
}

actual_locations = []
# One record per processed image; turned into a DataFrame after the loop.
status_list = []

# Heading index (degrees / 90) -> file-name tag of that compass direction.
heading_map = {
    0: "N_0",
    1: "E_90",
    2: "S_180",
    3: "W_270",
}

def join_coords(rowset):
    """Serialise the coordinates of ``rowset`` into one multi-location string.

    Args:
        rowset: DataFrame with ``lat`` and ``long`` columns.

    Returns:
        str: one ``"lat,long"`` pair per row, in row order, joined with ``"; "``.
    """
    # Build the pairs in a temporary Series instead of writing a "coord" column into
    # the caller's frame, so the function has no side effect on its argument.
    coords = rowset["lat"].astype(str).str.cat(rowset["long"].astype(str), sep=",")
    return "; ".join(coords.values)

# Running index over every image whose metadata lookup returned a result (all buckets).
j = -1

# Spread the locations over 200 buckets (segment id modulo 200) so that each round
# trip to the API only covers a limited batch of coordinates.
quentin_gpd["bucket"] = quentin_gpd["WS_OIDN"] % 200

for i, (bucket, locations) in quentin_gpd.groupby(by="bucket").apply(lambda x: join_coords(x)).reset_index().iterrows():
    print(f"{bucket} - {len(locations.split('; '))}")
    apiargs = {
        'size': '640x640',
        'heading': '0;90;180;270',
        "location": locations,
        'key': GOOGLE_API_KEY
    }
    # Get a list of all possible queries from the ";"-separated parameters
    api_list = google_streetview.helpers.api_list(apiargs)
    print("got api_list")

    # Create a results object (metadata lookup) for all possible queries
    results = google_streetview.api.results(api_list)
    print("got results")

    # Skip the bucket when more than 80% of its results point at panoramas that are
    # already on disk. The count must be of the panoramas we DO have; counting the
    # missing ones would skip exactly the buckets that still need downloading.
    # Skipped buckets add no records to status_list.
    already_downloaded = [
        item["pano_id"]
        for item in results.metadata
        if "pano_id" in item and item["pano_id"] in _all_pano_ids
    ]
    if len(already_downloaded) > 0.8 * len(results.metadata):
        print("already downloaded almost all panoramas")
        continue

    # Images land in a scratch directory first and are renamed per panorama below.
    results.download_links(os.path.join(STREETVIEW_DOWNLOAD_DIR, 'tmp'))
    print("downloaded streetviews")

    for ix, item in enumerate(results.metadata):
        if item in [{'status': 'NOT_FOUND'}, {'status': 'ZERO_RESULTS'}]:
            print(ix, "error: ", item)
            continue

        j += 1
        if "pano_id" not in item:
            # Report the offending item only, not the metadata of the whole bucket.
            print(f"{j:04d} pano_id not found in metadata! {item}")
            continue
        _pano_id = item["pano_id"]

        # Same record layout for both branches below; "i" is the running image index.
        _status = {
            "i": j,
            "coord": api_list[ix]["location"],
            "location": item["location"],
            "status": "ok",
            "metadata": item,
            "pano_id": _pano_id,
            # Not every metadata record is guaranteed to carry a capture date.
            "date": item.get("date"),
            "duplicate": False
        }

        if _pano_id in _all_pano_ids:
            print(f"{j:04d} already downloaded!")
        else:
            # Integer division keeps the key an int that matches heading_map.
            heading_key = int(api_list[ix]["heading"]) // 90
            heading_val = heading_map[heading_key]
            _src = os.path.join(STREETVIEW_DOWNLOAD_DIR, "tmp", item["_file"])
            _dst = os.path.join(STREETVIEW_DOWNLOAD_DIR, f"{_pano_id}_{int(heading_key)}_{heading_val}.jpg")
            if os.path.exists(_src):
                os.rename(_src, _dst)
            else:
                print(ix, f"file download failed for {_dst} from source {_src}")
                _status["status"] = "failed"

        status_list.append(_status)


In [None]:
# Collect the per-image download records into a DataFrame and persist it.
status_df = pd.DataFrame(status_list)
# NOTE(review): the cell that later builds location_df reads
# "20220127_streetview_location_panorama_df.pkl", not the file written here -
# presumably the output of an earlier run; confirm which file is intended.
status_df.to_pickle("../data/processed/20220404_streetview_location_panorama_quentin_large_df.pkl", protocol=4)

In [None]:
# Number of records divided by the four headings requested per location.
len(status_list)/4

In [None]:
# Peek at the first two download records.
status_list[:2]

In [None]:
from PIL import Image
import numpy as np
from scipy.io import loadmat
import tqdm
import pandas as pd
import os
from tqdm.notebook import tqdm
from joblib import Parallel, delayed
from glob import glob

# Image segmentation

Results from a pretrained model (https://github.com/CSAILVision/semantic-segmentation-pytorch) on streetview data.

Model inference used the ade20k-resnet50dilated-ppm_deepsup.yaml config.

In [None]:
#!pip install mit_semseg

In [None]:
#!/opt/anaconda3/envs/straatvinken-dl/bin/pip install --upgrade pandas==1.3.5

In [None]:
### UTILITY FUNCTIONS

import os, csv, torch, numpy, scipy.io, PIL.Image, torchvision.transforms
from mit_semseg.models import ModelBuilder, SegmentationModule
from mit_semseg.utils import colorEncode

# Colour table handed to colorEncode when a prediction is painted.
colors = scipy.io.loadmat('../data/external/CSAILVision/color150.mat')['colors']

# Class number (first CSV column) -> first of the ";"-separated names in the sixth column.
with open('../data/external/CSAILVision/object150_info.csv') as info_file:
    info_rows = csv.reader(info_file)
    next(info_rows)  # skip the header row
    names = {int(fields[0]): fields[5].split(";")[0] for fields in info_rows}

def visualize_result(img, pred, index=None):
    """Display ``img`` side by side with the colourised prediction ``pred``.

    When ``index`` is given, every pixel predicted as a different class is set to -1
    (on a copy of ``pred``) before colourising, and the name of the selected class is
    printed.
    """
    class_map = pred
    if index is not None:
        # Work on a copy so the caller's prediction array stays untouched.
        class_map = pred.copy()
        class_map[class_map != index] = -1
        print(f'{names[index+1]}:')

    # Paint the class map and glue it to the right of the input image.
    painted = colorEncode(class_map, colors).astype(numpy.uint8)
    side_by_side = numpy.concatenate((img, painted), axis=1)
    display(PIL.Image.fromarray(side_by_side))
    
def create_mask_and_masked(img, pred, mask_path, masked_path):
    """Write the colour mask of ``pred`` and its 50/50 blend with ``img`` to disk.

    Args:
        img: image array accepted by ``PIL.Image.fromarray``.
            NOTE(review): presumably an RGB array with the same height and width as
            ``pred`` - ``PIL.Image.blend`` needs matching size and mode; confirm.
        pred: per-pixel class indices, colourised with ``colorEncode``.
        mask_path: destination file of the colour mask.
        masked_path: destination file of the blended (mask over image) picture.
    """
    # Create the output folders on first use; without them the saves below fail.
    for target in (mask_path, masked_path):
        folder = os.path.dirname(target)
        if folder:
            os.makedirs(folder, exist_ok=True)

    # create mask
    pred_color = colorEncode(pred, colors).astype(numpy.uint8)
    pred_img = PIL.Image.fromarray(pred_color)
    pred_img.save(mask_path)
    # overlay the mask on the original picture with equal weights
    PIL.Image.blend(pred_img, PIL.Image.fromarray(img), 0.5).save(masked_path)
    

# Preprocessing for one PIL image: convert it to a tensor, then standardise every RGB
# channel with the fixed mean / std values below (statistics of a large photo dataset).
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]
pil_to_tensor = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=_channel_mean, std=_channel_std),
])
    
## LOAD MODEL
# Network Builders: encoder and decoder restored from the epoch-20 checkpoints of the
# ade20k-resnet50dilated-ppm_deepsup model.
net_encoder = ModelBuilder.build_encoder(
    arch='resnet50dilated',
    fc_dim=2048,
    weights='../data/model/CSAILVisionSegmentation/ade20k-resnet50dilated-ppm_deepsup/ckpt/encoder_epoch_20.pth')
net_decoder = ModelBuilder.build_decoder(
    arch='ppm_deepsup',
    fc_dim=2048,
    num_class=150,
    weights='../data/model/CSAILVisionSegmentation/ade20k-resnet50dilated-ppm_deepsup/ckpt/decoder_epoch_20.pth',
    use_softmax=True)

# SegmentationModule is constructed with a criterion; the module is put in eval mode
# right away because this notebook only runs inference.
crit = torch.nn.NLLLoss(ignore_index=-1)
segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)
segmentation_module.eval()
# Move the model to the GPU only when one is present, so the cell also runs on
# CPU-only machines instead of raising on an unconditional .cuda() call.
if torch.cuda.is_available():
    segmentation_module.cuda()

In [None]:
# enable continue for this long operation
#import pickle

#segmentation_data = pickle.load(open("segm_bck.pkl", "rb"))
#len(segmentation_data)

In [None]:
# Run the segmentation at the highest resolution.
import pickle
from glob import glob

def process_single_image(_img):
    """Segment one street-view image and store its mask and blended overlay.

    Args:
        _img: path of the image. The two output paths are derived from it by swapping
            "/raw/google_streetview/" for "/processed/google_streetview_mask/" and
            "/processed/google_streetview_masked/".

    Returns:
        dict: class name -> percentage of pixels predicted as that class, for class
        indices 0 up to the highest index that occurs in the prediction.
    """
    # Decode inside a context manager so the file handle is released immediately.
    with PIL.Image.open(_img) as image_file:
        pil_image = image_file.convert('RGB')
    img_original = numpy.array(pil_image)
    img_data = pil_to_tensor(pil_image)

    # Send the batch to whichever device holds the model instead of assuming CUDA.
    device = next(segmentation_module.parameters()).device
    singleton_batch = {'img_data': img_data[None].to(device)}
    output_size = img_data.shape[1:]
    scores = segmentation_module(singleton_batch, segSize=output_size)

    # Get the predicted class (highest score) for each pixel
    _, pred = torch.max(scores, dim=1)
    pred = pred.cpu()[0].numpy()

    # Pixel count per class index, expressed as a share of all pixels.
    counts = numpy.bincount(pred.flatten())
    percentages = (counts * 100) / counts.sum()
    # `names` is keyed by class number starting at 1, predictions start at 0.
    result_dict = {names[i+1]: percentages[i] for i in range(len(percentages))}

    mask_path = _img.replace("/raw/google_streetview/", "/processed/google_streetview_mask/")
    masked_path = _img.replace("/raw/google_streetview/", "/processed/google_streetview_masked/")
    create_mask_and_masked(img_original, pred, mask_path, masked_path)
    return result_dict

# Resume from the results of an earlier run when they were saved to "segm.pkl".
# NOTE(review): unpickling executes code stored in the file - only load pickles
# produced by this notebook.
if os.path.exists("segm.pkl"):
    with open("segm.pkl", "rb") as fh:
        segmentation_data = pickle.load(fh)
else:
    segmentation_data = []

all_images = glob("../data/raw/google_streetview/*.jpg")
# A set keeps the "already processed?" test O(1) for each of the images.
processed_images = {item["img_full"] for item in segmentation_data}
print(f"{len(processed_images)}/{len(all_images)} were already processed")

# Inference only: no gradients are needed.
# NOTE: results live in memory until the separate pickle.dump cell is run.
with torch.no_grad():
    for _img in tqdm(all_images):
        if _img in processed_images:
            continue
        _res = process_single_image(_img)
        segmentation_data.append({"img": os.path.basename(_img), "img_full": _img,"segmentation_data": _res})


In [None]:
# Number of images that now have segmentation results.
len(segmentation_data)

In [None]:
# Persist the results so that a later run can resume from them; the context manager
# closes (and thereby flushes) the file deterministically.
with open("segm.pkl", "wb") as fh:
    pickle.dump(segmentation_data, fh)

In [None]:
# NOTE(review): this replaces the results held in `segmentation_data` with the contents
# of the backup file "segm_bck.pkl" - skip this cell unless restoring that backup is
# what you want.
with open("segm_bck.pkl", "rb") as fh:
    segmentation_data = pickle.load(fh)

In [None]:
# One row per image; records appended by the segmentation loop carry the keys
# "img", "img_full" and "segmentation_data".
segm_df = pd.DataFrame(segmentation_data)
segm_df

In [None]:
# Store the segmentation table; the context manager guarantees the file is closed.
with open(
    "../data/processed/20220127_streetview_segmentation_results_6420_samples_antwerp_df.pkl",
    "wb"
) as fh:
    pickle.dump(segm_df, fh)

In [None]:
import pickle
# Panorama / location table of a download run.
# NOTE(review): the download section of this notebook writes
# "20220404_streetview_location_panorama_quentin_large_df.pkl"; confirm that the older
# file read here is the intended input.
with open("../data/processed/20220127_streetview_location_panorama_df.pkl", "rb") as fh:
    location_df = pickle.load(fh)
location_df.head()

In [None]:
# NOTE: this binds a second name to the same DataFrame (no copy is made), so columns
# added to segmentation_images_df later on also show up on segm_df.
segmentation_images_df = segm_df 

In [None]:
# Directory of the downloaded images.
prefix = "../data/raw/google_streetview/"
# File-name endings written by the download step, one per heading ...
suffixes = [
    "_0_N_0.jpg",
    "_1_E_90.jpg",
    "_2_S_180.jpg",
    "_3_W_270.jpg",
]

# ... and the orientation label of each ending, in the same order.
orientations = [
    "north",
    "east",
    "south",
    "west"
]

def get_pano_id(image_name):
    """Return the panorama id encoded in an image path or file name.

    The directory part is dropped and the heading suffix is removed only when the
    file name actually ends with it, so an id that happens to contain suffix-like
    text is left intact. A name without a known suffix is returned as bare file name.
    """
    file_name = os.path.basename(image_name)
    for suffix in suffixes:
        if file_name.endswith(suffix):
            return file_name[:-len(suffix)]
    return file_name

def get_orientation(image_name):
    """Return the orientation ("north", "east", "south" or "west") of an image.

    Raises:
        ValueError: when ``image_name`` does not end with one of ``suffixes``.
    """
    for suffix, orientation in zip(suffixes, orientations):
        if image_name.endswith(suffix):
            return orientation
    raise ValueError(f"no heading suffix found in image name: {image_name!r}")

#for image in os.listdir("../data/raw/google_streetview")[:300]:
#    print(image, get_orientation(image))

# Derive the panorama id and the viewing direction from each image path.
segmentation_images_df["pano_id"] = segmentation_images_df["img_full"].map(get_pano_id)
segmentation_images_df["orientation"] = segmentation_images_df["img_full"].map(get_orientation)
segmentation_images_df

In [None]:
# One row per panorama with one column per orientation, each cell holding that
# image's segmentation dict.
labels_pivoted_df = (
    segmentation_images_df
    .pivot(index="pano_id", columns="orientation", values="segmentation_data")
    .reset_index()
)
labels_pivoted_df.head()

In [None]:
# Attach the per-orientation segmentation dicts to every location record. The left join
# keeps locations without segmentation results (their orientation cells become NaN);
# suffixes=(False, False) makes pandas raise on any other overlapping column name.
streetview_w_labels_df = location_df.merge(labels_pivoted_df, how="left", left_on='pano_id', right_on='pano_id', suffixes=(False, False))
with open("../data/processed/20220122_streetview_coordinates_w_labels.pkl", "wb") as fh:
    pickle.dump(streetview_w_labels_df, fh)

In [None]:
import pickle
# Reload the merged table so the aggregation below can start from the saved file.
with open("../data/processed/20220122_streetview_coordinates_w_labels.pkl", "rb") as fh:
    streetview_w_labels_df = pickle.load(fh)
streetview_w_labels_df

# Aggregation of segmentation percentages

For the four main wind directions (north, east, south, west), we aggregate the segmentation percentages into a single score per segmentation class.

In [None]:
viewpoints = ["north", "east", "south", "west"]
from collections import Counter


def _is_missing_view(value):
    """Return True for the placeholders (None / float NaN) of a viewpoint without data."""
    return value is None or (isinstance(value, float) and value != value)


def agg_segmentation_data(row, viewpoints):
    """
    aggregates segmentation data: sum of percentages for all viewpoints

    Viewpoints whose cell is None / NaN (e.g. panoramas left unmatched by a left merge)
    are skipped instead of raising. Counter addition keeps strictly positive totals
    only, so classes summing to 0 do not appear in the result.
    """
    return sum(
        (Counter(dict(x)) for x in row[viewpoints] if not _is_missing_view(x)),
        Counter())

def normalized_agg_segmentation_data(row, viewpoints):
    """
    normalizes segmentation data

    The summed percentages are divided by the number of viewpoints that actually carry
    data, so the result still adds up to 100 when a viewpoint is missing. A row without
    any data yields an empty dict.
    """
    n_present = sum(1 for x in row[viewpoints] if not _is_missing_view(x))
    return {k: v/n_present for k, v in agg_segmentation_data(row, viewpoints).items()}

def sum_segmentation_data(row, viewpoints):
    """Return the total of all aggregated percentages of ``row`` (a sanity-check helper)."""
    return sum(agg_segmentation_data(row, viewpoints).values())

# Average the four per-orientation dicts of every row into a single dict per panorama.
streetview_w_labels_df["segmentation_agg"] = streetview_w_labels_df.apply(
    normalized_agg_segmentation_data, axis=1, viewpoints=viewpoints
)
streetview_w_labels_df

In [None]:
# check whether sum of percentages is indeed 100%
# The row totals are computed once and reused for both the histogram and the listing.
row_totals = streetview_w_labels_df.apply(lambda x: sum_segmentation_data(x, viewpoints=["segmentation_agg"]), axis=1)
row_totals.hist()
row_totals

# Pivoting: one column per landscape class

We want to have one column per landscape class.

In [None]:
# Expand every row's {class: percentage} dict into columns; a class that is absent
# from a row gets 0.
landscape_percentages = streetview_w_labels_df["segmentation_agg"].apply(pd.Series).fillna(0.0)
# Drop classes with far too few observations: keep the columns whose percentages add up
# to more than 5 over all rows.
_class_totals = landscape_percentages.sum(axis=0)
landscape_percentages = landscape_percentages[_class_totals.index[_class_totals > 5]]
landscape_percentages.head()

In [None]:
# Put the identifying columns next to the per-class percentage columns (aligned on index).
_location_columns = streetview_w_labels_df[["location", "pano_id"]]
streetview_labeled_final_df = pd.concat([_location_columns, landscape_percentages], axis=1)

In [None]:
# add image links: the raw picture and its masked counterpart for every heading
_masked_prefix = prefix.replace("raw/google_streetview", "processed/google_streetview_masked")
for _heading, _suffix in zip(["north", "east", "south", "west"], suffixes):
    _file_names = streetview_labeled_final_df.pano_id + _suffix
    streetview_labeled_final_df[f"img_{_heading}"] = prefix + _file_names
    streetview_labeled_final_df[f"img_{_heading}_masked"] = _masked_prefix + _file_names

# unpack the {"lat": ..., "lng": ...} location dicts into two plain columns
streetview_labeled_final_df["lat"] = streetview_labeled_final_df["location"].map(lambda loc: loc["lat"])
streetview_labeled_final_df["long"] = streetview_labeled_final_df["location"].map(lambda loc: loc["lng"])
streetview_labeled_final_df = streetview_labeled_final_df.drop(columns="location")
streetview_labeled_final_df

In [None]:
# Persist the final table; the context manager closes the file deterministically.
with open("../data/processed/20220127_streetview_coordinates_w_labels.pkl", "wb") as fh:
    pickle.dump(streetview_labeled_final_df, fh)


### Top categories

Let's show the top categories in % of total coverage

In [None]:
# All float columns except the coordinates are per-class percentage columns.
segm_colums = [col for col in streetview_labeled_final_df.select_dtypes(include=[float]) if col not in ["lat", "long"]]
# Share (in %) of the 15 largest classes in the total over all classes and rows.
_class_totals = streetview_labeled_final_df[segm_colums].sum(axis=0)
_class_totals.sort_values(ascending=False)[:15] * 100 / _class_totals.sum()

In [None]:
import pickle
# Reload the final table from disk (lets the cells below run without the steps above).
with open("../data/processed/20220127_streetview_coordinates_w_labels.pkl", "rb") as fh:
    streetview_labeled_final_df = pickle.load(fh)
streetview_labeled_final_df


In [None]:
# All float columns except the coordinates are per-class percentage columns.
segm_colums = [col for col in streetview_labeled_final_df.select_dtypes(include=[float]) if col not in ["lat", "long"]]
# Show sum and variance side by side: written as two separate expressions, only the
# last one is displayed and the sum is silently discarded.
streetview_labeled_final_df[segm_colums].agg(["sum", "var"]).T