In [1]:
import geopandas as gpd
import os
import pandas as pd
from tqdm.notebook import tqdm
from PIL import Image

In [2]:
classify_dict = pd.read_csv('./class_dict.csv')
classify_dict.head(5)


Unnamed: 0,name,r,g,b
0,Animal,64,128,64
1,Archway,192,0,128
2,Bicyclist,0,128,192
3,Bridge,0,128,64
4,Building,128,0,0


In [3]:
# reading img into numpy array, color order: (B,G,R)

def countColor(fp:str):
    # counting color in img
    # input: img file path
    # output: dict {(r,g,b): int}
    img = Image.open(fp)
    count = img.getcolors()
    count = {x[1]:x[0] for x in count}
    return count 

def counterToDataframe(counter:dict, SEG_ID:int, pointId:str):
    # converting color counts into categories
    # input: counts, SEG_ID, pointId (the index of pic)
    # output: one row of pandas dateframe, columns are categories
    result = pd.DataFrame({"SEG_ID":[SEG_ID],"pointId":[pointId]})
    for i, row in classify_dict.iterrows():
        rgb = (row.r,row.g,row.b)
        className = row["name"]
        if rgb in counter.keys():
            count = counter[rgb]
        else: count = 0
        result[className] = count
    return result



In [4]:
inputImgPath = "segnet-output/"
inputImgFiles = os.listdir(inputImgPath)
classCountDf = None
for i, file in tqdm(enumerate(inputImgFiles),total=len(inputImgFiles)):
    fp = inputImgPath + file
    index = file.split(".")[0]
    segId,imgId = index.split('-')
    segId = int(segId)
    colorCount = countColor(fp)
    imgCountDf = counterToDataframe(colorCount,segId,imgId)
    if classCountDf is None: 
        classCountDf = imgCountDf
    else:
        classCountDf = pd.concat([classCountDf, imgCountDf], axis=0)
# classCountDf.to_csv('./segnetClassCount.csv')

  0%|          | 0/12317 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [5]:
classCountDf = pd.read_csv('./segnetClassCount.csv')

In [6]:
classCountDf

Unnamed: 0.1,Unnamed: 0,SEG_ID,pointId,Animal,Archway,Bicyclist,Bridge,Building,Car,CartLuggagePram,...,SUVPickupTruck,TrafficCone,TrafficLight,Train,Tree,Truck_Bus,Tunnel,VegetationMisc,Void,Wall
0,0,641548,10394,4,57,513,3961,185859,6573,0,...,2763,10,22,0,256615,1525,0,211069,95643,49352
1,0,640416,1313,1,0,2348,3583,339775,6591,3,...,8303,0,7,0,191244,1631,0,54069,66082,52980
2,0,620130,7230,1,37,1012,143,179598,9079,56,...,2222,126,407,0,146165,2195,0,206713,136329,12554
3,0,401340,9336,0,0,1454,733,204396,27068,257,...,3830,0,1,0,63243,7148,0,64959,84051,35284
4,0,761434,6817,8,9,75,876,127222,51155,0,...,5028,121,25,0,195356,4110,0,140999,124267,4872
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12312,0,762729,9793,0,10,140,816,21628,10923,0,...,4305,0,1,0,219913,5455,0,144453,82498,4792
12313,0,420928,9032,22,0,1788,149,146719,34561,1,...,4588,88,169,0,173778,2732,0,67933,120717,44371
12314,0,641818,3436,30,0,4519,198,283166,34080,29,...,1073,8,4,0,135832,142,0,51242,65817,56842
12315,0,820541,10376,0,9,26,136,66661,514,0,...,141,189,5,0,205619,9,0,395558,32781,3165


In [10]:
totalPixels = classCountDf.drop(columns=['Unnamed: 0', 'SEG_ID','pointId']).sum(axis=1)[0]
classCountDf.applymap(lambda x:(x/totalPixels))

Unnamed: 0.1,Unnamed: 0,SEG_ID,pointId,Animal,Archway,Bicyclist,Bridge,Building,Car,CartLuggagePram,...,SUVPickupTruck,TrafficCone,TrafficLight,Train,Tree,Truck_Bus,Tunnel,VegetationMisc,Void,Wall
0,0.0,0.611828,0.009912,3.814697e-06,0.000054,0.000489,0.003778,0.177249,0.006269,0.000000e+00,...,0.002635,0.000010,2.098083e-05,0.0,0.244727,0.001454,0.0,0.201291,0.091212,0.047066
1,0.0,0.610748,0.001252,9.536743e-07,0.000000,0.002239,0.003417,0.324035,0.006286,2.861023e-06,...,0.007918,0.000000,6.675720e-06,0.0,0.182384,0.001555,0.0,0.051564,0.063021,0.050526
2,0.0,0.591402,0.006895,9.536743e-07,0.000035,0.000965,0.000136,0.171278,0.008658,5.340576e-05,...,0.002119,0.000120,3.881454e-04,0.0,0.139394,0.002093,0.0,0.197137,0.130013,0.011972
3,0.0,0.382748,0.008904,0.000000e+00,0.000000,0.001387,0.000699,0.194927,0.025814,2.450943e-04,...,0.003653,0.000000,9.536743e-07,0.0,0.060313,0.006817,0.0,0.061950,0.080157,0.033649
4,0.0,0.726160,0.006501,7.629395e-06,0.000009,0.000072,0.000835,0.121328,0.048785,0.000000e+00,...,0.004795,0.000115,2.384186e-05,0.0,0.186306,0.003920,0.0,0.134467,0.118510,0.004646
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12312,0.0,0.727395,0.009339,0.000000e+00,0.000010,0.000134,0.000778,0.020626,0.010417,0.000000e+00,...,0.004106,0.000000,9.536743e-07,0.0,0.209725,0.005202,0.0,0.137761,0.078676,0.004570
12313,0.0,0.401428,0.008614,2.098083e-05,0.000000,0.001705,0.000142,0.139922,0.032960,9.536743e-07,...,0.004375,0.000084,1.611710e-04,0.0,0.165728,0.002605,0.0,0.064786,0.115125,0.042315
12314,0.0,0.612085,0.003277,2.861023e-05,0.000000,0.004310,0.000189,0.270048,0.032501,2.765656e-05,...,0.001023,0.000008,3.814697e-06,0.0,0.129539,0.000135,0.0,0.048868,0.062768,0.054209
12315,0.0,0.782529,0.009895,0.000000e+00,0.000009,0.000025,0.000130,0.063573,0.000490,0.000000e+00,...,0.000134,0.000180,4.768372e-06,0.0,0.196094,0.000009,0.0,0.377234,0.031262,0.003018


In [7]:
streetPoints = gpd.read_file("../streetViewPoints.geojson")
streetPoints = streetPoints.to_crs(4326)

In [8]:
areaDf = pd.merge(streetPoints,classCountDf,on=["SEG_ID","pointId"])
areaDf

ValueError: You are trying to merge on object and int64 columns. If you wish to proceed you should use pd.concat