In [2]:
import geopandas as gpd
import os
import pandas as pd
from tqdm.notebook import tqdm
from PIL import Image

In [2]:
# Lookup table of segmentation classes: one row per class, holding the class
# name and the (r, g, b) colour that marks it in the segmented images.
classify_dict = pd.read_csv("./class_dict.csv")
classify_dict.head()


Unnamed: 0,name,r,g,b
0,Animal,64,128,64
1,Archway,192,0,128
2,Bicyclist,0,128,192
3,Bridge,0,128,64
4,Building,128,0,0


In [3]:
# Count pixel colours with PIL; colour keys are in (R, G, B) order.

def countColor(fp:str):
    """Count how many pixels of each colour an image contains.

    Parameters
    ----------
    fp : str
        Path of the image file to read.

    Returns
    -------
    dict
        Maps each (r, g, b) tuple found in the image to its pixel count.
    """
    # The context manager closes the underlying file once counting is done.
    with Image.open(fp) as img:
        # Normalise the mode so keys are always 3-tuples: a palette image would
        # yield palette indices and an RGBA image 4-tuples, neither of which can
        # be compared with an (r, g, b) class colour.
        rgbImg = img.convert("RGB")
        # getcolors() returns None when the image holds more than `maxcolors`
        # distinct colours (default 256). An image cannot have more colours than
        # pixels, so this bound always yields a list.
        pairs = rgbImg.getcolors(maxcolors=rgbImg.width * rgbImg.height)
    # getcolors() yields (count, colour) pairs; flip them into colour -> count.
    return {color: n for n, color in pairs}

def counterToDataframe(counter:dict, SEG_ID:int, pointId:str, classes=None):
    """Convert the colour counts of one picture into per-class pixel counts.

    Parameters
    ----------
    counter : dict
        Maps (r, g, b) tuples to pixel counts.
    SEG_ID : int
        Segment id stored in the ``SEG_ID`` column.
    pointId : str
        Picture index stored in the ``pointId`` column.
    classes : pandas.DataFrame, optional
        Class table with ``name``, ``r``, ``g`` and ``b`` columns. Defaults to
        the notebook-level ``classify_dict``.

    Returns
    -------
    pandas.DataFrame
        One row: ``SEG_ID``, ``pointId``, then one column per class name holding
        that class's pixel count (0 when its colour is absent from ``counter``).
    """
    if classes is None:
        classes = classify_dict

    # Collect every column first and build the frame once, instead of inserting
    # the class columns one at a time.
    record = {"SEG_ID": [SEG_ID], "pointId": [pointId]}
    for className, r, g, b in zip(classes["name"], classes["r"], classes["g"], classes["b"]):
        record[className] = [counter.get((int(r), int(g), int(b)), 0)]
    return pd.DataFrame(record)



In [4]:
inputImgPath = "segnet-output/"
# Sorted so the row order is reproducible. Hidden entries (e.g. .ipynb_checkpoints)
# and sub-directories are skipped because they do not follow the
# "<SEG_ID>-<pointId>.<ext>" naming that the loop below unpacks.
inputImgFiles = sorted(
    name for name in os.listdir(inputImgPath)
    if not name.startswith(".") and os.path.isfile(os.path.join(inputImgPath, name))
)

# Collect the one-row frames and concatenate once at the end; concatenating
# inside the loop re-copies the accumulated frame on every iteration.
imgCountDfs = []
for file in tqdm(inputImgFiles):
    fp = os.path.join(inputImgPath, file)
    index = file.split(".")[0]
    segId,imgId = index.split('-')
    segId = int(segId)
    colorCount = countColor(fp)
    imgCountDf = counterToDataframe(colorCount,segId,imgId)
    imgCountDfs.append(imgCountDf)

# ignore_index gives every picture its own row label instead of repeating 0.
# classCountDf stays None when no image was found.
classCountDf = pd.concat(imgCountDfs, axis=0, ignore_index=True) if imgCountDfs else None
# Still disabled: the next cell reads this CSV, so enable the line below on a fresh run.
# classCountDf.to_csv('./segnetClassCount.csv')

  0%|          | 0/12317 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [3]:
# Reload the per-image class counts from the CSV cache instead of recomputing them.
classCountDf = pd.read_csv("./segnetClassCount.csv")

In [6]:
# Display the per-image class counts: one row per picture, one column per class.
classCountDf

Unnamed: 0.1,Unnamed: 0,SEG_ID,pointId,Animal,Archway,Bicyclist,Bridge,Building,Car,CartLuggagePram,...,SUVPickupTruck,TrafficCone,TrafficLight,Train,Tree,Truck_Bus,Tunnel,VegetationMisc,Void,Wall
0,0,641548,10394,4,57,513,3961,185859,6573,0,...,2763,10,22,0,256615,1525,0,211069,95643,49352
1,0,640416,1313,1,0,2348,3583,339775,6591,3,...,8303,0,7,0,191244,1631,0,54069,66082,52980
2,0,620130,7230,1,37,1012,143,179598,9079,56,...,2222,126,407,0,146165,2195,0,206713,136329,12554
3,0,401340,9336,0,0,1454,733,204396,27068,257,...,3830,0,1,0,63243,7148,0,64959,84051,35284
4,0,761434,6817,8,9,75,876,127222,51155,0,...,5028,121,25,0,195356,4110,0,140999,124267,4872
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12312,0,762729,9793,0,10,140,816,21628,10923,0,...,4305,0,1,0,219913,5455,0,144453,82498,4792
12313,0,420928,9032,22,0,1788,149,146719,34561,1,...,4588,88,169,0,173778,2732,0,67933,120717,44371
12314,0,641818,3436,30,0,4519,198,283166,34080,29,...,1073,8,4,0,135832,142,0,51242,65817,56842
12315,0,820541,10376,0,9,26,136,66661,514,0,...,141,189,5,0,205619,9,0,395558,32781,3165


In [4]:
# Columns that identify a picture rather than count pixels.
idCols = ['SEG_ID', 'pointId']

# Pixel counts only. 'Unnamed: 0' is the index column picked up from the CSV;
# it is dropped if present.
classCounts = classCountDf.drop(columns=idCols).drop(columns=['Unnamed: 0'], errors='ignore')

# Total number of pixels in each picture (one value per row).
pixelsPerImage = classCounts.sum(axis=1)
# Pixel total of the first picture, kept under its original name.
totalPixels = pixelsPerImage.iloc[0]

# Share of each category per picture, as a fraction in [0, 1]. Every row is
# divided by its own total, so pictures of different sizes are normalised
# correctly instead of all being divided by the first picture's total.
segnetPercentage = classCounts.div(pixelsPerImage, axis=0)

# Put the identifiers back next to the shares (rows align on the shared index).
combined = pd.concat([classCountDf[idCols], segnetPercentage], axis=1)
# Zero-pad pointId to six characters; works for integer ids read from the CSV
# as well as for ids that are already strings.
combined["pointId"] = combined["pointId"].astype(str).str.zfill(6)
combined.head()

Unnamed: 0,SEG_ID,pointId,Animal,Archway,Bicyclist,Bridge,Building,Car,CartLuggagePram,Child,...,SUVPickupTruck,TrafficCone,TrafficLight,Train,Tree,Truck_Bus,Tunnel,VegetationMisc,Void,Wall
0,641548,10394,3.814697e-06,5.4e-05,0.000489,0.003778,0.177249,0.006269,0.0,2.861023e-06,...,0.002635,1e-05,2.098083e-05,0.0,0.244727,0.001454,0.0,0.201291,0.091212,0.047066
1,640416,1313,9.536743e-07,0.0,0.002239,0.003417,0.324035,0.006286,3e-06,0.0,...,0.007918,0.0,6.67572e-06,0.0,0.182384,0.001555,0.0,0.051564,0.063021,0.050526
2,620130,7230,9.536743e-07,3.5e-05,0.000965,0.000136,0.171278,0.008658,5.3e-05,5.626678e-05,...,0.002119,0.00012,0.0003881454,0.0,0.139394,0.002093,0.0,0.197137,0.130013,0.011972
3,401340,9336,0.0,0.0,0.001387,0.000699,0.194927,0.025814,0.000245,0.0,...,0.003653,0.0,9.536743e-07,0.0,0.060313,0.006817,0.0,0.06195,0.080157,0.033649
4,761434,6817,7.629395e-06,9e-06,7.2e-05,0.000835,0.121328,0.048785,0.0,9.536743e-07,...,0.004795,0.000115,2.384186e-05,0.0,0.186306,0.00392,0.0,0.134467,0.11851,0.004646


## combine variables

Wall : Column_Pole + Fence + Wall + TrafficCone

Lives : Animal + Bicyclist + Child + Pedestrian + MotorcycleScooter + OtherMoving

Building : Archway + Building

Infrastructure : Bridge + ParkingBlock + Train + Tunnel

Road : Road + RoadShoulder + LaneMkgsDriv + LaneMkgsNonDriv

Sidewalk

Sky

Green : Tree + VegetationMisc

Transportation : Car + SUVPickupTruck + Truck_Bus

PublicService: CartLuggagePram + SignSymbol + TrafficLight

Delete (not assigned to any group):
Void + Misc_Text

In [5]:
# Start from the identifier columns only.
combined_variables = combined[['SEG_ID','pointId']].copy()

# Each broad category is the sum of the fine-grained class shares listed for it.
# Members are added left to right in the order given.
categoryMembers = {
    'wall': ['Column_Pole', 'Fence', 'Wall', 'TrafficCone'],
    'lives': ['Animal', 'Bicyclist', 'Child', 'Pedestrian', 'MotorcycleScooter', 'OtherMoving'],
    'building': ['Building', 'Archway'],
    'infrastructure': ['Bridge', 'ParkingBlock', 'Train', 'Tunnel'],
    'road': ['RoadShoulder', 'LaneMkgsDriv', 'Road', 'LaneMkgsNonDriv'],
    'sidewalk': ['Sidewalk'],
    'sky': ['Sky'],
    'green': ['Tree', 'VegetationMisc'],
    'transportation': ['Car', 'SUVPickupTruck', 'Truck_Bus'],
    'publicservice': ['CartLuggagePram', 'SignSymbol', 'TrafficLight'],
}
for categoryName, members in categoryMembers.items():
    first, rest = members[0], members[1:]
    # Seeding sum() with the first column keeps plain left-to-right Series addition.
    combined_variables[categoryName] = sum((combined[m] for m in rest), combined[first])

In [30]:
cols = ['wall', 'lives', 'building', 'infrastructure', 'road', 'sidewalk', 'sky', 'green', 'transportation', 'publicservice']


# Add a "<name>_q" column per category: the percentile rank bucketed into steps
# of 0.2 (0, 0.2, ..., 1.0), which reads better on the maps.
for colName in cols:
    pctRank = combined_variables[colName].rank(pct=True)
    # Floor-divide the rank into sixths, rescale by 5, and cap anything above 1 at 1.
    combined_variables[colName+"_q"] = (pctRank // (1/6) / 5).clip(upper=1)


In [32]:
# Load the street-view points and reproject them to EPSG:4326.
streetPoints = gpd.read_file("../streetViewPoints.geojson").to_crs(4326)

In [33]:
# Attach the grouped class shares to each point, matching on SEG_ID and pointId.
# Inner join: points without a matching row are dropped.
# NOTE(review): assumes both frames hold pointId as the same type - confirm.
streetPoints = streetPoints.merge(
    combined_variables,
    how="inner",
    on=["SEG_ID", "pointId"],
)
streetPoints.to_file("../pointsWithSeg.geojson", driver="GeoJSON")