In [1]:
import json
import xmltodict
import xml.etree.ElementTree as ET 

import pandas as pd 
import geopandas as gpd
from shapely.geometry import Point

from pyproj import Proj

In [2]:
# Load one annotation XML file and flatten its bounding boxes into a dataframe.
f = './training/netpens_v4/train/annotations/'
name = '101_(18, 37.2723, 23.2729, 37.2633, 23.2842).xml'
file = f + name

tree = ET.parse(file)
xml_data = tree.getroot()
xmlstr = ET.tostring(xml_data, encoding='utf8', method='xml')
# force_list keeps <object> a list even when the file holds a single
# annotation; without it xmltodict returns a bare dict for a lone element
# and integer indexing into `data` fails.
data = dict(xmltodict.parse(xmlstr, force_list=('object',)))
# A file with no <object> element yields an empty list instead of a KeyError.
data = data['annotation'].get('object') or []

# Convert to a long dataframe: one row per bounding-box coordinate, tagged
# with the index of the annotation it belongs to. The coordinates are emitted
# in alphabetical key order (xmax, xmin, ymax, ymin) explicitly, so code that
# reads these rows by position gets the same layout regardless of how the
# installed pandas version orders nested-dict keys.
records = [
    {'bndbox': box[key], 'annotation': i}
    for i, box in enumerate(obj['bndbox'] for obj in data)
    for key in sorted(box)
]
df = pd.DataFrame(records, columns=['bndbox', 'annotation'])

print(len(data))
print(df.shape)
df.head()

27
(108, 2)


Unnamed: 0,bndbox,annotation
0,1386,0
1,1340,0
2,892,0
3,850,0
4,1330,1


In [20]:
# Measure every bounding box in meters.

# Meters represented by one pixel.
# NOTE(review): 0.5972 equals the Web-Mercator zoom-18 resolution at the
# equator (156543.03 / 2**18). If that is its origin, the ground resolution at
# this tile's latitude would be smaller by cos(latitude) -- confirm.
res = 0.5972

box_columns = ['annotation', 'xmin', 'ymin', 'xmax', 'ymax',
               'y', 'x', 'length', 'width']

# Coordinates are looked up by name in the parsed annotations rather than by
# row position in `df`, so the result cannot be scrambled by a different row
# order. One record per box; the frame is built once at the end.
records = []
for i, obj in enumerate(data):
    box = obj['bndbox']
    xmin = float(box['xmin'])
    ymin = float(box['ymin'])
    xmax = float(box['xmax'])
    ymax = float(box['ymax'])

    records.append({
        'annotation': i,
        'xmin': xmin,
        'ymin': ymin,
        'xmax': xmax,
        'ymax': ymax,
        # Naming kept from the original: 'y' is the centre along the
        # xmin/xmax axis and 'x' the centre along the ymin/ymax axis.
        # The georeferencing cell reads the columns under these names.
        'y': (xmin + xmax) / 2,
        'x': (ymin + ymax) / 2,
        'length': round((xmax - xmin) * res, 2),
        'width': round((ymax - ymin) * res, 2),
    })

# Passing `columns` fixes the column order and keeps the schema when there
# are no annotations at all.
df2 = pd.DataFrame(records, columns=box_columns)
df2.head()

Unnamed: 0,annotation,xmin,ymin,xmax,ymax,y,x,length,width
0,0,1340.0,850.0,1386.0,892.0,1363.0,871.0,27.47,25.08
1,1,1285.0,836.0,1330.0,879.0,1307.5,857.5,26.87,25.68
2,2,1238.0,824.0,1282.0,867.0,1260.0,845.5,26.28,25.68
3,3,1187.0,811.0,1232.0,855.0,1209.5,833.0,26.87,26.28
4,4,1136.0,751.0,1181.0,796.0,1158.5,773.5,26.87,26.87


In [27]:
# Add a lat/lon coordinate for the centre of every bounding box.
prj = Proj("+proj=utm +zone=34N, +north +ellps=WGS84 +datum=WGS84 +units=m +no_defs")

# The annotation file name carries coordinates separated by ", "; fields 1 and
# 2 are used as the latitude and longitude of the image's top-left corner.
top_left_lat = float(name.split(", ")[1])
top_left_lon = float(name.split(", ")[2])

# NOTE(review): pyproj's Proj takes (longitude, latitude) on forward calls and
# returns (longitude, latitude) on inverse calls. Here latitude is passed
# first and the inverse result is unpacked as (lat, lon). The swap is applied
# consistently, so the outputs fall inside the tile, but the projection is
# evaluated at the transposed coordinate. The arithmetic is left numerically
# unchanged; confirm the imagery's projection and pixel size before
# correcting it, since that shifts every output coordinate.
top_left_utm = prj(top_left_lat, top_left_lon)

x_list, y_list = [], []
# Iterate the two centre columns directly instead of doing a positional row
# lookup per box.
for centre_x, centre_y in zip(df2['x'], df2['y']):
    # Pixel offsets from the top-left corner, scaled to meters.
    utm_x = top_left_utm[0] - centre_x * res
    utm_y = top_left_utm[1] + centre_y * res

    lat, lon = prj(utm_x, utm_y, inverse=True)
    x_list.append(lat)
    y_list.append(lon)

df2['lat'] = x_list
df2['lon'] = y_list

# Point takes (x, y), i.e. (lon, lat).
geometry = [Point(xy) for xy in zip(y_list, x_list)]
# Plain authority string instead of the deprecated {'init': ...} CRS dict.
df2 = gpd.GeoDataFrame(df2, geometry=geometry, crs='EPSG:4326')
df2.head()

Unnamed: 0,annotation,xmin,ymin,xmax,ymax,y,x,length,width,lat,lon,geometry
0,0,1340.0,850.0,1386.0,892.0,1363.0,871.0,27.47,25.08,37.2683,23.280477,POINT (23.28047724081992 37.26829996029838)
1,1,1285.0,836.0,1330.0,879.0,1307.5,857.5,26.87,25.68,37.26834,23.280182,POINT (23.28018182237341 37.26833973176992)
2,2,1238.0,824.0,1282.0,867.0,1260.0,845.5,26.28,25.68,37.268376,23.279929,POINT (23.27992872046099 37.26837626993877)
3,3,1187.0,811.0,1232.0,855.0,1209.5,833.0,26.87,26.28,37.268414,23.27966,POINT (23.27965978733305 37.2684136705809)
4,4,1136.0,751.0,1181.0,796.0,1158.5,773.5,26.87,26.87,37.268714,23.27936,POINT (23.27936018839313 37.26871416099456)


In [29]:
# Write the table to a CSV named after the annotation file.
# rsplit on the last '.' removes only the extension; splitting on the letter
# "x" would cut the name at the first "x" anywhere in it.
# NOTE(review): absolute, user-specific output directory -- consider a
# configurable path.
output = '/Users/Zack/Desktop/' + name.rsplit('.', 1)[0] + '.csv'
df2.to_csv(output, index=False, header=True)

In [28]:
# Write the georeferenced boxes as GeoJSON and as a shapefile, both named
# after the annotation file.
# rsplit on the last '.' removes only the extension; splitting on the letter
# "x" would cut the name at the first "x" anywhere in it.
# NOTE(review): absolute, user-specific output directory -- consider a
# configurable path.
output_stem = '/Users/Zack/Desktop/' + name.rsplit('.', 1)[0]
output2 = output_stem + '.geojson'
output3 = output_stem + '.shp'
df2.to_file(output2, driver='GeoJSON')
df2.to_file(output3)