This notebook shows how to append data from a **CVAT xml output file** to the **videosurvey** MySQL database.

It is assumed that the **videosurvey.videos** table contain exactly one record which stores metadata for the video processed by CVAT.

Data is written to 3 **videosurvey** tables: **frames**, **trees**, and **vcuts**. 

In [6]:
import pandas as pd
from sqlalchemy import create_engine
from getpass import getpass
import xml.etree.ElementTree as ET

In [9]:
CVATXMLFILE = '20200703_124043_cvat_annotation_temp_skip_7_numframes_21400.xml'
VIDEO_ID = '20200703_124043.mp4'

In [15]:
def connect_to_db():
    user = input('videosurvey DB user name:')
    password = getpass('videosurvey DB password:')
    print('Please wait a few seconds for a message confirming successful connection.')
    engine = create_engine(f'mysql+pymysql://{user}:{password}@mysql.guaminsects.net/videosurvey')
    connection = engine.connect() # test connection
    print(connection)

# MAIN

In [73]:
connect_to_db()

df_videos = pd.read_sql(f'SELECT * FROM videos WHERE video_id="{VIDEO_ID}"', connection, index_col=None)  
if df_videos.shape[0]!=1:
    raise SystemExit(f"ERROR: Cannot find a row for {VIDEO_ID} in the videos table.")
    
# Delete all rows associated with VIDEO_ID from tables frames, trees, and vcuts
connection.execute(f'DELETE FROM frames WHERE video_id="{VIDEO_ID}"')
connection.execute(f'DELETE FROM trees WHERE frame_id LIKE "{VIDEO_ID}%%"')
connection.execute(f'DELETE FROM vcuts WHERE frame_id LIKE "{VIDEO_ID}%%"')

# Create empty dataframes for tables frames, trees, and vcuts
df_frames = pd.read_sql(f'SELECT * FROM frames WHERE video_id="{VIDEO_ID}"', connection, index_col=None) 
df_trees  = pd.read_sql(f'SELECT * FROM trees WHERE frame_id LIKE "{VIDEO_ID}%%"', connection, index_col=None) 
df_vcuts  = pd.read_sql(f'SELECT * FROM vcuts WHERE frame_id LIKE "{VIDEO_ID}%%"', connection, index_col=None) 

videosurvey DB user name:aubreymoore
videosurvey DB password:········
Please wait a few seconds for a message confirming successful connection.
<sqlalchemy.engine.base.Connection object at 0x7f1425619610>


In [75]:
tree = ET.parse(CVATXMLFILE)
root = tree.getroot()

mylist = []
for image in root.findall('image'):
    for box in image.findall('box'):
        mydict = box.attrib
        mydict.update(image.attrib)
        mylist.append(mydict)
#mylist

df = pd.DataFrame(mylist)


df

Unnamed: 0,label,occluded,xtl,ytl,xbr,ybr,id,name,width,height
0,light,0,1503.00,1042.00,1669.00,1248.00,0,frame_000000,3840,2160
1,light,0,1282.00,988.00,1459.00,1251.00,0,frame_000000,3840,2160
2,medium,0,1407.00,1014.00,1567.00,1253.00,0,frame_000000,3840,2160
3,light,0,1335.00,964.00,1541.00,1274.00,7,frame_000007,3840,2160
4,light,0,1586.00,1023.00,1742.00,1244.00,7,frame_000007,3840,2160
...,...,...,...,...,...,...,...,...,...,...
1821,light,0,1553.00,893.00,1991.00,1345.00,19446,frame_019446,3840,2160
1822,medium,0,1753.00,795.00,2296.00,1426.00,19453,frame_019453,3840,2160
1823,medium,0,2034.00,618.00,2764.00,1396.00,19460,frame_019460,3840,2160
1824,medium,0,2635.00,394.00,3547.00,1254.00,19467,frame_019467,3840,2160


In [56]:
result = connection.execute('SELECT * FROM frames WHERE video_id="{VIDEO_ID}"')
#get_data_from_frames_table()

In [65]:
connection.execute(f'DELETE FROM frames WHERE video_id="{VIDEO_ID}"')
rs = connection.execute(f'SELECT * FROM frames WHERE video_id="{VIDEO_ID}"')
for row in rs:
    print(row)


In [57]:
for row in result:
    print(row)

In [38]:
sql = f"DELETE FROM vcuts WHERE frame_id LIKE '{VIDEO_ID}%%'"
connection.execute(sql)

<sqlalchemy.engine.result.ResultProxy at 0x7f14256fd790>

In [None]:
def str2int(s):
    return int(float(s))

# Parse the CVAT xml file

In [29]:
tree = ET.parse(CVATXMLFILE)
root = tree.getroot()

print(ET.tostring(root.find('meta'), encoding='unicode'))

<meta>
    <task>
      <id>1</id>
      <name>demo</name>
      <size>21400</size>
      <mode>interpolation</mode>
      <start_frame>0</start_frame>
      <stop_frame>21399</stop_frame>
      <z_order>False</z_order>
      <labels>
        <label>
          <name>zero</name>
        </label>
        <label>
          <name>light</name>
        </label>
        <label>
          <name>medium</name>
        </label>
        <label>
          <name>high</name>
        </label>
        <label>
          <name>non_recoverable</name>
        </label>
      </labels>
    </task>
  </meta>
  


## Extract the **frames** data

## Extract the **trees** data

In [31]:
mylist = []
for image in root.findall('image'):
    for box in image.findall('box'):
        mydict = box.attrib
        mydict.update(image.attrib)
        mylist.append(mydict)
mylist

[{'label': 'light',
  'occluded': '0',
  'xtl': '1503.00',
  'ytl': '1042.00',
  'xbr': '1669.00',
  'ybr': '1248.00',
  'id': '0',
  'name': 'frame_000000',
  'width': '3840',
  'height': '2160'},
 {'label': 'light',
  'occluded': '0',
  'xtl': '1282.00',
  'ytl': '988.00',
  'xbr': '1459.00',
  'ybr': '1251.00',
  'id': '0',
  'name': 'frame_000000',
  'width': '3840',
  'height': '2160'},
 {'label': 'medium',
  'occluded': '0',
  'xtl': '1407.00',
  'ytl': '1014.00',
  'xbr': '1567.00',
  'ybr': '1253.00',
  'id': '0',
  'name': 'frame_000000',
  'width': '3840',
  'height': '2160'},
 {'label': 'light',
  'occluded': '0',
  'xtl': '1335.00',
  'ytl': '964.00',
  'xbr': '1541.00',
  'ybr': '1274.00',
  'id': '7',
  'name': 'frame_000007',
  'width': '3840',
  'height': '2160'},
 {'label': 'light',
  'occluded': '0',
  'xtl': '1586.00',
  'ytl': '1023.00',
  'xbr': '1742.00',
  'ybr': '1244.00',
  'id': '7',
  'name': 'frame_000007',
  'width': '3840',
  'height': '2160'},
 {'label': 

# vcuts

In [33]:
mylist = []
for image in root.findall('image'):
    for polygon in image.findall('polygon'):
        mydict = polygon.attrib
        mydict.update(image.attrib)
        mylist.append(mydict)
mylist

[{'label': 'cut',
  'occluded': '0',
  'points': '1931.00,931.50;1903.50,926.00;1913.50,896.00;1923.00,888.50;1934.00,889.50;1940.50,897.00;1940.50,918.00;1931.00,931.50',
  'id': '28',
  'name': 'frame_000028',
  'width': '3840',
  'height': '2160'},
 {'label': 'cut',
  'occluded': '0',
  'points': '2094.00,884.50;2070.00,881.50;2066.50,873.00;2074.50,846.00;2087.00,835.50;2098.00,833.50;2109.50,840.00;2115.50,855.00;2103.50,879.00;2094.00,884.50',
  'id': '35',
  'name': 'frame_000035',
  'width': '3840',
  'height': '2160'},
 {'label': 'cut',
  'occluded': '0',
  'points': '2288.00,835.50;2276.00,835.50;2270.00,831.50;2264.50,825.00;2264.50,817.00;2275.50,785.00;2288.00,772.50;2297.00,770.50;2306.50,778.00;2312.50,800.00;2296.50,830.00;2288.00,835.50',
  'id': '42',
  'name': 'frame_000042',
  'width': '3840',
  'height': '2160'},
 {'label': 'cut',
  'occluded': '0',
  'points': '2538.00,750.50;2527.00,745.50;2522.50,739.00;2525.50,723.00;2542.50,689.00;2552.00,682.50;2562.00,681.50

In [28]:
df = pd.DataFrame(mylist)
df = df[(df.occluded=='0') & (df.outside=='0')]
df.drop(['occluded', 'outside'], axis=1, inplace=True)
df.rename({'id': 'track_id'}, axis=1, inplace=True)
df.xbr = df.xbr.apply(lambda x: str2int(x))
df.xtl = df.xtl.apply(lambda x: str2int(x))
df.ybr = df.ybr.apply(lambda x: str2int(x))
df.ytl = df.ytl.apply(lambda x: str2int(x))
df.frame = df.frame.apply(lambda x: str2int(x))
df.track_id = df.track_id.apply(lambda x: str2int(x))
df

<meta>
    <task>
      <id>1</id>
      <name>demo</name>
      <size>21400</size>
      <mode>interpolation</mode>
      <start_frame>0</start_frame>
      <stop_frame>21399</stop_frame>
      <z_order>False</z_order>
      <labels>
        <label>
          <name>zero</name>
        </label>
        <label>
          <name>light</name>
        </label>
        <label>
          <name>medium</name>
        </label>
        <label>
          <name>high</name>
        </label>
        <label>
          <name>non_recoverable</name>
        </label>
      </labels>
    </task>
  </meta>
  


AttributeError: 'DataFrame' object has no attribute 'occluded'