**Packages**

In [None]:
pip install flake8 pycodestyle_magic

In [None]:
%load_ext pycodestyle_magic

In [None]:
# Packages import
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from PIL import Image, ImageDraw
import ast

!pip install -q imagesize
import imagesize

In [None]:
%pycodestyle_on
# Load the training CSV into a DataFrame.
train_csv_path = '/kaggle/input/tensorflow-great-barrier-reef/train.csv'
df = pd.read_csv(train_csv_path)

In [None]:
%pycodestyle_on
# Report how many rows the table holds.
print('df size :', df.shape[0])

In [None]:
%pycodestyle_on
# Add the path of the image file for every row:
#   <train_images_dir>/video_<video_id>/<video_frame>.jpg
# The directory is used as-is; wrapping a single string in os.path.join()
# returned it unchanged, so the call was dropped.
train_images_dir = '../input/tensorflow-great-barrier-reef/train_images'
df['img_path'] = (
    train_images_dir
    + "/video_" + df['video_id'].astype(str)
    + "/" + df['video_frame'].astype(str)
    + ".jpg"
)
df.head()

# Exploration

Exploration code inspired by Kartik Khandelwal's notebook "📊📈Data Analysis & Visualization for Beginners".

In [None]:
%pycodestyle_on
# How many images per video?
plt.figure(figsize=(8, 5))
# Name the column through x=/data= keywords: recent seaborn releases no
# longer accept the plotted variable as a bare positional argument.
ax = sns.countplot(x='video_id', data=df, color='#49A9DB')
ax.set_title('Nb of image per video')

In [None]:
%pycodestyle_on
# How many images contain a starfish?
# The annotations are compared as text: a row whose annotation text is
# exactly '[]' is counted as "without bounding box".
is_annotated = df['annotations'] != '[]'
with_annotation = len(df[is_annotated])
without_annotation = len(df[~is_annotated])

labels = ['Without Bounding Box', 'With Bounding Box']
counts = [without_annotation, with_annotation]

fig = go.Figure(data=[go.Bar(x=labels, y=counts, width=0.6)])
fig.update_layout(
    title="Image with Starfish",
    autosize=False,
    width=500,
    height=350,
    margin={'l': 60, 'r': 60, 'b': 50, 't': 50, 'pad': 4},
)
fig.show()

In [None]:
%pycodestyle_on
# How many starfish are detected per image?

# New column with the total number of bounding boxes, taken to be the
# number of '{' characters found in the annotation text.
df['No_bbox'] = df['annotations'].apply(
    lambda annotation_text: annotation_text.count('{')
)
df.head()

In [None]:
%pycodestyle_on
# Bar chart of how often each box count occurs; the entry for 0 boxes is
# dropped so that only rows with at least one box are charted.
bbox_counts = df['No_bbox'].value_counts()
bbox_counts = bbox_counts.drop(0)

fig = px.bar(bbox_counts, title='Count of Bounding Boxes per image')
fig.update_layout(
    autosize=False,
    width=700,
    height=400,
    margin={'l': 60, 'r': 60, 'b': 50, 't': 50, 'pad': 4},
)
fig.show()

In [None]:
%pycodestyle_on
# Change 'annotations' from string to list data type using ast.
def _parse_annotation(value):
    """Return `value` parsed by ast.literal_eval if it is still a string.

    Values that are not strings are returned unchanged, so running this
    cell a second time is a no-op instead of making ast.literal_eval raise
    on data that was already converted.
    """
    return ast.literal_eval(value) if isinstance(value, str) else value


df['annotations'] = df['annotations'].apply(_parse_annotation)
df.head()

In [None]:
%pycodestyle_on
# Format change for the bounding boxes: [x, y, width, height]
def get_bbox(annots):
    """Turn a list of annotation dicts into a list of bounding-box lists.

    Args:
        annots: iterable of dicts, one per bounding box.

    Returns:
        A list with one list per annotation, holding that dict's values in
        insertion order (expected to be [x, y, width, height] -- TODO
        confirm the key order of the source data).
    """
    # dict.values() is only a view object; materialise it so every box is a
    # real list that can be indexed and compared.
    return [list(annot.values()) for annot in annots]

# Store the converted boxes of every row in a new 'bboxes' column.
df['bboxes'] = df['annotations'].apply(get_bbox)
df.head()

In [None]:
%pycodestyle_on
# Check of the image sizes: all the images do have the same size
def get_imgsize(row):
    """Return a copy of `row` extended with the image's width and height.

    Args:
        row: record (e.g. a DataFrame row) with an 'img_path' entry that is
            passed to imagesize.get().

    Returns:
        A copy of `row` with 'width' and 'height' set from the pair
        returned by imagesize.get(row['img_path']).
    """
    # Work on a copy: pandas does not support functions that mutate the
    # object handed to them by DataFrame.apply.
    row = row.copy()
    row['width'], row['height'] = imagesize.get(row['img_path'])
    return row

# Add 'width' and 'height' to every row, then show the distinct values
# found in each column and a two-row preview of the table.
df = df.apply(get_imgsize, axis='columns')
display(df['width'].unique(), df['height'].unique())
display(df.head(2))

In [None]:
%pycodestyle_on
def img_viz(df, id):
    """Display one image with its annotated boxes outlined in red.

    Args:
        df: table-like object with 'img_path' and 'annotations' columns.
        id: key of the entry to show, looked up as df[column][id].
    """
    img = Image.open(df['img_path'][id])
    # A single drawing context is enough for all the boxes of the image.
    draw = ImageDraw.Draw(img)

    for box in df['annotations'][id]:
        # Each box is stored as a corner (x, y) plus a size; rectangle() is
        # given the two opposite corners instead.
        x0, y0 = box['x'], box['y']
        corners = [x0, y0, x0 + box['width'], y0 + box['height']]
        draw.rectangle(corners, outline="red", width=3)
    display(img)

In [None]:
%pycodestyle_on
# Show the image stored under key 40 together with its boxes.
img_viz(df, id=40)