In [58]:
import os

import pandas as pd
import s3fs
import zipfile
from scipy.io import loadmat

## Download les données

In [59]:
# Create filesystem object
S3_ENDPOINT_URL = "https://" + os.environ["AWS_S3_ENDPOINT"]
fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': S3_ENDPOINT_URL})

In [60]:
# Lister les challenges
fs.ls("gvimont/diffusion/hackathon-minarm-2024")

['gvimont/diffusion/hackathon-minarm-2024/AIVSAI',
 'gvimont/diffusion/hackathon-minarm-2024/Acoustique',
 'gvimont/diffusion/hackathon-minarm-2024/Similarité']

In [61]:
# Lister les fichiers d'un challenge
fs.ls("gvimont/diffusion/hackathon-minarm-2024/Similarité")

['gvimont/diffusion/hackathon-minarm-2024/Similarité/.keep',
 'gvimont/diffusion/hackathon-minarm-2024/Similarité/archive.zip']

In [62]:
# Télécharger les données dans le service
PATH_IN = 'gvimont/diffusion/hackathon-minarm-2024/Similarité/archive.zip'
fs.download(PATH_IN, 'data/archive.zip')

[None]

In [63]:
# Décompresser les données
with zipfile.ZipFile("data/archive.zip","r") as zip_file:
    zip_file.extractall("data/")

## Create dataframe

In [64]:
mat_data = loadmat("data/cars_annos.mat")

annotations = mat_data["annotations"]
class_names = mat_data["class_names"]
data = []

for ann in annotations[0]:
    relative_im_path = ann['relative_im_path'][0]
    bbox_x1 = ann['bbox_x1'][0][0]
    bbox_y1 = ann['bbox_y1'][0][0]
    bbox_x2 = ann['bbox_x2'][0][0]
    bbox_y2 = ann['bbox_y2'][0][0]
    class_num = ann['class'][0][0]
    test = ann['test'][0][0]
    data.append([relative_im_path, bbox_x1, bbox_y1, bbox_x2, bbox_y2, class_num, test])

df = pd.DataFrame(data, columns=['relative_im_path', 'bbox_x1', 'bbox_y1', 'bbox_x2', 'bbox_y2', 'class', 'test'])
df.head

<bound method NDFrame.head of          relative_im_path  bbox_x1  bbox_y1  bbox_x2  bbox_y2  class  test
0      car_ims/000001.jpg      112        7      853      717      1     0
1      car_ims/000002.jpg       48       24      441      202      1     0
2      car_ims/000003.jpg        7        4      277      180      1     0
3      car_ims/000004.jpg       33       50      197      150      1     0
4      car_ims/000005.jpg        5        8       83       58      1     0
...                   ...      ...      ...      ...      ...    ...   ...
16180  car_ims/016181.jpg       38       36      375      234    196     1
16181  car_ims/016182.jpg       29       34      235      164    196     1
16182  car_ims/016183.jpg       25       32      587      359    196     1
16183  car_ims/016184.jpg       56       60      208      186    196     1
16184  car_ims/016185.jpg        1        1      200      131    196     1

[16185 rows x 7 columns]>

In [68]:
class_names[0]
class_mapping = {}
for i in range(1,len(class_names[0]) + 1):
    class_mapping[i] = class_names[0][i-1][0]
class_mapping

{1: 'AM General Hummer SUV 2000',
 2: 'Acura RL Sedan 2012',
 3: 'Acura TL Sedan 2012',
 4: 'Acura TL Type-S 2008',
 5: 'Acura TSX Sedan 2012',
 6: 'Acura Integra Type R 2001',
 7: 'Acura ZDX Hatchback 2012',
 8: 'Aston Martin V8 Vantage Convertible 2012',
 9: 'Aston Martin V8 Vantage Coupe 2012',
 10: 'Aston Martin Virage Convertible 2012',
 11: 'Aston Martin Virage Coupe 2012',
 12: 'Audi RS 4 Convertible 2008',
 13: 'Audi A5 Coupe 2012',
 14: 'Audi TTS Coupe 2012',
 15: 'Audi R8 Coupe 2012',
 16: 'Audi V8 Sedan 1994',
 17: 'Audi 100 Sedan 1994',
 18: 'Audi 100 Wagon 1994',
 19: 'Audi TT Hatchback 2011',
 20: 'Audi S6 Sedan 2011',
 21: 'Audi S5 Convertible 2012',
 22: 'Audi S5 Coupe 2012',
 23: 'Audi S4 Sedan 2012',
 24: 'Audi S4 Sedan 2007',
 25: 'Audi TT RS Coupe 2012',
 26: 'BMW ActiveHybrid 5 Sedan 2012',
 27: 'BMW 1 Series Convertible 2012',
 28: 'BMW 1 Series Coupe 2012',
 29: 'BMW 3 Series Sedan 2012',
 30: 'BMW 3 Series Wagon 2012',
 31: 'BMW 6 Series Convertible 2007',
 32: 

In [70]:
df['class'] = df['class'].replace(class_mapping)
df

Unnamed: 0,relative_im_path,bbox_x1,bbox_y1,bbox_x2,bbox_y2,class,test
0,car_ims/000001.jpg,112,7,853,717,AM General Hummer SUV 2000,0
1,car_ims/000002.jpg,48,24,441,202,AM General Hummer SUV 2000,0
2,car_ims/000003.jpg,7,4,277,180,AM General Hummer SUV 2000,0
3,car_ims/000004.jpg,33,50,197,150,AM General Hummer SUV 2000,0
4,car_ims/000005.jpg,5,8,83,58,AM General Hummer SUV 2000,0
...,...,...,...,...,...,...,...
16180,car_ims/016181.jpg,38,36,375,234,smart fortwo Convertible 2012,1
16181,car_ims/016182.jpg,29,34,235,164,smart fortwo Convertible 2012,1
16182,car_ims/016183.jpg,25,32,587,359,smart fortwo Convertible 2012,1
16183,car_ims/016184.jpg,56,60,208,186,smart fortwo Convertible 2012,1


In [71]:
df_test = df[df['test'] == 1]
df_test

Unnamed: 0,relative_im_path,bbox_x1,bbox_y1,bbox_x2,bbox_y2,class,test
45,car_ims/000046.jpg,14,16,268,169,AM General Hummer SUV 2000,1
46,car_ims/000047.jpg,10,18,89,61,AM General Hummer SUV 2000,1
47,car_ims/000048.jpg,111,54,365,190,AM General Hummer SUV 2000,1
48,car_ims/000049.jpg,9,33,479,276,AM General Hummer SUV 2000,1
49,car_ims/000050.jpg,9,3,93,41,AM General Hummer SUV 2000,1
...,...,...,...,...,...,...,...
16180,car_ims/016181.jpg,38,36,375,234,smart fortwo Convertible 2012,1
16181,car_ims/016182.jpg,29,34,235,164,smart fortwo Convertible 2012,1
16182,car_ims/016183.jpg,25,32,587,359,smart fortwo Convertible 2012,1
16183,car_ims/016184.jpg,56,60,208,186,smart fortwo Convertible 2012,1


In [72]:
df_train = df[df['test'] == 0]
df_train

Unnamed: 0,relative_im_path,bbox_x1,bbox_y1,bbox_x2,bbox_y2,class,test
0,car_ims/000001.jpg,112,7,853,717,AM General Hummer SUV 2000,0
1,car_ims/000002.jpg,48,24,441,202,AM General Hummer SUV 2000,0
2,car_ims/000003.jpg,7,4,277,180,AM General Hummer SUV 2000,0
3,car_ims/000004.jpg,33,50,197,150,AM General Hummer SUV 2000,0
4,car_ims/000005.jpg,5,8,83,58,AM General Hummer SUV 2000,0
...,...,...,...,...,...,...,...
16140,car_ims/016141.jpg,42,9,573,366,smart fortwo Convertible 2012,0
16141,car_ims/016142.jpg,183,4,1186,780,smart fortwo Convertible 2012,0
16142,car_ims/016143.jpg,121,87,465,278,smart fortwo Convertible 2012,0
16143,car_ims/016144.jpg,21,30,468,320,smart fortwo Convertible 2012,0
