In [2]:
import os
import pandas as pd

## Adjust Clean

In [16]:
# Load the ';'-delimited label CSV from the resized-clean training folder.
# The 'Unnamed: 0' column is discarded (presumably a row index saved with the
# file — TODO confirm).
data_path = './data-modified-resized-clean/train/DataTrain.csv'
df = pd.read_csv(data_path, sep=';').drop(columns='Unnamed: 0')

# Folder holding the image files that the 'NameofFile' column refers to.
directory = './data-modified-resized-clean/train/data/'

In [18]:
# Point every file name at the JPEG version of the image by swapping a
# trailing ".png" extension for ".jpg".
# The pattern is an explicit, escaped and anchored regex: the default value of
# `regex` is not the same across pandas versions, and when '.png' is treated
# as a regex the unescaped "." matches any character anywhere in the name.
df['NameofFile'] = df['NameofFile'].str.replace(r'\.png$', '.jpg', regex=True)

In [19]:
# Show the table to check the rewritten file names.
df

Unnamed: 0,Vehicleregistrationplate,NameofFile
0,A7814,DataTrain1.jpg
1,B1074QO,DataTrain2.jpg
2,B1031QO,DataTrain3.jpg
3,B187EDA,DataTrain4.jpg
4,B1089VD,DataTrain5.jpg
...,...,...
795,B1677EJC,DataTrain796.jpg
796,B1743VO,DataTrain797.jpg
797,AD1416YD,DataTrain798.jpg
798,AB5419TN,DataTrain799.jpg


In [20]:
# Keep only the rows whose 'NameofFile' exists on disk under `directory`;
# rows pointing at a missing file are dropped.
df = df[
    df['NameofFile'].apply(
        lambda file_name: os.path.exists(os.path.join(directory, file_name))
    )
]

In [21]:
# Show the rows that remain after the file-existence filter.
df

Unnamed: 0,Vehicleregistrationplate,NameofFile
3,B187EDA,DataTrain4.jpg
4,B1089VD,DataTrain5.jpg
5,B1972RBP,DataTrain6.jpg
7,AB6268YQ,DataTrain8.jpg
9,B1554EJA,DataTrain10.jpg
...,...,...
791,A8815VX,DataTrain792.jpg
793,AB1364AN,DataTrain794.jpg
795,B1677EJC,DataTrain796.jpg
796,B1743VO,DataTrain797.jpg


In [22]:
# Write the filtered table to a new CSV; index=False keeps the row index out
# of the file.
df.to_csv(
    './data-modified-resized-clean/train/DataTrain-Cleaned.csv',
    index=False,
)

## Adjust Augment

In [45]:
# Load the ';'-delimited label CSV from the resized-augment folder and discard
# its 'Unnamed: 0' column.
data_path = './data-modified-resized-augment/train-aug/DataTrain.csv'
df = pd.read_csv(data_path, sep=';').drop(columns='Unnamed: 0')

# NOTE(review): `directory` points at the resized-clean image folder and is not
# referenced by the other cells of this section — confirm whether it is needed.
directory = './data-modified-resized-clean/train/data/'

# Number of augmented file names to generate per source image.
repeat = 30

In [46]:
# Pull the file-name and label columns out as plain lists, and start with
# empty accumulators for the expanded rows.
paths = df['NameofFile'].tolist()
labels = df['Vehicleregistrationplate'].tolist()
adjusted_paths, adjusted_labels = [], []

In [47]:
# Expand every source image into `repeat` file names of the form
# "<stem>-<k><ext>" (k = 0 .. repeat-1), each carrying the source label.
# The accumulators are reset here so that re-running this cell on its own
# rebuilds the lists instead of appending a second copy of every row.
adjusted_paths = []
adjusted_labels = []
for path, label in zip(paths, labels):
  # The stem/extension split is identical for every copy: do it once per image.
  img_base, ext = os.path.splitext(path)
  for j in range(repeat):
    adjusted_paths.append(f'{img_base}-{j}{ext}')
    adjusted_labels.append(label)

In [48]:
# Assemble the expanded rows into a two-column table: the label first, then
# the generated file name.
modified_df = pd.DataFrame(
    data={
        'labels': adjusted_labels,
        'images_path': adjusted_paths,
    }
)

In [49]:
# Show the expanded label table.
modified_df

Unnamed: 0,labels,images_path
0,A7814,DataTrain1-0.png
1,A7814,DataTrain1-1.png
2,A7814,DataTrain1-2.png
3,A7814,DataTrain1-3.png
4,A7814,DataTrain1-4.png
...,...,...
23995,AB6315SE,DataTrain800-25.png
23996,AB6315SE,DataTrain800-26.png
23997,AB6315SE,DataTrain800-27.png
23998,AB6315SE,DataTrain800-28.png


In [51]:
# Write the expanded label table to its own CSV, without the row index.
modified_df.to_csv(
    './data-modified-resized-augment/train-aug/updated_labels.csv',
    index=False,
)

## Adjust Augment Clean

In [10]:
# Load the ','-delimited cleaned label CSV from the clean-augment folder.
# No 'Unnamed: 0' column is dropped here, unlike the ';'-delimited inputs in
# the other sections — presumably because this file was exported without a row
# index (TODO confirm).
data_path = './data-modified-resized-clean-augment/train-aug/DataTrain-Cleaned.csv'
df = pd.read_csv(data_path, sep=',')

# NOTE(review): `directory` is not referenced by the other cells of this
# section — confirm whether it is needed.
directory = './data-modified-resized-clean-augment/train/data/'

# Number of augmented file names to generate per source image.
repeat = 10

In [11]:
# Pull the file-name and label columns out as plain lists, and start with
# empty accumulators for the expanded rows.
paths = df['NameofFile'].tolist()
labels = df['Vehicleregistrationplate'].tolist()
adjusted_paths, adjusted_labels = [], []

In [12]:
# Expand every source image into `repeat` file names of the form
# "<stem>-<k><ext>" (k = 0 .. repeat-1), each carrying the source label.
# The accumulators are reset here so that re-running this cell on its own
# rebuilds the lists instead of appending a second copy of every row.
adjusted_paths = []
adjusted_labels = []
for path, label in zip(paths, labels):
  # The stem/extension split is identical for every copy: do it once per image.
  img_base, ext = os.path.splitext(path)
  for j in range(repeat):
    adjusted_paths.append(f'{img_base}-{j}{ext}')
    adjusted_labels.append(label)

In [13]:
# Assemble the expanded rows into a two-column table: the label first, then
# the generated file name.
modified_df = pd.DataFrame(
    data={
        'labels': adjusted_labels,
        'images_path': adjusted_paths,
    }
)

In [14]:
# Show the expanded label table.
modified_df

Unnamed: 0,labels,images_path
0,B187EDA,DataTrain4-0.jpg
1,B187EDA,DataTrain4-1.jpg
2,B187EDA,DataTrain4-2.jpg
3,B187EDA,DataTrain4-3.jpg
4,B187EDA,DataTrain4-4.jpg
...,...,...
7115,AD1416YD,DataTrain798-5.jpg
7116,AD1416YD,DataTrain798-6.jpg
7117,AD1416YD,DataTrain798-7.jpg
7118,AD1416YD,DataTrain798-8.jpg


In [9]:
# Write the expanded label table to its own CSV, without the row index.
modified_df.to_csv(
    './data-modified-resized-clean-augment/train-aug-v2/updated_labels.csv',
    index=False,
)

## Adjust Localized Augment

In [3]:
# Load the ';'-delimited label CSV from the original training folder and
# discard its 'Unnamed: 0' column.
data_path = './data-original/train/DataTrain.csv'
df = pd.read_csv(data_path, sep=';').drop(columns='Unnamed: 0')

# NOTE(review): `directory` points at the clean-augment image folder and is not
# referenced by the other cells of this section — confirm whether it is needed.
directory = './data-modified-resized-clean-augment/train/data/'

# Number of augmented file names to generate per source image.
repeat = 50

In [4]:
# Pull the file-name and label columns out as plain lists, and start with
# empty accumulators for the expanded rows.
paths = df['NameofFile'].tolist()
labels = df['Vehicleregistrationplate'].tolist()
adjusted_paths, adjusted_labels = [], []

In [5]:
# Expand every source image into `repeat` file names of the form
# "<stem>-<k><ext>" (k = 0 .. repeat-1), each carrying the source label.
# The accumulators are reset here so that re-running this cell on its own
# rebuilds the lists instead of appending a second copy of every row.
adjusted_paths = []
adjusted_labels = []
for path, label in zip(paths, labels):
  # The stem/extension split is identical for every copy: do it once per image.
  img_base, ext = os.path.splitext(path)
  for j in range(repeat):
    adjusted_paths.append(f'{img_base}-{j}{ext}')
    adjusted_labels.append(label)

In [6]:
# Assemble the expanded rows into a two-column table: the label first, then
# the generated file name.
modified_df = pd.DataFrame(
    data={
        'labels': adjusted_labels,
        'images_path': adjusted_paths,
    }
)

In [7]:
# Show the expanded label table.
modified_df

Unnamed: 0,labels,images_path
0,A7814,DataTrain1-0.png
1,A7814,DataTrain1-1.png
2,A7814,DataTrain1-2.png
3,A7814,DataTrain1-3.png
4,A7814,DataTrain1-4.png
...,...,...
39995,AB6315SE,DataTrain800-45.png
39996,AB6315SE,DataTrain800-46.png
39997,AB6315SE,DataTrain800-47.png
39998,AB6315SE,DataTrain800-48.png


In [8]:
# Write the expanded label table to its own CSV, without the row index.
modified_df.to_csv(
    './data-modified-localized/train-aug/updated_labels.csv',
    index=False,
)

## Adjust Localized Augment Clean

In [3]:
# Load the ','-delimited cleaned label CSV from the clean-augment folder.
# No 'Unnamed: 0' column is dropped here, unlike the ';'-delimited inputs in
# the other sections — presumably because this file was exported without a row
# index (TODO confirm).
data_path = './data-modified-resized-clean-augment/train-aug/DataTrain-Cleaned.csv'
df = pd.read_csv(data_path, sep=',')

# NOTE(review): `directory` is not referenced by the other cells of this
# section — confirm whether it is needed.
directory = './data-modified-resized-clean-augment/train/data/'

# Number of augmented file names to generate per source image.
repeat = 50

In [4]:
# Pull the file-name and label columns out as plain lists, and start with
# empty accumulators for the expanded rows.
paths = df['NameofFile'].tolist()
labels = df['Vehicleregistrationplate'].tolist()
adjusted_paths, adjusted_labels = [], []

In [5]:
# Expand every source image into `repeat` file names of the form
# "<stem>-<k><ext>" (k = 0 .. repeat-1), each carrying the source label.
# The accumulators are reset here so that re-running this cell on its own
# rebuilds the lists instead of appending a second copy of every row.
adjusted_paths = []
adjusted_labels = []
for path, label in zip(paths, labels):
  # The stem/extension split is identical for every copy: do it once per image.
  img_base, ext = os.path.splitext(path)
  for j in range(repeat):
    adjusted_paths.append(f'{img_base}-{j}{ext}')
    adjusted_labels.append(label)

In [6]:
# Assemble the expanded rows into a two-column table: the label first, then
# the generated file name.
modified_df = pd.DataFrame(
    data={
        'labels': adjusted_labels,
        'images_path': adjusted_paths,
    }
)

In [7]:
# Show the expanded label table.
modified_df

Unnamed: 0,labels,images_path
0,B187EDA,DataTrain4-0.jpg
1,B187EDA,DataTrain4-1.jpg
2,B187EDA,DataTrain4-2.jpg
3,B187EDA,DataTrain4-3.jpg
4,B187EDA,DataTrain4-4.jpg
...,...,...
35595,AD1416YD,DataTrain798-45.jpg
35596,AD1416YD,DataTrain798-46.jpg
35597,AD1416YD,DataTrain798-47.jpg
35598,AD1416YD,DataTrain798-48.jpg


In [8]:
# Write the expanded label table to its own CSV, without the row index.
modified_df.to_csv(
    './data-modified-localized-clean/train-aug/updated_labels.csv',
    index=False,
)