In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive. The analysis cell reads its image
# folders from /content/drive/MyDrive/Public_New, so this must run first.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import os
import csv

# Root folder on the mounted Drive that holds one sub-folder per automaker.
# Deliberately not called `dir`: that name would shadow the built-in dir()
# for the rest of the kernel session.
base_dir = '/content/drive/MyDrive/Public_New'

# Automaker labels. Each label is the sub-folder name under `base_dir` and is
# also the value file names are compared against by the scan loop, so the
# spelling (including 'Huyndai') must match the data on disk exactly.
# The order here is the order zero-count rows are emitted in the second report.
automaker_names = [
    'Huyndai',
    'VinFast',
    'Suzuki',
    'Others',
    'Honda',
    'Mazda',
    'Mitsubishi',
    'Toyota',
    'KIA',
]

# Dictionary mapping each automaker folder path to its label
# (insertion order follows `automaker_names`).
automakers = {os.path.join(base_dir, name): name for name in automaker_names}

# Per-ID image counts: {MSSV: {automaker label: number of matching images}}.
# (MSSV is presumably the student ID - confirm with the dataset owners.)
MSSVs = {}

# Extensions accepted as images; compared against the lower-cased file name.
image_extensions = ('.jpg', '.jpeg', '.png')

# Iterate over each automaker's folder
for path, name in automakers.items():
    # os.walk also descends into any sub-folders of the automaker folder.
    # The loop variable is `file_names` (not `files`) so it does not reuse the
    # name the download cell imports from google.colab.
    for _, _, file_names in os.walk(path):
        for file_name in file_names:
            # Case-insensitive check so 'x.JPG' / 'x.PNG' are not silently skipped.
            if not file_name.lower().endswith(image_extensions):
                continue

            # A valid name has exactly four dot-separated fields:
            #   <MSSV[-MSSV...]>.<automaker>.<third field, unused here>.<extension>
            parts = file_name.split('.')
            if len(parts) != 4:
                continue

            # The automaker field must match the folder being scanned.
            if parts[1] != name:
                continue

            # One image may be credited to several IDs joined by '-'.
            for mssv in parts[0].split('-'):
                if not mssv:
                    # A leading, trailing or doubled '-' yields an empty token;
                    # do not count it as an ID.
                    continue
                per_automaker = MSSVs.setdefault(mssv, {})
                per_automaker[name] = per_automaker.get(name, 0) + 1

# Row buffers for the two CSV reports
data_file1 = []  # one [MSSV, 'All', total count] row per MSSV
data_file2 = []  # one [MSSV, automaker, count] row per MSSV and automaker

for student_id, per_automaker in MSSVs.items():
    # Report 1: total over every automaker for this MSSV
    data_file1.append([student_id, 'All', sum(per_automaker.values())])

    # Report 2: first the automakers that have at least one image ...
    data_file2.extend(
        [student_id, maker, n_images] for maker, n_images in per_automaker.items()
    )
    # ... then an explicit zero row for every automaker with none,
    # in the order of the `automakers` mapping.
    data_file2.extend(
        [student_id, maker, 0]
        for maker in automakers.values()
        if maker not in per_automaker
    )

# Output file paths on the mounted Drive. Absolute paths (same '/content/drive'
# prefix as the input folder) so the cell does not depend on the kernel's
# current working directory being '/content'.
file1 = '/content/drive/MyDrive/CarDataset-1.csv'
file2 = '/content/drive/MyDrive/CarDataset-2.csv'

# Write the rows as header-less CSV files. newline='' is what the csv module
# expects for files it writes; the encoding is pinned so the read-back below
# (pandas defaults to UTF-8) always agrees with what was written.
for csv_path, rows in ((file1, data_file1), (file2, data_file2)):
    with open(csv_path, 'w', newline='', encoding='utf-8') as file:
        csv.writer(file).writerows(rows)

# Read the CSV files back into DataFrames, supplying the column names the files
# lack. MSSV is an identifier, not a quantity: keep it as text so it is never
# reinterpreted as a number (which would drop leading zeros).
df1 = pd.read_csv(file1, names=['MSSV', 'All', 'Số lượng'], dtype={'MSSV': str})
df2 = pd.read_csv(file2, names=['MSSV', 'Hãng xe', 'Số lượng'], dtype={'MSSV': str})

# Largest counts first
df1 = df1.sort_values(by='Số lượng', ascending=False)
df2 = df2.sort_values(by='Số lượng', ascending=False)

# Print the DataFrames
print(f'CarDataset-1.csv:\n\n{df1}\n\n')
print(f'CarDataset-2.csv:\n\n{df2}\n')


CarDataset-1.csv:

        MSSV  All  Số lượng
17  22520195  All     10858
16  22521027  All     10858
18  22521060  All     10858
25  22520862  All      3607
24  22520507  All      3607
23  22520459  All      3607
1   22520395  All      3114
0   22520394  All      3114
20  22521587  All      2356
19  22520779  All      2356
38  22521070  All      1789
39  22520211  All      1789
33  22521342  All      1644
34  22520970  All      1644
6   22521586  All      1379
7   22521559  All      1379
9   21522924  All      1192
42  22521692  All      1035
43  22521676  All      1035
41  21522771  All       815
40  21522500  All       815
50  21520938  All       775
51  22520634  All       775
3   22521599  All       717
2   22521492  All       717
8   21520930  All       596
45  22520915  All       554
44  22520914  All       552
13  22520490  All       465
12  22520477  All       465
15  22521614  All       449
14  22521560  All       449
4   21522373  All       411
5   21522499  All       411
2

In [None]:
from google.colab import files
# Pass each generated CSV report to Colab's download helper, in order.
for report_path in (file1, file2):
    files.download(report_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>