In [2]:
import os
import sys
import shutil
import numpy as np
import pandas as pd
import yaml
import csv
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import time
import cv2 as cv
from sklearn import preprocessing
import scipy.io as sio

In [3]:
GLOBAL_DEBUG = True
GLOBAL_DEBUG_VAL = 10

In [4]:
def generate_file_names(directory):
    filelist = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            filelist.append(os.path.join(root, file))
    return filelist

In [5]:
def file_name_conversion(image_paths, label_paths):
    refactored = []
    for i in range(len(image_paths)):
        temp = list(map(int, image_paths[i][0:3])) # We're discarding the year as its not useful information
        refactored.append(temp + [image_paths[i][3]] + label_paths[i])
    return refactored

In [6]:
def file_commas(path_to_attributes):
    curr_file = os.path.join(path_to_attributes, "attributes.txt")
    new_file_name = 'readable_attributes.txt'
    
    imageFile = open(curr_file, "r")
    mifFile = open(f"{path_to_attributes}/{new_file_name}", "w")
    
    for line in imageFile:
        mifFile.write(line.replace(' ',',' ).strip() + "\n")
    
    imageFile.close()
    mifFile.close()

    df = pd.read_csv(f"{path_to_attributes}/{new_file_name}")
    return df

In [7]:
def convert_to_yolo_coord(filelist, labellist):
    id_set = []
    x1_yolo = []
    y1_yolo = []
    x2_yolo = []
    y2_yolo = []
    
    for i in tqdm(range(len(filelist))):
        current_image = cv.imread(f"{filelist[i]}")
        h, w, _ = current_image.shape
        id = os.path.basename(f"{filelist[i]}")
        result = pd.read_csv(labellist[i], header=None).loc[2].values[0].split(' ')
        x1, y1, x2, y2 = map(float, result)
        if (x1 > 0 and x2 > x1 and x2 <= w and
            y1 > 0 and y2 > y1 and y2 <= h):
            diffw = x2 - x1
            diffh = y2 - y1
            cx = ((x1 + (diffw/2)) / w)
            cy = ((y1 + (diffh/2)) / h)
            nw = ((diffw) / w)
            nh = ((diffh) / h)
            x1_yolo.append(cx)
            y1_yolo.append(cy)
            x2_yolo.append(nw)
            y2_yolo.append(nh)
            id_set.append(id)

    return id_set, x1_yolo, y1_yolo, x2_yolo, y2_yolo


In [7]:
image_root = 'Z:\\CompCars\\data\\image'
label_root = 'Z:\\CompCars\\data\\label'

image_paths = sorted(generate_file_names(image_root))
label_paths = sorted(generate_file_names(label_root))

In [8]:
image_list = [image_path.replace('Z:\\CompCars\\data\\image\\', '').replace('\\', ',').split(',') for image_path in image_paths]
label_list = [label_path.replace('Z:\\CompCars\\data\\label\\100\\209\\2009\\', '').replace('\\', ',').split(',') for label_path in label_paths]

In [9]:
data_description = file_name_conversion(image_list, label_list)

In [134]:
classes = ['MPV', 'SUV', 'Sedan','Hatchback','Minibus','Fastback','Wagon','Pickup','Hardtop Convertible','Sports','Crossover','Convertible']
decode_descriptions = file_commas(r'Z:/CompCars/data/misc')

In [11]:
decode_descriptions.head()

Unnamed: 0,model_id,maximum_speed,displacement,door_number,seat_number,type
0,1,235,1.8,5,5,4
1,2,220,1.8,4,5,3
2,3,228,2.0,4,5,3
3,4,230,2.0,5,5,2
4,5,234,3.0,5,5,2


In [12]:
car_type_name = [classes[x] for x in decode_descriptions['type']]
decode_descriptions['car_type_name'] = car_type_name

In [18]:
csv_file_name = "Z:/CompCars/data/misc/new_data_format.csv"

df = pd.read_csv(csv_file_name)
df.head()

Unnamed: 0,make_id,model_id,year,image,label
0,100,209,2009,0ff9e9b49c52cd.jpg,0ff9e9b49c52cd.txt
1,100,209,2009,103597e5e5f4c7.jpg,103597e5e5f4c7.txt
2,100,209,2009,1cb896f2a1632f.jpg,1cb896f2a1632f.txt
3,100,209,2009,35bc7056ec3f25.jpg,35bc7056ec3f25.txt
4,100,209,2009,398aad9e2ab271.jpg,398aad9e2ab271.txt


In [33]:
# Load the .mat file
mat_data = sio.loadmat('Z:/CompCars/data/misc/make_model_name.mat')

# Specify the variable name you want to convert to CSV
model_name = 'model_names'
make_name = 'make_names'
# Access the variable data
make_data = mat_data[make_name]
model_data = mat_data[model_name]

model_desc = [model_data[i - 1][0][0] for i in decode_descriptions['model_id']]
decode_descriptions['model_name'] = model_desc

make_desc = [make_data[i - 1][0][0] for i in df['make_id']]
model_to_df = [model_data[i - 1][0][0] for i in df['model_id']]

df['model_name'] = model_to_df
df['make_name'] = make_desc

type_to_df = []
for i in df['model_id']:
    for j in decode_descriptions['model_id']:
        if i == j:
            mask = (decode_descriptions['model_id'] == j)
            index = decode_descriptions.index[mask].tolist()[0]
            type_to_df.append(decode_descriptions['car_type_name'][index])

df['car_type_name'] = type_to_df

In [34]:
df.head()
decode_descriptions.head()

Unnamed: 0,make_id,model_id,year,image,label,make_name,model_name,car_type_name
0,100,209,2009,0ff9e9b49c52cd.jpg,0ff9e9b49c52cd.txt,Honda,Odyssey,MPV
1,100,209,2009,103597e5e5f4c7.jpg,103597e5e5f4c7.txt,Honda,Odyssey,MPV
2,100,209,2009,1cb896f2a1632f.jpg,1cb896f2a1632f.txt,Honda,Odyssey,MPV
3,100,209,2009,35bc7056ec3f25.jpg,35bc7056ec3f25.txt,Honda,Odyssey,MPV
4,100,209,2009,398aad9e2ab271.jpg,398aad9e2ab271.txt,Honda,Odyssey,MPV


Unnamed: 0,model_id,maximum_speed,displacement,door_number,seat_number,type,car_type_name,model_name
0,1,235,1.8,5,5,4,Hatchback,Audi A3 hatchback
1,2,220,1.8,4,5,3,Sedan,Audi A4L
2,3,228,2.0,4,5,3,Sedan,Audi A6L
3,4,230,2.0,5,5,2,SUV,Audi Q3
4,5,234,3.0,5,5,2,SUV,Audi Q5


In [40]:
img_file_name, cx, cy, nw, nh = convert_to_yolo_coord(image_paths, label_paths) 



100%|██████████████████████████████████████████████████████████████████████████| 136423/136423 [34:49<00:00, 65.29it/s]


In [50]:
x1 = []
y1 = []
x2 = []
y2 = []

filtered_df = df[df['image'].isin(img_file_name)]
print(filtered_df)
for i in tqdm(filtered_df['image']):
    index = img_file_name.index(i)
    x1.append(cx[index])
    y1.append(cy[index])
    x2.append(nw[index])
    y2.append(nh[index])


        make_id  model_id  year               image               label  \
0           100       209  2009  0ff9e9b49c52cd.jpg  0ff9e9b49c52cd.txt   
1           100       209  2009  103597e5e5f4c7.jpg  103597e5e5f4c7.txt   
2           100       209  2009  1cb896f2a1632f.jpg  1cb896f2a1632f.txt   
3           100       209  2009  35bc7056ec3f25.jpg  35bc7056ec3f25.txt   
4           100       209  2009  398aad9e2ab271.jpg  398aad9e2ab271.txt   
...         ...       ...   ...                 ...                 ...   
136418        9      1116  2014  ead3092d0f77cf.jpg                  Z:   
136419        9      1116  2014  eb87452da4afca.jpg                  Z:   
136420        9      1116  2014  ec2c704ba9a644.jpg                  Z:   
136421        9      1116  2014  f1fb55695b3b71.jpg                  Z:   
136422        9      1116  2014  fcbbf50e539c24.jpg                  Z:   

        make_name    model_name car_type_name  
0           Honda       Odyssey           MPV  
1  

100%|████████████████████████████████████████████████████████████████████████| 106386/106386 [01:22<00:00, 1288.88it/s]


In [52]:
filtered_df['x_1'] = x1
filtered_df['y_1'] = y1
filtered_df['x_2'] = x2
filtered_df['y_2'] = y2
filtered_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['x_1'] = x1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['y_1'] = y1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['x_2'] = x2
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the c

Unnamed: 0,make_id,model_id,year,image,label,make_name,model_name,car_type_name,x_1,y_1,x_2,y_2
0,100,209,2009,0ff9e9b49c52cd.jpg,0ff9e9b49c52cd.txt,Honda,Odyssey,MPV,0.502203,0.567891,0.795154,0.864217
1,100,209,2009,103597e5e5f4c7.jpg,103597e5e5f4c7.txt,Honda,Odyssey,MPV,0.509901,0.566986,0.529153,0.614035
2,100,209,2009,1cb896f2a1632f.jpg,1cb896f2a1632f.txt,Honda,Odyssey,MPV,0.519824,0.563898,0.830396,0.808307
3,100,209,2009,35bc7056ec3f25.jpg,35bc7056ec3f25.txt,Honda,Odyssey,MPV,0.490639,0.599042,0.862335,0.760383
4,100,209,2009,398aad9e2ab271.jpg,398aad9e2ab271.txt,Honda,Odyssey,MPV,0.490088,0.586262,0.768722,0.523962


In [67]:
pre_data_reduction_csv_dir = 'Z:/CompCarsYOLO/data/pre_data_reduction.csv'
filtered_df.to_csv(pre_data_reduction_csv_dir, index=False)

NameError: name 'filtered_df' is not defined

In [54]:
unclassified_filter = 'UNDEFINED'

unclass_mask = filtered_df['car_type_name'] != unclassified_filter
mid_point_data = filtered_df.loc[unclass_mask]

In [55]:
mid_point_data.head()

Unnamed: 0,make_id,model_id,year,image,label,make_name,model_name,car_type_name,x_1,y_1,x_2,y_2
0,100,209,2009,0ff9e9b49c52cd.jpg,0ff9e9b49c52cd.txt,Honda,Odyssey,MPV,0.502203,0.567891,0.795154,0.864217
1,100,209,2009,103597e5e5f4c7.jpg,103597e5e5f4c7.txt,Honda,Odyssey,MPV,0.509901,0.566986,0.529153,0.614035
2,100,209,2009,1cb896f2a1632f.jpg,1cb896f2a1632f.txt,Honda,Odyssey,MPV,0.519824,0.563898,0.830396,0.808307
3,100,209,2009,35bc7056ec3f25.jpg,35bc7056ec3f25.txt,Honda,Odyssey,MPV,0.490639,0.599042,0.862335,0.760383
4,100,209,2009,398aad9e2ab271.jpg,398aad9e2ab271.txt,Honda,Odyssey,MPV,0.490088,0.586262,0.768722,0.523962


In [61]:
mid_point_data['car_type_id'] = [classes.index(x)-1 for x in mid_point_data['car_type_name']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mid_point_data['car_type_id'] = [classes.index(x)-1 for x in mid_point_data['car_type_name']]


In [8]:
mid_point_data = pd.read_csv('Z:/CompCarsYOLO/data/pre_data_reduction.csv')
mid_point_data.head()

Unnamed: 0,car_type_id,x_1,y_1,x_2,y_2,year,make_name,model_name,car_type_name,make_id,model_id,avg_curb_weight
0,0,0.502203,0.567891,0.795154,0.864217,2009,Honda,Odyssey,MPV,100,209,
1,0,0.509901,0.566986,0.529153,0.614035,2009,Honda,Odyssey,MPV,100,209,
2,0,0.519824,0.563898,0.830396,0.808307,2009,Honda,Odyssey,MPV,100,209,
3,0,0.490639,0.599042,0.862335,0.760383,2009,Honda,Odyssey,MPV,100,209,
4,0,0.490088,0.586262,0.768722,0.523962,2009,Honda,Odyssey,MPV,100,209,


In [9]:
desired_column_order = ['car_type_id', 'x_1', 'y_1', 'x_2', 'y_2', 'year', 'make_name', 'model_name', 'car_type_name', 'make_id', 'model_id', 'avg_curb_weight']
reconfiged_df = mid_point_data[desired_column_order]

In [10]:
reconfiged_df.head()

Unnamed: 0,car_type_id,x_1,y_1,x_2,y_2,year,make_name,model_name,car_type_name,make_id,model_id,avg_curb_weight
0,0,0.502203,0.567891,0.795154,0.864217,2009,Honda,Odyssey,MPV,100,209,
1,0,0.509901,0.566986,0.529153,0.614035,2009,Honda,Odyssey,MPV,100,209,
2,0,0.519824,0.563898,0.830396,0.808307,2009,Honda,Odyssey,MPV,100,209,
3,0,0.490639,0.599042,0.862335,0.760383,2009,Honda,Odyssey,MPV,100,209,
4,0,0.490088,0.586262,0.768722,0.523962,2009,Honda,Odyssey,MPV,100,209,


In [11]:
scraped_data = 'Z:/CompCarsYOLO/data/weight_to_model.csv'
df = pd.read_csv(scraped_data)
df.head()

Unnamed: 0,make,model,year,trim,curb_weight_lbs
0,Ford,Festiva,1990,L,1713
1,Ford,Festiva,1990,L Plus,1713
2,Suzuki,Swift,1992,GA,1716
3,Suzuki,Swift,1990,GA,1716
4,Suzuki,Swift,1991,GA,1720


In [12]:
def model_to_weight_correlation(listob):
    cum = 0
    count = 0
    for weight in range(len(temp)):
        cum += int(listob[weight])
        count += 1

    avg_weight = int(cum/count)
    
    return avg_weight

In [14]:
words_to_remove = reconfiged_df['make_name'].unique().tolist()
words_to_remove = words_to_remove + ['UNDEFINED','mpv', 'suv', 'sedan','hatchback','minibus','fastback','wagon', 'estate','pickup','hardtop convertible','sports','crossover','convertible']
temp = reconfiged_df['model_name'].copy()
temp = temp.tolist()
for i in tqdm(range(len(temp))):
    for word in words_to_remove:   
        if word in temp[i]:
            new = temp[i].replace(word, '').strip()
            temp[i] = new

reconfiged_df['model_name'] = temp 

100%|█████████████████████████████████████████████████████████████████████████| 76393/76393 [00:01<00:00, 53730.97it/s]


In [15]:
pre_data_reduction_csv_dir = 'Z:/CompCarsYOLO/data/pre_data_reduction_fixed_names.csv'
reconfiged_df.to_csv(pre_data_reduction_csv_dir, index=False)

In [253]:
model_temp = reconfiged_df['model_name'].tolist()
weight_temp = reconfiged_df['avg_curb_weight'].tolist()

for i in tqdm(range(len(model_temp))):
    for j in df['model'].unique():
        if model_temp[i] == j:
            temp = df[df['model'] == j]
            avg_weight = model_to_weight_correlation(temp['curb_weight_lbs'].tolist())
            weight_temp[i] = avg_weight

cope = reconfiged_df.copy()
cope['avg_curb_weight'] = weight_temp

100%|███████████████████████████████████████████████████████████████████████████| 76393/76393 [04:37<00:00, 274.89it/s]


In [20]:
cope = pd.read_csv('Z:/CompCarsYOLO/data/pre_data_reduction_fixed_names_w_weight.csv')
cope.head()

Unnamed: 0,car_type_id,x_1,y_1,x_2,y_2,year,make_name,model_name,car_type_name,make_id,model_id,avg_curb_weight
0,0,0.502203,0.567891,0.795154,0.864217,2009,Honda,Odyssey,MPV,100,209,4441.0
1,0,0.509901,0.566986,0.529153,0.614035,2009,Honda,Odyssey,MPV,100,209,4441.0
2,0,0.519824,0.563898,0.830396,0.808307,2009,Honda,Odyssey,MPV,100,209,4441.0
3,0,0.490639,0.599042,0.862335,0.760383,2009,Honda,Odyssey,MPV,100,209,4441.0
4,0,0.490088,0.586262,0.768722,0.523962,2009,Honda,Odyssey,MPV,100,209,4441.0


In [23]:
desired_column_order = ['car_type_id', 'x_1', 'y_1', 'x_2', 'y_2', 'year', 'make_name', 'model_name', 'car_type_name', 'make_id', 'model_id', 'avg_curb_weight']
fill_all= cope[desired_column_order]
fill_id=fill_all.dropna(subset='car_type_id')
fill_na=fill_all.dropna()
fill_temp =fill_all.isna()
fill_all_df_config = fill_na.drop_duplicates(subset=['model_name'])

class_weights = np.zeros(12)
counter_=  np.zeros(12)

mpv = fill_all_df_config[fill_all_df_config['car_type_id']==0]
suv = fill_all_df_config[fill_all_df_config['car_type_id']==1]
sed = fill_all_df_config[fill_all_df_config['car_type_id']==2]
hat =fill_all_df_config[fill_all_df_config['car_type_id']==3]
mini= fill_all_df_config[fill_all_df_config['car_type_id']==4]
fast=  fill_all_df_config[fill_all_df_config['car_type_id']==5]
wag=fill_all_df_config[fill_all_df_config['car_type_id']==6]
picku= fill_all_df_config[fill_all_df_config['car_type_id']==7]
hardc= fill_all_df_config[fill_all_df_config['car_type_id']==8]
sports= fill_all_df_config[fill_all_df_config['car_type_id']==9]
cross=fill_all_df_config[fill_all_df_config['car_type_id']==10]
conver=fill_all_df_config[fill_all_df_config['car_type_id']==11]



for i in range(12):
    if i==0:
        for j in mpv['avg_curb_weight']:
            class_weights[i] += j
            counter_[i] += 1
    elif i==1:
        for j in suv['avg_curb_weight']:
            class_weights[i] += j
            counter_[i] += 1
    elif i==2:
        for j in sed['avg_curb_weight']:
            class_weights[i] += j
            counter_[i] += 1
    elif i==3:
        for j in hat['avg_curb_weight']:
            class_weights[i] += j
            counter_[i] += 1
    elif i==4:
        for j in mini['avg_curb_weight']:
            class_weights[i] += j
            counter_[i] += 1
    elif i==5:
        for j in fast['avg_curb_weight']:
            class_weights[i] += j
            counter_[i] += 1
    elif i==6:
        for j in wag['avg_curb_weight']:
            class_weights[i] += j
            counter_[i] += 1
    elif i==7:
        for j in picku['avg_curb_weight']:
            class_weights[i] += j
            counter_[i] += 1
    elif i==8:
        for j in hardc['avg_curb_weight']:
            class_weights[i] += j
            counter_[i] += 1
    elif i==9:
        for j in sports['avg_curb_weight']:
            class_weights[i] += j
            counter_[i] += 1
    elif i==10:
        for j in cross['avg_curb_weight']:
            class_weights[i] += j
            counter_[i] += 1
    elif i==11:
        for j in conver['avg_curb_weight']:
            class_weights[i] += j
            counter_[i] += 1


avg_weight_class = class_weights/counter_
avg_weight_class[10] = (avg_weight_class[1] + avg_weight_class[2])/2

emptys=fill_id


for index,row in fill_id.iterrows():
    if fill_id.loc[index,'car_type_id']==0 and fill_temp.loc[index,'avg_curb_weight']==True:
        fill_id.loc[index,'avg_curb_weight']=int(avg_weight_class[0])
    elif fill_id.loc[index,'car_type_id']==1 and fill_id.loc[index,'avg_curb_weight']==True:
        fill_id.loc[index,'avg_curb_weight']=int(avg_weight_class[1])
    elif fill_id.loc[index,'car_type_id']==2 and fill_temp.loc[index,'avg_curb_weight']==True:
        fill_id.loc[index,'avg_curb_weight']=int(avg_weight_class[2])
    elif fill_id.loc[index,'car_type_id']==3 and fill_temp.loc[index,'avg_curb_weight']==True:
        fill_id.loc[index,'avg_curb_weight']=int(avg_weight_class[3])
    elif fill_id.loc[index,'car_type_id']==4 and fill_temp.loc[index,'avg_curb_weight']==True:
        fill_id.loc[index,'avg_curb_weight']=int(avg_weight_class[4])
    elif fill_id.loc[index,'car_type_id']==5 and fill_temp.loc[index,'avg_curb_weight']==True:
        fill_id.loc[index,'avg_curb_weight']=int(avg_weight_class[5])
    elif fill_id.loc[index,'car_type_id']==6 and fill_temp.loc[index,'avg_curb_weight']==True:
        fill_id.loc[index,'avg_curb_weight']=int(avg_weight_class[6])
    elif fill_id.loc[index,'car_type_id']==7 and fill_temp.loc[index,'avg_curb_weight']==True:
        fill_id.loc[index,'avg_curb_weight']=int(avg_weight_class[7])
    elif fill_id.loc[index,'car_type_id']==8 and fill_temp.loc[index,'avg_curb_weight']==True:
        fill_id.loc[index,'avg_curb_weight']=int(avg_weight_class[8])
    elif fill_id.loc[index,'car_type_id']==9 and fill_temp.loc[index,'avg_curb_weight']==True:
        fill_id.loc[index,'avg_curb_weight']=int(avg_weight_class[9])
    elif fill_id.loc[index,'car_type_id']==10 and fill_temp.loc[index,'avg_curb_weight']==True:
        fill_id.loc[index,'avg_curb_weight']=int(avg_weight_class[10])
    elif fill_id.loc[index,'car_type_id']==11 and fill_temp.loc[index,'avg_curb_weight']==True:
        fill_id.loc[index,'avg_curb_weight']=int(avg_weight_class[11])


# final_con= pd.concat([fill_id, emptys], axis=1)
 
holder_cope=fill_id
holder_cope['avg_curb_weight']=fill_id['avg_curb_weight']
final_con =holder_cope
final_con.head()

  avg_weight_class = class_weights/counter_


Unnamed: 0,car_type_id,x_1,y_1,x_2,y_2,year,make_name,model_name,car_type_name,make_id,model_id,avg_curb_weight
0,0,0.502203,0.567891,0.795154,0.864217,2009,Honda,Odyssey,MPV,100,209,4441.0
1,0,0.509901,0.566986,0.529153,0.614035,2009,Honda,Odyssey,MPV,100,209,4441.0
2,0,0.519824,0.563898,0.830396,0.808307,2009,Honda,Odyssey,MPV,100,209,4441.0
3,0,0.490639,0.599042,0.862335,0.760383,2009,Honda,Odyssey,MPV,100,209,4441.0
4,0,0.490088,0.586262,0.768722,0.523962,2009,Honda,Odyssey,MPV,100,209,4441.0


In [32]:
weights_csv_dir = 'Z:/CompCarsYOLO/data/avg_weight_on_per_class.csv'
avg_w = avg_weight_class.tolist()
df = pd.DataFrame(avg_w)
df.to_csv(weights_csv_dir, index=False)

In [323]:
default_drop_all_df_config = final_con.dropna()
default_drop_all_df_config.head()

Unnamed: 0,model_name,car_type_id,avg_curb_weight
0,Odyssey,0,4441.0
1,Odyssey,0,4441.0
2,Odyssey,0,4441.0
3,Odyssey,0,4441.0
4,Odyssey,0,4441.0


In [324]:
print(default_drop_all_df_config.shape)
print(cope.shape)

(55144, 3)
(76393, 12)


In [326]:
pre_data_reduction_csv_dir = 'Z:/CompCarsYOLO/data/pre_data_reduction_fixed_names_w_weight_averaged.csv'
final_con.to_csv(pre_data_reduction_csv_dir, index=False)

In [304]:
print(default_drop_all_df_config.shape)
print(cope.shape)

(21709, 3)
(76393, 12)


In [33]:
final_csv_dir = 'Z:/CompCarsYOLO/data/final_data_config.csv'
final_con.to_csv(final_csv_dir, index=False)