In [34]:
import pandas as pd
import numpy as np
import glob
import os
import re
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()

data = pd.read_csv("data/traffic_sign_data/traffic_sign_recog_final.csv")

In [35]:
classes_to_take = ["pedestrianCrossing", "stop", "signalAhead","yield", "addedLane", "school","speedLimit35","laneEnds",
 "speedLimit25","turnRight", "speedLimit45","speedLimit30","speedLimitUrdbl","noLeftTurn",
 "schoolSpeedLimit25"]

data = data[data["class_name"].isin( classes_to_take)]

In [36]:
data["class_name"].value_counts()

pedestrianCrossing    6276
stop                  1914
signalAhead            999
yield                  676
addedLane              667
school                 637
speedLimit35           538
laneEnds               483
speedLimit25           349
turnRight              205
speedLimit45           141
speedLimit30           140
speedLimitUrdbl        132
noLeftTurn             108
schoolSpeedLimit25     105
Name: class_name, dtype: int64

In [37]:
import shutil

In [38]:
shutil.rmtree("data/traffic_sign_data/LISA/labels/")
os.mkdir("data/traffic_sign_data/LISA/labels")

In [13]:
# os.mkdir("RTSD")
# os.mkdir("LISA")
# os.mkdir("RTSD/labels")
# os.mkdir("LISA/labels")
# os.mkdir("RTSD/images")
# os.mkdir("LISA/images")

In [39]:
f_names = data['filename'].unique()

data["class_ids"] = le.fit_transform(data["class_name"].values)

unq_class_ids = data[["class_ids", "class_name"]].drop_duplicates().sort_values("class_ids")
unq_class_ids.index = range(unq_class_ids.shape[0])

In [40]:
unq_class_ids

Unnamed: 0,class_ids,class_name
0,0,addedLane
1,1,laneEnds
2,2,noLeftTurn
3,3,pedestrianCrossing
4,4,school
5,5,schoolSpeedLimit25
6,6,signalAhead
7,7,speedLimit25
8,8,speedLimit30
9,9,speedLimit35


In [15]:
unq_class_ids["class_name"].values

array(['addedLane', 'laneEnds', 'noLeftTurn', 'pedestrianCrossing',
       'school', 'schoolSpeedLimit25', 'signalAhead', 'speedLimit25',
       'speedLimit30', 'speedLimit35', 'speedLimit45', 'speedLimitUrdbl',
       'stop', 'turnRight', 'yield'], dtype=object)

In [41]:
for f in f_names:
    data_f = data[data["filename"] == f]
    write_to_file = []
    for r in data_f.iterrows():
        label_idx = r[1]["class_ids"]
        xmin = r[1]["xmin"]
        xmax = r[1]["xmax"]
        ymin = r[1]["ymin"]
        ymax = r[1]["ymax"]
        img_height = r[1]["height"]
        img_width = r[1]["width"]
        x_center = (xmin + xmax)/2
        x_center = x_center/img_width
        y_center = (ymin + ymax)/2
        y_center = y_center/img_height
        height = ymax - ymin
        height = height/img_height
        width = xmax - xmin
        width = width/img_width
        str_to_append = str(label_idx)+ " " + str(x_center) + " " + str(y_center) + " " + str(width) + " " + str(height) 
        write_to_file.append(str_to_append)
    if r[1]["dataset"] == "lisa":
        with open("data/traffic_sign_data/LISA/labels/"+r[1]["filename"].replace(".png", ".txt"), "w") as f:
            f.write("\n".join(write_to_file))
        f.close()
    elif r[1]["dataset"] == "rtsd":
        with open("data/traffic_sign_data/RTSD/labels/"+r[1]["filename"].replace(".jpg", ".txt"), "w") as f:
            f.write("\n".join(write_to_file))
        f.close()

In [26]:
with open("data/traffic_sign_data/LISA/labels/stop_1330545910.avi_image0.txt") as f:
    label = f.read()

In [27]:
print(label)

15 0.8681640625 0.2509578544061303 0.052734375 0.10344827586206896
14 0.42138671875 0.39272030651340994 0.0126953125 0.03065134099616858


## Training val dist LISA 

In [17]:
import pandas as pd
import numpy as np
import glob
import os
import re
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
from sklearn.model_selection import train_test_split

data = pd.read_csv("data/traffic_sign_data/traffic_sign_recog_final.csv")

In [18]:
classes_to_take = ["pedestrianCrossing", "stop", "signalAhead","yield", "addedLane", "school","speedLimit35","laneEnds",
 "speedLimit25","turnRight", "speedLimit45","speedLimit30","speedLimitUrdbl","noLeftTurn",
 "schoolSpeedLimit25"]

data = data[data["class_name"].isin( classes_to_take)]

In [19]:
f_names = data['filename'].unique()

data["class_ids"] = le.fit_transform(data["class_name"].values)

unq_class_ids = data[["class_ids", "class_name"]].drop_duplicates().sort_values("class_ids")
unq_class_ids.index = range(unq_class_ids.shape[0])

In [20]:
data_lisa = data[data["dataset"] == "lisa"]

In [21]:
X_train, X_test, y_train, y_test = train_test_split(data_lisa, data_lisa[["class_ids"]], test_size = .15, stratify=data_lisa[["class_ids"]])

In [43]:
X_train["class_ids"].value_counts()

12    1548
3      922
6      786
9      457
7      297
0      250
14     201
1      178
10     120
8      119
4      113
11     112
5       89
13      78
2       40
Name: class_ids, dtype: int64

In [42]:
X_test["class_ids"].value_counts()

12    273
3     163
6     139
9      81
7      52
0      44
14     35
1      32
10     21
8      21
11     20
4      20
5      16
13     14
2       7
Name: class_ids, dtype: int64

In [24]:
train_names = ["data/traffic_sign_data/LISA/images/" +f for f in X_train["filename"].unique()]

In [25]:
len(train_names)/4

1142.5

In [26]:
val_names = ["data/traffic_sign_data/LISA/images/"+f for f in X_test["filename"].unique()]

In [27]:
len(val_names)/4

229.5

In [28]:
with open("data/traffic_sign_data/train_lisa.txt", 'w') as f:
    f.write("\n".join(train_names))

In [29]:
with open("data/traffic_sign_data/valid_lisa.txt", 'w') as f:
    f.write("\n".join(val_names))