diff --git a/.gitignore b/.gitignore
index 37b3e6d..bec4f9f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,5 @@
 .DS_Store
-/Data/
+/Data/fsoco_segmentation_train
+/Segmentation/Yolact_minimal
+/SampleData/driverless.mp4
+*.txt
diff --git a/Data/README.md b/Data/README.md
new file mode 100644
index 0000000..fb383ce
--- /dev/null
+++ b/Data/README.md
@@ -0,0 +1 @@
+Clone the FSOCO data into this directory
diff --git a/Projects/00Template.md b/Projects/00Template.md
new file mode 100644
index 0000000..f2e6795
--- /dev/null
+++ b/Projects/00Template.md
@@ -0,0 +1,13 @@
+# Project Title
+
+## Active Members
+
+## Design Review Dates/Timeline
+
+## Introduction
+
+## Overview of the Problem
+
+## Steps in the project
+
+## Suggested direction
\ No newline at end of file
diff --git a/Projects/ControlPlanning.md b/Projects/ControlPlanning.md
new file mode 100644
index 0000000..5586123
--- /dev/null
+++ b/Projects/ControlPlanning.md
@@ -0,0 +1,30 @@
+# Control Planning
+
+## Active Members
+1. Ray Zou
+
+## Design Review Dates/Timeline
+1. PDR November 3rd
+
+## Introduction
+Control input planning is a vehicle dynamics problem: generate the
+optimal set of control inputs needed to follow a predetermined path.
+Other projects, such as path planning, will already generate the path
+that must be followed.
+
+## Overview of the Problem
+Working with the VD and Simulations team, there will be a program
+that can use 3 degrees of freedom (brake, throttle, steering) to
+estimate the change in position over a given time horizon. Using this
+simulation and a given path, you must identify what inputs are
+required to follow that path.
+
+## Steps in the project
+1. Understand the file format for the given path
+1. Understand the capabilities of the vehicle dynamics simulations
+1. Literature review of methods to follow a line
+1. Design of a control loop
+1. Implementation of input selection for a short period of time
+1. Optimization
+
+## Suggested direction
diff --git a/Projects/IMUDataProcessing.md b/Projects/IMUDataProcessing.md
new file mode 100644
index 0000000..b3d0acc
--- /dev/null
+++ b/Projects/IMUDataProcessing.md
@@ -0,0 +1,30 @@
+# IMU Data Processing
+
+## Active Members
+1. Saahith Veeramaneni
+
+## Design Review Dates/Timeline
+1. Literature review by November 3rd
+1. PDR by November 10th
+1. Theoretical codebase by Thanksgiving
+
+## Introduction
+Due to the limited compute on the car, it is not possible to use
+multiple cameras, so this project will explore using structure from
+motion (SfM) to create binocular views.
+
+## Overview of the Problem
+Having multiple perspectives from the car allows the creation of a
+disparity map, which can be used to estimate depth if the exact
+distance between the camera positions is well understood. Thus, we
+must explore whether the IMU is accurate enough to use SfM in the
+data processing pipeline.
+
+## Steps in the project
+1. Literature review
+1. Understanding compute requirements
+1. Draft of the code if it is feasible
+
+## Suggested direction
+1. Begin with a thorough literature review, exploring the use of SfM
+   by other Formula Student teams and in industry
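The relation behind the IMU Data Processing overview above is the standard stereo one, depth = focal length x baseline / disparity, so the usefulness of SfM hinges on how precisely the IMU can recover the baseline between two camera poses. The snippet below is a minimal illustrative sketch of that relation and its sensitivity; the focal length, baseline, and disparity values are made-up numbers, not project parameters.

import numpy as np

def depthFromDisparity(disparityPx, focalPx, baselineM):
    """Depth in metres from disparity in pixels, given focal length (px) and baseline (m)."""
    disparityPx = np.asarray(disparityPx, dtype=float)
    return np.where(disparityPx > 0, focalPx * baselineM / disparityPx, np.inf)

# Example: an 800 px focal length, a 0.20 m baseline recovered from IMU-integrated
# motion, and a 16 px disparity give a 10.0 m depth estimate.
print(depthFromDisparity(16.0, 800.0, 0.20))
# A 1 px disparity error (17 px instead of 16 px) already moves that estimate to about 9.4 m,
# which is the kind of sensitivity the IMU accuracy study has to quantify.
print(depthFromDisparity(17.0, 800.0, 0.20))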
diff --git a/Projects/Localization.md b/Projects/Localization.md
new file mode 100644
index 0000000..14d79f7
--- /dev/null
+++ b/Projects/Localization.md
@@ -0,0 +1,29 @@
+# Localization
+
+## Active Members
+1. Parker Costa
+1. Hansika Nerusa
+1. Chanchal Mukeshsingh
+
+## Design Review Dates/Timeline
+1. PDR by November 4th
+1. Creation of an independent working system by the end of the first quarter
+
+## Introduction
+Localization deals with the creation of both a global and a local map
+based on camera inputs so that the car can understand how it is moving
+and what its surroundings are.
+
+## Overview of the Problem
+Fed raw data from the IMU, depth estimation, and cone segmentation,
+the project must identify where cones are and build a global map over
+time, estimating cone positions even when the data is noisy.
+
+## Steps in the project
+1. Literature review and PDR
+1. Communication and finalization of input and output data formats
+1. Code base developed in a virtual test bench
+1. Code review
+
+## Suggested direction
diff --git a/Projects/PathPlanning.md b/Projects/PathPlanning.md
new file mode 100644
index 0000000..c117d41
--- /dev/null
+++ b/Projects/PathPlanning.md
@@ -0,0 +1,28 @@
+# Path Planning
+
+## Active Members
+1. Suhani Agarwal
+1. Nathan Yee
+1. Aedan Benavides
+
+## Design Review Dates/Timeline
+1. PDR by November 3rd
+1. Finished by end of winter quarter
+
+## Introduction
+The project focuses on identifying a path from the map data, using
+the cones to find the optimal route through the track.
+
+## Overview of the Problem
+Raw map data is presented as a coordinate system on a 2D plane. To
+generate a route through it, a polygon-and-direction representation
+of the path must be identified.
+
+## Steps in the project
+1. Literature review
+1. PDR
+1. Triangulation methods
+1. Data format standardization
+
+## Suggested direction
+- Unknown
diff --git a/Projects/Yolact.md b/Projects/Yolact.md
new file mode 100644
index 0000000..b074a3a
--- /dev/null
+++ b/Projects/Yolact.md
@@ -0,0 +1,33 @@
+# YOLACT
+
+## Active Members
+1. Abishek Adari
+1. Dylan Price
+1. Jaisree D. RaviKumar
+
+## Design Review Dates/Timeline
+1. Read the paper and PDR by November 3rd
+1. Implementation and training of YOLACTEdge by November 24th
+1. Benchmarking and optimization for edge devices by end of quarter
+
+## Introduction
+Identifying cones is a difficult problem; YOLACT performs real-time
+instance segmentation, identifying each cone and classifying its
+pixels by cone class.
+
+## Overview of the Problem
+The self-driving car must know the bounds within which it can travel,
+and creating those bounds requires identifying where in the image each
+cone is. We do this using an implementation of the YOLACTEdge model,
+which classifies every pixel of each detected cone. We target at
+least 30 FPS when running alone on a Jetson Orin Nano.
+
+## Steps in the project
+1. Read the papers
+1. Base reimplementation of the project
+1. Data processing
+1. Training
+1. Evaluation
+
+## Suggested direction
+Read the YOLACT, YOLACT++, and YOLACTEdge papers.
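For the "Triangulation methods" step in PathPlanning.md above, a common Formula Student approach is to Delaunay-triangulate the cone positions and keep the midpoints of edges that join a left-boundary cone to a right-boundary cone. The sketch below only illustrates that idea using SciPy's Delaunay triangulation (an added dependency, not something the repo currently uses); the cone arrays and the blue/yellow labelling are assumed inputs rather than the project's agreed data format, and ordering the midpoints into a drivable path is the part the project still has to design.

import numpy as np
from scipy.spatial import Delaunay

def centerlinePoints(blueCones, yellowCones):
    """Midpoints of Delaunay edges that connect a blue (left) cone to a yellow (right) cone."""
    cones = np.vstack([blueCones, yellowCones])        # (N, 2) x/y positions on the 2D map
    isBlue = np.arange(len(cones)) < len(blueCones)    # True for rows that came from blueCones
    triangulation = Delaunay(cones)
    midpoints = set()
    for simplex in triangulation.simplices:            # vertex indices of each triangle
        for i, j in [(0, 1), (1, 2), (0, 2)]:
            a, b = simplex[i], simplex[j]
            if isBlue[a] != isBlue[b]:                 # this edge crosses the track
                midpoints.add(tuple((cones[a] + cones[b]) / 2))
    return np.array(sorted(midpoints))

# Toy example: a short straight corridor of cones, 3 m wide.
blue = np.array([[0.0, 1.5], [2.0, 1.5], [4.0, 1.5]])
yellow = np.array([[0.0, -1.5], [2.0, -1.5], [4.0, -1.5]])
print(centerlinePoints(blue, yellow))   # midpoints along y = 0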
diff --git a/SampleData/README.md b/SampleData/README.md
new file mode 100644
index 0000000..3b958dd
--- /dev/null
+++ b/SampleData/README.md
@@ -0,0 +1,2 @@
+Download the YouTube video here:
+https://www.youtube.com/watch?v=o5vES5QaeiQ
diff --git a/Segmentation/README.md b/Segmentation/README.md
new file mode 100644
index 0000000..340244e
--- /dev/null
+++ b/Segmentation/README.md
@@ -0,0 +1,4 @@
+Clone this repo into this directory to run segmentation:
+https://github.com/feiyuhuahuo/Yolact_minimal
+
+Move the FS
diff --git a/Segmentation/__pycache__/convertSupervisely2Coco.cpython-312.pyc b/Segmentation/__pycache__/convertSupervisely2Coco.cpython-312.pyc
new file mode 100644
index 0000000..7a18da5
Binary files /dev/null and b/Segmentation/__pycache__/convertSupervisely2Coco.cpython-312.pyc differ
diff --git a/Segmentation/convertSupervisely2Coco.py b/Segmentation/convertSupervisely2Coco.py
new file mode 100644
index 0000000..bcf98a0
--- /dev/null
+++ b/Segmentation/convertSupervisely2Coco.py
@@ -0,0 +1,143 @@
+import json
+import os
+import glob
+import base64
+import zlib
+import numpy as np
+from pathlib import Path
+import cv2
+from io import BytesIO
+
+def decodeSuperviselyBitmap(bitmapData, origin, imgHeight, imgWidth):
+    data = base64.b64decode(bitmapData['data'])
+    pngData = zlib.decompress(data)
+
+    maskImg = cv2.imdecode(np.frombuffer(pngData, dtype=np.uint8), cv2.IMREAD_GRAYSCALE)
+
+    if maskImg is None:
+        return None
+
+    mask = (maskImg > 0).astype(np.uint8)
+
+    fullMask = np.zeros((imgHeight, imgWidth), dtype=np.uint8)
+    y1, x1 = origin[1], origin[0]
+    y2, x2 = min(y1 + mask.shape[0], imgHeight), min(x1 + mask.shape[1], imgWidth)
+    fullMask[y1:y2, x1:x2] = mask[:y2-y1, :x2-x1]
+    return fullMask
+
+def maskToRle(mask):
+    pixels = mask.flatten()
+    pixels = np.concatenate([[0], pixels, [0]])
+    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
+    runs[1::2] -= runs[::2]
+    return runs.tolist()
+
+def maskToBbox(mask):
+    rows = np.any(mask, axis=1)
+    cols = np.any(mask, axis=0)
+    if not rows.any() or not cols.any():
+        return [0, 0, 0, 0]
+    ymin, ymax = np.where(rows)[0][[0, -1]]
+    xmin, xmax = np.where(cols)[0][[0, -1]]
+    return [int(xmin), int(ymin), int(xmax - xmin + 1), int(ymax - ymin + 1)]
+
+def convertSupervisely2Coco(dataRoot, outputPath):
+    metaPath = os.path.join(dataRoot, 'meta.json')
+    with open(metaPath, 'r') as f:
+        meta = json.load(f)
+
+    segClasses = [c for c in meta['classes'] if c['shape'] == 'bitmap']
+    classIdToCocoId = {c['id']: idx + 1 for idx, c in enumerate(segClasses)}
+
+    cocoData = {
+        'images': [],
+        'annotations': [],
+        'categories': [{'id': idx + 1, 'name': c['title'], 'supercategory': 'cone'}
+                       for idx, c in enumerate(segClasses)]
+    }
+
+    print(f"Found {len(segClasses)} segmentation classes:")
+    for cat in cocoData['categories']:
+        print(f"  {cat['id']}: {cat['name']}")
+
+    teamDirs = [d for d in os.listdir(dataRoot) if os.path.isdir(os.path.join(dataRoot, d))]
+
+    imageId = 0
+    annotationId = 0
+
+    for teamDir in sorted(teamDirs):
+        annDir = os.path.join(dataRoot, teamDir, 'ann')
+        imgDir = os.path.join(dataRoot, teamDir, 'img')
+
+        if not os.path.exists(annDir) or not os.path.exists(imgDir):
+            continue
+
+        annFiles = glob.glob(os.path.join(annDir, '*.json'))
+        print(f"\nProcessing {teamDir}: {len(annFiles)} images", flush=True)
+
+        for idx, annFile in enumerate(sorted(annFiles)):
+            if idx % 20 == 0:
+                print(f"  {teamDir}: {idx}/{len(annFiles)}", flush=True)
+            with open(annFile, 'r') as f:
+                ann = json.load(f)
+
+            imgFilename = os.path.basename(annFile).replace('.json', '')
+            imgPath = os.path.join(teamDir, 'img', imgFilename)
+
+            cocoData['images'].append({
+                'id': imageId,
+                'file_name': imgPath,
+                'height': ann['size']['height'],
+                'width': ann['size']['width']
+            })
+
+            for obj in ann['objects']:
+                if obj['geometryType'] != 'bitmap':
+                    continue
+
+                classId = obj['classId']
+                if classId not in classIdToCocoId:
+                    continue
+
+                cocoId = classIdToCocoId[classId]
+
+                fullMask = decodeSuperviselyBitmap(obj['bitmap'], obj['bitmap']['origin'],
+                                                   ann['size']['height'], ann['size']['width'])
+
+                if fullMask is None:
+                    continue
+
+                bbox = maskToBbox(fullMask)
+                area = int(np.sum(fullMask))
+
+                if area == 0:
+                    continue
+
+                rle = maskToRle(fullMask)
+
+                cocoData['annotations'].append({
+                    'id': annotationId,
+                    'image_id': imageId,
+                    'category_id': cocoId,
+                    'segmentation': [rle],
+                    'area': area,
+                    'bbox': bbox,
+                    'iscrowd': 0
+                })
+                annotationId += 1
+
+            imageId += 1
+
+    print(f"\n{'='*60}")
+    print(f"Total images: {len(cocoData['images'])}")
+    print(f"Total annotations: {len(cocoData['annotations'])}")
+
+    with open(outputPath, 'w') as f:
+        json.dump(cocoData, f)
+
+    print(f"Saved to: {outputPath}")
+
+if __name__ == '__main__':
+    dataRoot = 'Data/fsoco_segmentation_train'
+    outputPath = 'Data/fsoco_segmentation_train/train_coco.json'
+    convertSupervisely2Coco(dataRoot, outputPath)
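convertSupervisely2Coco.py stores each mask as the start/length run-length list produced by maskToRle (1-based start positions over the row-major flattened mask). Below is a minimal round-trip check of that encoding; note this is not the RLE layout that pycocotools expects, so whatever consumes train_coco.json may need its own decoder like the one sketched here. The import assumes the snippet sits next to the converter in Segmentation/.

import numpy as np
from convertSupervisely2Coco import maskToRle

def rleToMask(rle, height, width):
    """Invert maskToRle: rle is [start1, len1, start2, len2, ...] with 1-based starts."""
    flat = np.zeros(height * width, dtype=np.uint8)
    for start, length in zip(rle[0::2], rle[1::2]):
        flat[start - 1:start - 1 + length] = 1
    return flat.reshape(height, width)   # row-major, matching mask.flatten() in the encoder

mask = np.zeros((4, 6), dtype=np.uint8)
mask[1:3, 2:5] = 1                        # a small rectangular blob
rle = maskToRle(mask)
print(rle)                                # [9, 3, 15, 3]
assert np.array_equal(rleToMask(rle, 4, 6), mask)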
diff --git a/Segmentation/detectCones.py b/Segmentation/detectCones.py
new file mode 100755
index 0000000..2386f63
--- /dev/null
+++ b/Segmentation/detectCones.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+import sys
+import os
+import subprocess
+
+def main():
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    yolact_dir = os.path.join(script_dir, 'yolact_edge')
+
+    args = sys.argv[1:]
+
+    default_args = [
+        '--config=yolact_edge_mobilenetv2_cone_config',
+        '--score_threshold=0.3',
+        '--top_k=100',
+    ]
+
+    provided_flags = {arg.split('=')[0].lstrip('-') for arg in args if '=' in arg or arg.startswith('--')}
+
+    final_args = []
+    for default_arg in default_args:
+        flag = default_arg.split('=')[0].lstrip('-')
+        if flag not in provided_flags and not any(arg.startswith('--' + flag) for arg in args):
+            final_args.append(default_arg)
+
+    final_args.extend(args)
+
+    cmd = [sys.executable, 'eval.py'] + final_args
+
+    print(f"Running inference in {yolact_dir}")
+    print(f"Command: {' '.join(cmd)}\n")
+
+    os.chdir(yolact_dir)
+
+    result = subprocess.run(cmd)
+    sys.exit(result.returncode)
+
+if __name__ == '__main__':
+    main()
diff --git a/Segmentation/trainCones.py b/Segmentation/trainCones.py
new file mode 100755
index 0000000..dbbe757
--- /dev/null
+++ b/Segmentation/trainCones.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+import sys
+import os
+import subprocess
+
+def main():
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    yolact_dir = os.path.join(script_dir, 'yolact_edge')
+
+    args = sys.argv[1:]
+
+    default_args = [
+        '--config=yolact_edge_mobilenetv2_cone_config',
+        '--batch_size=4',
+        '--save_folder=weights/',
+        '--validation_epoch=2',
+        '--save_interval=5000',
+        '--max_checkpoints=10',
+        '--num_workers=0',
+    ]
+
+    provided_flags = {arg.split('=')[0].lstrip('-') for arg in args if '=' in arg or arg.startswith('--')}
+
+    final_args = []
+    for default_arg in default_args:
+        flag = default_arg.split('=')[0].lstrip('-')
+        if flag not in provided_flags and not any(arg.startswith('--' + flag) for arg in args):
+            final_args.append(default_arg)
+
+    final_args.extend(args)
+
+    cmd = [sys.executable, 'train.py'] + final_args
+
+    print(f"Running training in {yolact_dir}")
+    print(f"Command: {' '.join(cmd)}\n")
+
+    os.chdir(yolact_dir)
+
+    result = subprocess.run(cmd)
+    sys.exit(result.returncode)
+
+if __name__ == '__main__':
+    main()
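Both wrappers change into Segmentation/yolact_edge and forward the merged arguments to that repo's train.py / eval.py, so any flag passed on the command line suppresses the corresponding baked-in default. Example invocations, using only flags that already appear in the defaults above (other yolact_edge flags are not covered here):

python3 Segmentation/trainCones.py --batch_size=8          # replaces the default --batch_size=4
python3 Segmentation/detectCones.py --score_threshold=0.5  # replaces the default --score_threshold=0.3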
print(f"Running training in {yolact_dir}") + print(f"Command: {' '.join(cmd)}\n") + + os.chdir(yolact_dir) + + result = subprocess.run(cmd) + sys.exit(result.returncode) + +if __name__ == '__main__': + main() diff --git a/viewData.py b/viewData.py new file mode 100644 index 0000000..87953be --- /dev/null +++ b/viewData.py @@ -0,0 +1,29 @@ +import json +import base64 +import io +import zlib +import numpy as np +from PIL import Image +import matplotlib.pyplot as plt + +with open('Data/fsoco_segmentation_train/epflrt/ann/amz_00825.png.json') as f: + ann = json.load(f) + +img = Image.open('Data/fsoco_segmentation_train/epflrt/img/amz_00825.png') +fullMask = np.zeros((ann['size']['height'], ann['size']['width']), dtype=np.uint8) + +for obj in ann['objects']: + if obj['geometryType'] == 'bitmap': + bitmapData = base64.b64decode(obj['bitmap']['data']) + + decompressed = zlib.decompress(bitmapData) + mask = Image.open(io.BytesIO(decompressed)).convert('L') + + origin = obj['bitmap']['origin'] + maskArray = np.array(mask) + fullMask[origin[1]:origin[1]+mask.height, origin[0]:origin[0]+mask.width] = np.maximum(fullMask[origin[1]:origin[1]+mask.height, origin[0]:origin[0]+mask.width], maskArray) + +plt.figure(figsize=(15, 8)) +plt.imshow(img) +plt.imshow(fullMask, alpha=0.5) +plt.show()