### Overview of the dataset

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import glob

##### How many files are in the dataset?

In [4]:
### Define function to count the files
def count_files(folder_path, file_extension):
    """Count the files in ``folder_path`` that have the given extension.

    Args:
        folder_path: Directory to search. Sub-directories are not searched.
        file_extension: Extension to match, with or without a leading dot
            (``"csv"`` and ``".csv"`` are treated the same).

    Returns:
        int: Number of entries directly inside ``folder_path`` whose name
        matches ``*.<file_extension>``.
    """
    # Drop any leading dot so ".csv" does not turn into the pattern "*..csv",
    # which would silently miss ordinary "name.csv" files.
    extension = file_extension.lstrip(".")

    # Build the glob pattern and count whatever it matches.
    pattern = os.path.join(folder_path, f"*.{extension}")
    return len(glob.glob(pattern))

# Example usage: count the CSV files in the local dataset checkout.
# NOTE(review): this is an absolute, machine-specific path; later cells use
# the relative "OBD-II-Dataset" folder (presumably the same data - confirm).
file_extension = "csv"
folder_path = "C:/Users/user/Documents/GitHub/Automotive_Diagnostics/OBD-II-Dataset"

count = count_files(folder_path=folder_path, file_extension=file_extension)
print(f"Number of {file_extension} files in {folder_path} is: {count}")

Number of csv files in C:/Users/user/Documents/GitHub/Automotive_Diagnostics/OBD-II-Dataset is: 81


### Identify unique paths

In [13]:
def identify_unique_path(folder_path):
    """Summarise the origins, destinations and routes encoded in CSV file names.

    Every ``*.csv`` file directly inside ``folder_path`` is expected to have an
    underscore-separated name such as ``2017-07-05_Seat_Leon_RT_S_Stau.csv``,
    where field 3 is the origin and field 4 is the destination (0-based).

    Args:
        folder_path: Directory containing the CSV files. It may itself contain
            underscores; only the file name is parsed.

    Returns:
        dict with the keys
            "Origin": list of origin codes, one per file,
            "Destination": list of destination codes, one per file,
            "Travel Path": list of "<origin>_<destination>" strings,
            "Unique Origin": set of distinct origin codes,
            "Unique Destination": set of distinct destination codes,
            "Summary": sentence with the number of distinct origins,
                destinations and travel paths.

    Raises:
        IndexError: If a file name has fewer than five underscore-separated
            fields.
    """
    pattern = os.path.join(folder_path, "*.csv")

    origin = []
    destination = []
    travel_path = []
    for filename in glob.glob(pattern):
        # Parse the file name only: an underscore anywhere in the folder part
        # of the path would otherwise shift the field positions.
        components = os.path.basename(filename).split('_')
        start, end = components[3], components[4]
        origin.append(start)
        destination.append(end)
        travel_path.append(start + '_' + end)

    unique_origin = set(origin)
    unique_destination = set(destination)
    unique_path = set(travel_path)

    return {
        "Origin": origin,
        "Destination": destination,
        "Travel Path": travel_path,
        "Unique Origin": unique_origin,
        "Unique Destination": unique_destination,
        "Summary": f"The dataset contains {len(unique_origin)} origin, {len(unique_destination)} destination and {len(unique_path)} unique paths"
    }


def identify_path(file_name):
    """Extract origin, destination and traffic condition from one file name.

    The name is expected to be underscore-separated, e.g.
    ``2017-07-05_Seat_Leon_RT_S_Stau.csv``, which yields origin ``"RT"``,
    destination ``"S"`` and condition ``"Stau"``.

    Args:
        file_name: File name or path of a dataset CSV file. Any leading
            directories and the file extension are ignored.

    Returns:
        dict with the keys "Origin", "Destination" and "Condition".

    Raises:
        IndexError: If the name has fewer than six underscore-separated fields.
    """
    # Keep only the file name and drop the extension, so the last field is
    # "Stau" rather than "Stau.csv".
    stem = os.path.splitext(os.path.basename(file_name))[0]
    components = stem.split('_')

    # Fields 0-2 precede the route ("2017-07-05", "Seat", "Leon" in the example
    # above); origin and destination are fields 3 and 4, the same positions
    # identify_unique_path reads, and the condition follows at field 5.
    origin = components[3]
    destination = components[4]
    cond = components[5]
    return {
        "Origin": origin,
        "Destination": destination,
        "Condition": cond
    }

# Example file: keep only the file-name part of the path.
file_path = "OBD-II-Dataset/2017-07-05_Seat_Leon_RT_S_Stau.csv"
file_name = file_path.rsplit('/', 1)[-1]
# print(identify_path(file_name))

# Summarise the routes of every CSV file in the dataset folder.
folder_path = "OBD-II-Dataset"
print(identify_unique_path(folder_path=folder_path))


{'Origin': ['RT', 'S', 'KA', 'KA', 'RT', 'S', 'KA', 'KA', 'KA', 'S', 'RT', 'S', 'KA', 'RT', 'KA', 'KA', 'RT', 'RT', 'S', 'KA', 'KA', 'KA', 'KA', 'KA', 'RT', 'KA', 'KA', 'RT', 'S', 'S', 'S', 'KA', 'RT', 'RT', 'S', 'RT', 'S', 'RT', 'S', 'RT', 'S', 'S', 'BB', 'RT', 'RT', 'KA', 'BB', 'KA', 'KA', 'RT', 'RT', 'RT', 'S', 'S', 'KA', 'RT', 'RT', 'KA', 'KA', 'RT', 'S', 'RT', 'S', 'RT', 'KA', 'RT', 'S', 'RT', 'KA', 'RT', 'KA', 'KA', 'RT', 'S', 'RT', 'S', 'RT', 'S', 'KA', 'KA', 'RT'], 'Destination': ['S', 'KA', 'KA', 'RT', 'S', 'RT', 'KA', 'KA', 'S', 'RT', 'S', 'RT', 'KA', 'KA', 'KA', 'RT', 'KA', 'S', 'KA', 'KA', 'KA', 'KA', 'RT', 'KA', 'KA', 'KA', 'RT', 'S', 'RT', 'RT', 'CW', 'RT', 'KA', 'S', 'RT', 'S', 'RT', 'S', 'RT', 'S', 'RT', 'RT', 'RT', 'BB', 'KA', 'KA', 'RT', 'BB', 'KA', 'S', 'RT', 'S', 'RT', 'S', 'KA', 'KA', 'RT', 'KA', 'RT', 'S', 'RT', 'S', 'RT', 'S', 'RT', 'S', 'RT', 'S', 'KA', 'KA', 'KA', 'RT', 'S', 'RT', 'S', 'RT', 'S', 'RT', 'RT', 'KA', 'KA'], 'Travel Path': ['RT_S', 'S_KA', 'KA_KA',

##### Origin
* KA = Karlsruhe
* RT = Reutlingen
* S = Stuttgart
* BB = Böblingen

##### Destination
* CW = Calw
* KA = Karlsruhe
* RT = Reutlingen
* S = Stuttgart
* BB = Böblingen

#### Investigate the distribution of the data files based on their conditions

In [36]:
def check_condition(folder_path):
    """Count the CSV files in ``folder_path`` per traffic condition.

    The condition is taken from the file name: a name containing "Frei" counts
    as free, "Normal" as normal and "Stau" as busy. The keywords are tested in
    that order and the first match wins; files matching none are not counted.

    Args:
        folder_path: Directory containing the CSV files.

    Returns:
        dict with the integer counts under the keys "Free condition",
        "Normal" and "Busy".
    """
    counts = {
        "Free condition": 0,
        "Normal": 0,
        "Busy": 0
    }
    # (keyword searched in the file name, key of the counter it increments)
    markers = (
        ("Frei", "Free condition"),
        ("Normal", "Normal"),
        ("Stau", "Busy"),
    )

    for filename in glob.glob(os.path.join(folder_path, "*.csv")):
        # Look at the file name only, so a keyword that happens to appear in
        # the folder path cannot misclassify every file.
        name = os.path.basename(filename)
        for keyword, label in markers:
            if keyword in name:
                counts[label] += 1
                break

    return counts

# Count the dataset files per traffic condition.
# Despite its name, file_path holds the dataset folder here, not a single file.
file_path = "OBD-II-Dataset"
print(check_condition(folder_path=file_path))
    

{'Free condition': 14, 'Normal': 56, 'Busy': 11}


#### Identify the frequency of each path