In [2]:
import os
import glob
import re
import pandas as pd

In [3]:
transit_folder_path = r'E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit'

In [4]:
def preprocess_and_parse_file(file_path, file_index):
    
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Get the filename and folder name two levels up
    filename = os.path.basename(file_path)
    transit_scenario = os.path.basename(os.path.dirname(file_path))
    model = os.path.basename(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(file_path)))))

    # Preprocess lines to remove hard returns and spaces for lines that do not start with ';' or 'LINE'
    cleaned_lines = []
    buffer = ""
    for line in lines:
        # remove extra spaces
        line = line.replace(', ', ',')
        if line.startswith(';'):
            do = 'nothing'
        elif line.startswith('LINE'):
            if buffer:
                buffer = buffer.replace(', ', ',')
                cleaned_lines.append(buffer)
                buffer = ""
            buffer = line.strip()
        else:
            buffer += line.strip()

    # flush buffer
    if buffer:
        buffer = buffer.replace(', ', ',')
        cleaned_lines.append(buffer)
        buffer = ""

    #display(cleaned_lines)

    # Initialize a list to store the parsed lines and a list for speed and tf positions
    parsed_data = []
    speed_tf_positions = []

    # Regular expression pattern for parsing a single line with all fields
    line_pattern = re.compile(
        r'LINE NAME="(?P<name>[^"]+)",\s*COLOR=(?P<color>\d+),\s*MODE=(?P<mode>\d+),\s*ONEWAY=(?P<oneway>[TF]),\s*'
        r'HEADWAY\[1\]=(?P<headway1>\d+),\s*HEADWAY\[2\]=(?P<headway2>\d+)(?:,\s*FARESYSTEM=(?P<faresystem>\d+))?,\s*'
        r'(?P<N>.+)'
    )

    # Parse each cleaned line
    for line in cleaned_lines:
        match = line_pattern.search(line)
        if match:
            match_dict = match.groupdict()
            match_dict['file_index'] = file_index  # Add file index to the parsed record
            match_dict['model'] = model
            match_dict['transit_scenario'] = transit_scenario
            match_dict['filename'] = filename
            match_dict['file_path'] = file_path  # Add filename to the parsed record
            N_list = []
            attributes = match_dict.pop('N').split(',')
            current_n_list = []
            current_tf = None
            current_speed = None

            for attr in attributes:
                if attr.startswith('N='):
                    if current_n_list:
                        N_list.extend(current_n_list)
                        current_n_list = []
                    current_n_list.extend(map(int, attr[2:].split(',')))
                elif attr.startswith('SPEED='):
                    if current_n_list:
                        N_list.extend(current_n_list)
                        current_n_list = []
                    current_speed = int(attr[6:])
                    speed_tf_positions.append({'file_index': match_dict['file_index'], 'name': match_dict['name'], 'type': 'SPEED', 'node_index': len(N_list), 'value': current_speed})
                elif attr.startswith('TF='):
                    if current_n_list:
                        N_list.extend(current_n_list)
                        current_n_list = []
                    current_tf = int(attr[3:])
                    speed_tf_positions.append({'file_index': match_dict['file_index'], 'name': match_dict['name'], 'type': 'TF', 'node_index': len(N_list), 'value': current_tf})
                else:
                    current_n_list.extend(map(int, attr.split(',')))

            if current_n_list:
                N_list.extend(current_n_list)

            match_dict['N'] = N_list

            # Append the parsed data to the list
            parsed_data.append(match_dict)

    return parsed_data, speed_tf_positions


In [5]:
def read_all_lin_files(transit_folder_path):
    # Get a list of all .lin files in the folder and its subfolders
    lin_files = glob.glob(os.path.join(transit_folder_path, '**', '*.lin'), recursive=True)

    # Initialize a list to store the parsed data from all files and speed/tf positions
    all_parsed_data = []
    all_speed_tf_positions = []

    # Preprocess and parse each file, then append the data to the lists
    for file_index, file_path in enumerate(lin_files):
        print(file_path)
        file_data, speed_tf_positions = preprocess_and_parse_file(file_path, file_index)
        all_parsed_data.extend(file_data)
        all_speed_tf_positions.extend(speed_tf_positions)
    
    return all_parsed_data, all_speed_tf_positions


In [6]:

# Read and parse all .lin files in the folder
parsed_data, speed_tf_positions = read_all_lin_files(transit_folder_path)

# Convert the parsed data to DataFrames
df = pd.DataFrame(parsed_data)
speed_tf_df = pd.DataFrame(speed_tf_positions)


# Normalize the N column and create 'stop' column
exploded_df = df.explode('N').reset_index(drop=True)
exploded_df = exploded_df[['file_index','name', 'N']]
exploded_df['node_id'] = exploded_df['N'].abs()
exploded_df['is_stop'] = exploded_df['N'] > 0

# Add the index as a separate column to indicate the position of each number in the list
exploded_df['node_seq'] = exploded_df.groupby('name').cumcount()

# Add the index as a separate column to indicate the position of each number in the list
exploded_df['node_seq'] = exploded_df.groupby('name').cumcount()

df_files = df.groupby(['file_index','model','transit_scenario','filename','file_path'], as_index=False).agg(numRoutes=('name','count'))

df = df[['file_index','name','color','oneway','headway1','headway2','faresystem','N']]#.drop(columns=['model','transit_scenario','filename','file_path'], inplace=True)

display(df_files)
display(df)
display(speed_tf_df)
display(exploded_df)


E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2019\mag_brt_2019.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2019\mag_exp_2019.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2019\mag_lcl_2019.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2019\rail_2019.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2019\wfrc_og_lcl_2019.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2019\wfrc_sl_brt_2019.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2019\wfrc_sl_exp_2019.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2019\wfrc_sl_lcl_2019.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2023\mag_brt_2023.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2023\mag_exp_2023.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2023\mag_lcl_2023.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2023\rail_2023.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2023\wfrc_brt_2023.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2023\wfrc_og_lcl_2023.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2023\wfrc_sl_exp_

Unnamed: 0,file_index,model,transit_scenario,filename,file_path,numRoutes
0,0,WF-TDM-v9x,Lin_2019,mag_brt_2019.lin,E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_20...,1
1,1,WF-TDM-v9x,Lin_2019,mag_exp_2019.lin,E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_20...,1
2,2,WF-TDM-v9x,Lin_2019,mag_lcl_2019.lin,E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_20...,4
3,4,WF-TDM-v9x,Lin_2019,wfrc_og_lcl_2019.lin,E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_20...,5
4,6,WF-TDM-v9x,Lin_2019,wfrc_sl_exp_2019.lin,E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_20...,11
...,...,...,...,...,...,...
67,92,WF-TDM-v9x,Lin_2050_Needs_MAG,wfrc_brt_2050UF.lin,E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_20...,1
68,93,WF-TDM-v9x,Lin_2050_Needs_MAG,wfrc_core_2050UF.lin,E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_20...,1
69,94,WF-TDM-v9x,Lin_2050_Needs_MAG,wfrc_og_lcl_2050UF.lin,E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_20...,3
70,95,WF-TDM-v9x,Lin_2050_Needs_MAG,wfrc_sl_exp_2050UF.lin,E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_20...,2


Unnamed: 0,file_index,name,color,oneway,headway1,headway2,faresystem,N
0,0,UVX,3,F,6,10,4,"[66744, -66743, -66750, -66768, 66770, -71485,..."
1,1,M807_NoCnty,13,T,30,0,,"[50040, -69611, -69598, -69589, -69585, -69563..."
2,2,M821_Psn,6,F,30,60,,"[65403, -65402, 65401, 65400, 65399, 65398, 65..."
3,2,M822_Psn,6,F,60,0,,"[65403, -65402, 65401, 65400, 65399, 65398, 65..."
4,2,M831_WPr,7,F,30,30,,"[50024, -66841, -66826, 66825, -66837, 66874, ..."
...,...,...,...,...,...,...,...,...
382,96,S513,7,F,30,0,,"[10019, -15115, -25263, 25286, -25295, -25329,..."
383,96,SF11,7,F,60,60,,"[-26001, 25975, 25948, 25970, 26020, -26039, 2..."
384,96,SF232,9,F,30,30,,"[15036, -21763, -21755, -21763, 21783, -21848,..."
385,96,SF525,7,F,30,30,,"[10010, -21973, -21998, 22027, -22061, 22081, ..."


Unnamed: 0,file_index,name,type,node_index,value
0,0,UVX,TF,41,1
1,0,UVX,SPEED,43,13
2,0,UVX,TF,45,1
3,0,UVX,SPEED,57,21
4,0,UVX,SPEED,66,18
5,0,UVX,TF,76,1
6,8,UVX,TF,41,1
7,8,UVX,SPEED,43,13
8,8,UVX,TF,45,1
9,8,UVX,SPEED,57,21


Unnamed: 0,file_index,name,N,node_id,is_stop,node_seq
0,0,UVX,66744,66744,True,0
1,0,UVX,-66743,66743,False,1
2,0,UVX,-66750,66750,False,2
3,0,UVX,-66768,66768,False,3
4,0,UVX,66770,66770,True,4
...,...,...,...,...,...,...
37631,96,SF453,-21468,21468,False,1217
37632,96,SF453,-21197,21197,False,1218
37633,96,SF453,-20924,20924,False,1219
37634,96,SF453,-20878,20878,False,1220


In [7]:
# export to CSV
df_files   .to_csv('output/files.csv')
df         .to_csv('output/routes.csv')
speed_tf_df.to_csv('output/speeds-tf.csv')
exploded_df.to_csv('output/node.csv')