In [1]:
import os
import glob
import re
import pandas as pd

In [2]:
transit_folder_path = r'E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit'
model = "WF-TDM-v9x-5d7779"

In [3]:
# clean out output folder

files = glob.glob(os.path.join('output', '*'))

for file in files:
    try:
        os.remove(file)
        print(f"Deleted: {file}")
    except Exception as e:
        print(f"Error deleting {file}: {e}")


Deleted: output\files.csv
Deleted: output\lines.csv
Deleted: output\links.csv
Deleted: output\nodes.csv
Deleted: output\speeds-tf-delay.csv


In [4]:
def preprocess_and_parse_file(file_path, file_index):
    
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Get the filename and folder name two levels up
    filename = os.path.basename(file_path)
    transit_scenario = os.path.basename(os.path.dirname(file_path))

    # Preprocess lines to remove hard returns and spaces for lines that do not start with ';' or 'LINE'
    cleaned_lines = []
    buffer = ""
    for line in lines:
        # remove extra spaces
        line = line.replace(', ', ',')
        if line.startswith(';'):
            do = 'nothing'
        elif line.startswith('LINE'):
            if buffer:
                cleaned_lines.append(buffer)
                buffer = ""
            buffer = line.strip()
        else:
            buffer += line.strip()

    # flush buffer
    if buffer:
        cleaned_lines.append(buffer)
        buffer = ""

    #display(cleaned_lines)

    # Initialize a list to store the parsed lines and a list for speed and tf positions
    parsed_data = []
    speed_tf_positions = []

    for line in cleaned_lines:
        pre_n_part, n_part = re.split(r'(?=N=)', line.replace('LINE ', ''), maxsplit=1)

        if pre_n_part.endswith(','):
            pre_n_part = pre_n_part[:-1]
        
        #print (pre_n_part)
        # Create dictionaries for pre-N and N
        pre_n_dict = {}
        n_dict = {'node_list': []}

        # Split pre-N part into key-value pairs
        pre_n_attributes = pre_n_part.split(',')
        #print (pre_n_attributes)
        for attr in pre_n_attributes:
            key, value = attr.split('=')
            pre_n_dict[key.strip().lower().replace('[','').replace(']','')] = value.strip().strip('"')
            #print (key.strip())
            #print (value.strip())

        # Split N part into key-value pairs and handle special attributes
        N_list = []
        n_attributes = n_part.split(',')
        current_n_list = []
        current_tf = None
        current_speed = None
        current_delay = None

        for attr in n_attributes:
            if attr.startswith('N='):
                if current_n_list:
                    N_list.extend(current_n_list)
                    current_n_list = []
                current_n_list.extend(map(int, attr[2:].split(',')))
            elif attr.startswith('SPEED='):
                if current_n_list:
                    N_list.extend(current_n_list)
                    current_n_list = []
                current_speed = float(attr[6:])
                speed_tf_positions.append({'file_index': file_index, 'name': pre_n_dict.get('name', ''), 'type': 'speed', 'node_index': len(N_list), 'value': current_speed})
            elif attr.startswith('TF='):
                if current_n_list:
                    N_list.extend(current_n_list)
                    current_n_list = []
                current_tf = float(attr[3:])
                speed_tf_positions.append({'file_index': file_index, 'name': pre_n_dict.get('name', ''), 'type': 'tf', 'node_index': len(N_list), 'value': current_tf})
            elif attr.startswith('DELAY='):
                if current_n_list:
                    N_list.extend(current_n_list)
                    current_n_list = []
                current_delay = float(attr[6:])
                speed_tf_positions.append({'file_index': file_index, 'name': pre_n_dict.get('name', ''), 'type': 'delay', 'node_index': len(N_list), 'value': current_delay})
            else:
                current_n_list.extend(map(int, attr.split(',')))

        if current_n_list:
            n_dict['node_list'].extend(current_n_list)

        # Explode pre-N and Combine with additional metadata
        #display(pre_n_dict)

        pre_n_dict_exploded = pd.json_normalize(pre_n_dict)
        pre_n_dict_exploded['file_index'] = file_index
        pre_n_dict_exploded['model'] = model
        pre_n_dict_exploded['transit_scenario'] = transit_scenario
        pre_n_dict_exploded['filename'] = filename
        pre_n_dict_exploded['file_path'] = file_path

        # Convert the exploded DataFrame to a dictionary and flatten it
        exploded_dict = pre_n_dict_exploded.to_dict(orient='records')[0]

        # Add the flattened dictionary to parsed_data
        exploded_dict['node_list'] = n_dict['node_list']
        parsed_data.append(exploded_dict)

    return parsed_data, speed_tf_positions



In [5]:
def read_all_lin_files(transit_folder_path):
    # Get a list of all .lin files in the folder and its subfolders
    lin_files = glob.glob(os.path.join(transit_folder_path, '**', '*.lin'), recursive=True)

    # Initialize a list to store the parsed data from all files and speed/tf positions
    all_parsed_data = []
    all_speed_tf_positions = []

    # Preprocess and parse each file, then append the data to the lists
    for file_index, file_path in enumerate(lin_files):
        print(file_path)
        file_data, speed_tf_positions = preprocess_and_parse_file(file_path, file_index)
        all_parsed_data.extend(file_data)
        all_speed_tf_positions.extend(speed_tf_positions)
    
    return all_parsed_data, all_speed_tf_positions


In [6]:
# Read and parse all .lin files in the folder
parsed_data, speed_tf_positions = read_all_lin_files(transit_folder_path)

# Convert the parsed data to DataFrames
lines_df = pd.DataFrame(parsed_data)

lines_df['oneway'] = lines_df['oneway'].replace({'T': True, 'F': False})
speed_tf_df = pd.DataFrame(speed_tf_positions)

files_df = lines_df.groupby(['file_index','model','transit_scenario','filename','file_path'], as_index=False).agg(num_routes=('name','count'))

lines_df = lines_df[['name','longname','color','mode','oneway','headway1','headway2','faresystem','file_index','node_list']]

display(files_df)
display(lines_df)
display(speed_tf_df)

E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2019\mag_brt_2019.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2019\mag_exp_2019.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2019\mag_lcl_2019.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2019\rail_2019.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2019\wfrc_og_lcl_2019.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2019\wfrc_sl_brt_2019.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2019\wfrc_sl_exp_2019.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2019\wfrc_sl_lcl_2019.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2023\mag_brt_2023.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2023\mag_exp_2023.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2023\mag_lcl_2023.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2023\rail_2023.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2023\wfrc_brt_2023.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2023\wfrc_og_lcl_2023.lin
E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_2023\wfrc_sl_exp_

Unnamed: 0,file_index,model,transit_scenario,filename,file_path,num_routes
0,0,WF-TDM-v9x-5d7779,Lin_2019,mag_brt_2019.lin,E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_20...,1
1,1,WF-TDM-v9x-5d7779,Lin_2019,mag_exp_2019.lin,E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_20...,2
2,2,WF-TDM-v9x-5d7779,Lin_2019,mag_lcl_2019.lin,E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_20...,12
3,3,WF-TDM-v9x-5d7779,Lin_2019,rail_2019.lin,E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_20...,5
4,4,WF-TDM-v9x-5d7779,Lin_2019,wfrc_og_lcl_2019.lin,E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_20...,22
...,...,...,...,...,...,...
92,92,WF-TDM-v9x-5d7779,Lin_2050_Needs_MAG,wfrc_brt_2050UF.lin,E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_20...,4
93,93,WF-TDM-v9x-5d7779,Lin_2050_Needs_MAG,wfrc_core_2050UF.lin,E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_20...,36
94,94,WF-TDM-v9x-5d7779,Lin_2050_Needs_MAG,wfrc_og_lcl_2050UF.lin,E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_20...,19
95,95,WF-TDM-v9x-5d7779,Lin_2050_Needs_MAG,wfrc_sl_exp_2050UF.lin,E:\GitHub\WF-TDM-v9x\1_Inputs\4_Transit\Lin_20...,2


Unnamed: 0,name,longname,color,mode,oneway,headway1,headway2,faresystem,file_index,node_list
0,UVX,,3,9,False,6,10,4,0,"[-67409, -67413, 67421, -67396, -67411, -60530..."
1,M806_EglMtn,,3,4,True,45,0,,1,"[68503, -68543, -68579, -68612, 68609, -68606,..."
2,M807_NoCnty,,13,4,True,30,0,,1,"[50040, -69611, -69598, -69589, -69585, -69563..."
3,M821_Psn,,6,4,False,30,60,,2,"[65403, -65402, 65401, 65400, 65399, 65398, 65..."
4,M822_Psn,,6,4,False,60,0,,2,"[65403, -65402, 65401, 65400, 65399, 65398, 65..."
...,...,...,...,...,...,...,...,...,...,...
1129,SF232,,9,4,False,30,30,,96,"[15036, -21763, -21755, -21763, 21783, -21848,..."
1130,SF514,,13,4,False,30,30,,96,"[10008, -21331, -21385, -21390, -21397, -21401..."
1131,SF525,,7,4,False,30,30,,96,"[10010, -21973, -21998, 22027, -22061, 22081, ..."
1132,SF570,,13,4,False,30,30,,96,"[15059, -22663, -22687, 22747, 22742, -22733, ..."


Unnamed: 0,file_index,name,type,node_index,value
0,0,UVX,tf,41,1.00
1,0,UVX,speed,43,13.00
2,0,UVX,tf,45,1.00
3,0,UVX,speed,57,21.00
4,0,UVX,speed,66,18.00
...,...,...,...,...,...
342,93,FthlDr2100,tf,33,1.00
343,93,FthlDr2100,speed,40,21.00
344,93,BRT3533S_Core,tf,1,0.75
345,93,BRT3533S_Core,tf,2,0.75


In [7]:
# create nodes dataset

# Normalize the N column and create 'stop' column
nodes_df = lines_df.explode('node_list').reset_index(drop=True)
nodes_df = nodes_df[['file_index','name', 'node_list']]
nodes_df.rename(columns={'node_list':'node_list_entry'}, inplace=True)
nodes_df['node_id'] = nodes_df['node_list_entry'].abs()
nodes_df['is_stop'] = nodes_df['node_list_entry'] > 0

# Add the index as a separate column to indicate the position of each number in the list
nodes_df['node_seq'] = nodes_df.groupby('name').cumcount()

# Add the index as a separate column to indicate the position of each number in the list
nodes_df['node_seq'] = nodes_df.groupby('name').cumcount()


#lines_df.drop(columns=['model','transit_scenario','filename','file_path'], inplace=True)


display(nodes_df)


Unnamed: 0,file_index,name,node_list_entry,node_id,is_stop,node_seq
0,0,UVX,-67409,67409,False,0
1,0,UVX,-67413,67413,False,1
2,0,UVX,67421,67421,True,2
3,0,UVX,-67396,67396,False,3
4,0,UVX,-67411,67411,False,4
...,...,...,...,...,...,...
90635,96,SF453,-21468,21468,False,1217
90636,96,SF453,-21197,21197,False,1218
90637,96,SF453,-20924,20924,False,1219
90638,96,SF453,-20878,20878,False,1220


In [8]:
# create links dataset

links_df = nodes_df.copy()

# Create the A and B columns
links_df['A'] = links_df['node_id']
links_df['B'] = links_df['node_id'].shift(-1)

# Drop the last row where B is NaN
links_df.dropna(subset=['B'], inplace=True)

# Reset index if needed
links_df.reset_index(drop=True, inplace=True)

# Drop the original N column if not needed
links_df.drop(columns=['node_list_entry', 'node_id', 'is_stop'], inplace=True)

links_df['direction'] = 1

#df['link_id'] = df['link_id']

links_df

Unnamed: 0,file_index,name,node_seq,A,B,direction
0,0,UVX,0,67409,67413,1
1,0,UVX,1,67413,67421,1
2,0,UVX,2,67421,67396,1
3,0,UVX,3,67396,67411,1
4,0,UVX,4,67411,60530,1
...,...,...,...,...,...,...
90634,96,SF453,1216,21551,21468,1
90635,96,SF453,1217,21468,21197,1
90636,96,SF453,1218,21197,20924,1
90637,96,SF453,1219,20924,20878,1


In [9]:
# append links in other direction for oneway==False

display(links_df)

merged_df = pd.merge(links_df, lines_df, on=['file_index', 'name'])

merged_df = merged_df[merged_df['oneway']==False]

all_reversed_rows = pd.DataFrame()

# Group by 'file_index' and 'name'
grouped = merged_df.groupby(['file_index', 'name'])

for (file_index, name), group in grouped:
    if group['oneway'].iloc[0] == False:
        # Reverse the A and B columns
        reversed_rows = group.copy()
        reversed_rows['A'], reversed_rows['B'] = reversed_rows['B'], reversed_rows['A']

        # Flip the rows based on sequence sorted in descending order
        reversed_rows = reversed_rows.sort_values(by='node_seq', ascending=False).reset_index(drop=True)

        # Find the maximum node_seq value in the original group
        max_node_seq = group['node_seq'].max()

        # Assign new node_seq values to the reversed rows
        reversed_rows['node_seq'] = range(max_node_seq + 1, max_node_seq + 1 + len(reversed_rows))

        # Assign direction = 2
        reversed_rows['direction'] = 2

        # Drop the 'oneway' column from the reversed rows if not needed
        reversed_rows.drop(columns=['oneway'], inplace=True)

        # Collect the reversed rows
        all_reversed_rows = pd.concat([all_reversed_rows,reversed_rows])

display(all_reversed_rows)

# Get the list of columns in links_df
columns_to_keep = links_df.columns

# Filter the columns in links_with_reverse_direction_df to only include those in links_df
all_reversed_rows = all_reversed_rows[columns_to_keep]

# Append these new rows to the original DataFrame
links_with_reverse_direction_df = pd.concat([links_df, all_reversed_rows])

# node sequence is now link sequence
links_with_reverse_direction_df.rename(columns={'node_seq':'link_seq'}, inplace=True)

# add link_id
links_with_reverse_direction_df['link_id'] = links_with_reverse_direction_df['A'].astype(str) + '_' + links_with_reverse_direction_df['B'].astype(str)

links_with_reverse_direction_df = links_with_reverse_direction_df.sort_values(by=['file_index','name','link_seq'], ascending=True).reset_index(drop=True)


display(links_with_reverse_direction_df)

Unnamed: 0,file_index,name,node_seq,A,B,direction
0,0,UVX,0,67409,67413,1
1,0,UVX,1,67413,67421,1
2,0,UVX,2,67421,67396,1
3,0,UVX,3,67396,67411,1
4,0,UVX,4,67411,60530,1
...,...,...,...,...,...,...
90634,96,SF453,1216,21551,21468,1
90635,96,SF453,1217,21468,21197,1
90636,96,SF453,1218,21197,20924,1
90637,96,SF453,1219,20924,20878,1


Unnamed: 0,file_index,name,node_seq,A,B,direction,longname,color,mode,headway1,headway2,faresystem,node_list
0,0,UVX,13,68503,50029,2,,3,9,6,10,4,"[-67409, -67413, 67421, -67396, -67411, -60530..."
1,0,UVX,14,50029,67461,2,,3,9,6,10,4,"[-67409, -67413, 67421, -67396, -67411, -60530..."
2,0,UVX,15,67461,63443,2,,3,9,6,10,4,"[-67409, -67413, 67421, -67396, -67411, -60530..."
3,0,UVX,16,63443,67405,2,,3,9,6,10,4,"[-67409, -67413, 67421, -67396, -67411, -60530..."
4,0,UVX,17,67405,67407,2,,3,9,6,10,4,"[-67409, -67413, 67421, -67396, -67411, -60530..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
39,96,SF570,587,22733,22742,2,,13,4,30,30,,"[15059, -22663, -22687, 22747, 22742, -22733, ..."
40,96,SF570,588,22742,22747,2,,13,4,30,30,,"[15059, -22663, -22687, 22747, 22742, -22733, ..."
41,96,SF570,589,22747,22687,2,,13,4,30,30,,"[15059, -22663, -22687, 22747, 22742, -22733, ..."
42,96,SF570,590,22687,22663,2,,13,4,30,30,,"[15059, -22663, -22687, 22747, 22742, -22733, ..."


Unnamed: 0,file_index,name,link_seq,A,B,direction,link_id
0,0,UVX,0,67409,67413,1,67409_67413
1,0,UVX,1,67413,67421,1,67413_67421
2,0,UVX,2,67421,67396,1,67421_67396
3,0,UVX,3,67396,67411,1,67396_67411
4,0,UVX,4,67411,60530,1,67411_60530
...,...,...,...,...,...,...,...
129043,96,SF570,587,22733,22742,2,22733_22742
129044,96,SF570,588,22742,22747,2,22742_22747
129045,96,SF570,589,22747,22687,2,22747_22687
129046,96,SF570,590,22687,22663,2,22687_22663


In [10]:
# export to CSV
files_df                       .to_csv('output/files.csv'          , index=False)
lines_df                       .to_csv('output/lines.csv'          , index=False)
speed_tf_df                    .to_csv('output/speeds-tf-delay.csv', index=False)
nodes_df                       .to_csv('output/nodes.csv'          , index=False)
links_with_reverse_direction_df.to_csv('output/links.csv'          , index=False)