In [138]:
import os
import pandas as pd
import re

In [139]:
# Locations of the raw Lotek text exports and of the combined CSV output.
# The paths are assembled with os.path.join rather than hand-written
# backslashes: a backslash followed by "I" or "L" inside an ordinary string
# literal is an invalid escape sequence (a warning on recent Python versions),
# and backslash separators are only understood on Windows.
folder_path = os.path.join("Example_data", "Input", "Lotek_legacy_raw_txt_files")
output_csv_path = os.path.join("Example_data", "Input", "Lotek_combined_csv")
output_name = "Lotek_combined_data.csv"

In [140]:
# Collect the tabular section of every .txt file found under folder_path.
# Within each file, lines are kept starting at the header line (the one that
# contains ' Date') and copying stops at the 'End of Data' marker.
data = []

for dirpath, dirnames, filenames in os.walk(folder_path):
    # os.walk yields entries in arbitrary filesystem order. Sorting dirnames
    # in place (which steers the walk) and sorting filenames makes the row
    # order, and which file's header becomes the column names, reproducible.
    dirnames.sort()
    for filename in sorted(filenames):
        if not filename.lower().endswith(".txt"):
            continue
        input_file_path = os.path.join(dirpath, filename)

        with open(input_file_path, 'r') as infile:
            in_table = False
            for line in infile:
                if ' Date' in line:
                    # Header line: start copying. Every file contributes its
                    # own header line, so repeated headers end up among the
                    # collected rows.
                    in_table = True
                    data.append(line.strip())
                elif 'End of Data' in line:
                    in_table = False
                elif in_table:
                    data.append(line.strip())

# Fail with a clear message instead of an IndexError on split_data[0] below.
if not data:
    raise ValueError(
        "No data rows found in any .txt file under {!r}".format(folder_path)
    )

# Split each row into fields on runs of two or more whitespace characters
# (single spaces inside a field, e.g. in a column name, are preserved).
split_data = [re.split(r'\s{2,}', row.strip()) for row in data]

# Set first row as column headings and create DataFrame
df = pd.DataFrame(split_data[1:], columns=split_data[0])

In [141]:
# A couple of data cleaning steps to weed out null rows.
# The all-zero mask is passed as a callable so it is evaluated on the frame
# produced by dropna() rather than on the pre-dropna `df`. Fields parsed from
# the text files are strings, so the text "0" is matched as well as numeric 0
# (a bare `== 0` comparison never matches string values).
df = (
    df.dropna(how='all')
      .loc[lambda frame: ~frame.isin([0, '0']).all(axis=1)]
)

# Filter out rows where 'Power' column is not numeric; this also discards
# non-numeric leftovers such as repeated header lines.
# The index is reset once, after all filtering, which also returns a new
# frame so later column assignments do not operate on a slice of the old one.
df = (
    df[pd.to_numeric(df['Power'], errors='coerce').notna()]
      .reset_index(drop=True)
)

In [142]:
# Merge dates and times into a single parsed timestamp.
# NOTE(review): no explicit format is passed, so pandas infers it from the
# text; confirm the day/month order of the source files and consider passing
# format= to pd.to_datetime.
timestamps = pd.to_datetime(df['Date'] + ' ' + df['Time'])

# Remove the separate date and time columns, then put Datetime first.
# drop() returns a new frame, so the column is inserted into that new frame
# instead of being assigned into a filtered slice (which triggers pandas'
# SettingWithCopyWarning).
df = df.drop(columns=['Date', 'Time'])
df.insert(0, 'Datetime', timestamps)

In [143]:
# Display the combined frame; the recorded output below shows the first and
# last rows with the middle elided.
df

Unnamed: 0,Datetime,Channel,Tag ID,Antenna,Power
0,2022-10-28 08:17:12,0,178,2,66
1,2022-10-28 08:17:15,0,178,2,66
2,2022-10-28 08:17:18,0,178,2,65
3,2022-10-28 08:17:24,0,178,2,63
4,2022-10-28 08:18:26,0,178,2,64
...,...,...,...,...,...
196866,2023-02-28 11:47:04,0,186,2,68
196867,2023-02-28 11:47:07,0,186,2,69
196868,2023-02-28 11:47:10,0,186,2,69
196869,2023-02-28 11:51:08,0,186,2,66


In [144]:
# Write the combined data to a single CSV file, without the index column.
# The output folder is created first because to_csv does not create missing
# directories and would otherwise fail on a fresh checkout.
os.makedirs(output_csv_path, exist_ok=True)
output_path = os.path.join(output_csv_path, output_name)
df.to_csv(output_path, index=False)