# Converting .txt files into .csv files

The preliminary storm reports are only available as manual .txt files. The goal of this notebook is to work out how to convert those tab-delimited .txt files into .csv files. The files have already been converted, so there is no need to re-run these cells.

In [1]:
import csv
import os

### Here I experiment with converting one .txt file into a .csv file

In [1]:

# Convert a single tab-delimited storm-report file into a CSV, rescaling the
# Lat/Lon columns along the way.
infile = "040320_rpts_hail.txt"     # tab-delimited raw file
outfile = "040320_rpts_hail.csv"    # output CSV

with open(infile, "r", encoding="utf-8") as f:
    # Keep only lines with visible content; strip the trailing newline.
    lines = [ln.rstrip("\n") for ln in f if ln.strip()]

# Without this guard an empty file dies on lines[0] with a bare IndexError.
if not lines:
    raise ValueError(f"{infile} has no non-blank lines, so there is no header to read.")

# The first non-blank line is the header row.
header = lines[0].split("\t")

# Find where Lat/Lon live
try:
    lat_i = header.index("Lat")
    lon_i = header.index("Lon")
except ValueError as e:
    raise ValueError("Could not find 'Lat' and/or 'Lon' columns in header.") from e

n_cols = len(header)
rows_out = []
n_skipped = 0  # rows dropped because they have fewer fields than the header

for ln in lines[1:]:
    parts = ln.split("\t")

    # If extra tabs sneak into the last column (presumably Comments),
    # fold the surplus fields back into it, separated by spaces.
    if len(parts) > n_cols:
        parts = parts[:n_cols - 1] + [" ".join(parts[n_cols - 1:])]

    # Skip malformed (too short) lines, but keep count so the loss is visible.
    if len(parts) != n_cols:
        n_skipped += 1
        continue

    # Convert Lat/Lon in place: raw value / 100, written with two decimals.
    lat_str = parts[lat_i].strip()
    lon_str = parts[lon_i].strip()

    try:
        parts[lat_i] = f"{float(lat_str)/100.0:.2f}"
    except ValueError:
        pass  # leave as-is if not numeric

    try:
        parts[lon_i] = f"{-float(lon_str)/100.0:.2f}"  # western hemisphere -> negative
    except ValueError:
        pass  # leave as-is if not numeric

    rows_out.append(parts)

# newline="" lets the csv module control line endings itself.
with open(outfile, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(rows_out)

print(f"Wrote {len(rows_out)} rows to {outfile}")
if n_skipped:
    print(f"Skipped {n_skipped} line(s) with fewer than {n_cols} fields")


Wrote 71 rows to 040320_rpts_hail.csv


### Doing the same thing, but looping through all .txt files in a folder

In [17]:
import os

# Folder holding the raw .txt reports, and the folder the CSVs are written to.
input_dir, output_dir = (
    "/home1/lepique/stormreports_99_txt/",
    "/home1/lepique/storm_reports/stormreports_99",
)

# Create the destination (including missing parents); no error if it already exists.
os.makedirs(output_dir, exist_ok=True)

# Echo both locations so the cell output records which folders were used.
for caption, folder in (("Input dir:", input_dir), ("Output dir:", output_dir)):
    print(caption, folder)


Input dir: /home1/lepique/stormreports_99_txt/
Output dir: /home1/lepique/storm_reports/stormreports_99


In [18]:
# Take the directory listing once; the cells below filter and loop over it.
all_files = os.listdir(input_dir)

# Show only a short sample (first 5 entries) as a sanity check.
first_entries = all_files[:5]
print("All files:", first_entries)

All files: ['991204_rpts_hail.txt', '000329_rpts_torn.txt', '000315_rpts_wind.txt', '000307_rpts_torn.txt', '000302_rpts_wind.txt']


In [19]:
# Print every directory entry on its own line (nothing at all for an empty listing).
if all_files:
    print(*all_files, sep="\n")

991204_rpts_hail.txt
000329_rpts_torn.txt
000315_rpts_wind.txt
000307_rpts_torn.txt
000302_rpts_wind.txt
000310_rpts_torn.txt
000320_rpts_hail.txt
000328_rpts_wind.txt
000324_rpts_hail.txt
000306_rpts_wind.txt
000314_rpts_torn.txt
000311_rpts_wind.txt
000303_rpts_torn.txt
000222_rpts_wind.txt
000227_rpts_torn.txt
000217_rpts_hail.txt
000104_rpts_wind.txt
000213_rpts_hail.txt
000226_rpts_wind.txt
000103_rpts_wind.txt
000218_rpts_wind.txt
000214_rpts_hail.txt
000224_rpts_torn.txt
000221_rpts_wind.txt
000110_rpts_wind.txt
000229_rpts_hail.txt
000225_rpts_wind.txt
000323_rpts_hail.txt
000316_rpts_wind.txt
991203_rpts_hail.txt
000309_rpts_hail.txt
000330_rpts_hail.txt
000305_rpts_wind.txt
000327_rpts_hail.txt
000314_rpts_hail.txt
000303_rpts_hail.txt
991209_rpts_hail.txt
000321_rpts_wind.txt
000318_rpts_wind.txt
000325_rpts_wind.txt
000307_rpts_hail.txt
000310_rpts_hail.txt
991204_rpts_torn.txt
000329_rpts_hail.txt
000213_rpts_torn.txt
000223_rpts_hail.txt
000109_rpts_wind.txt
000217_rpts_t

In [20]:
# Filter for only .txt files
txt_files = [f for f in all_files if f.endswith(".txt")]

# Dry run: show each input path next to the CSV path it maps to, without
# reading or writing anything.
for filename in txt_files:
    input_path = os.path.join(input_dir, filename)
    # splitext swaps only the final extension; str.replace(".txt", ".csv")
    # would also rewrite a ".txt" that appears earlier in the name.
    stem, _ext = os.path.splitext(filename)
    output_path = os.path.join(output_dir, stem + ".csv")
    print(input_path)
    print(output_path)

/home1/lepique/stormreports_99_txt/991204_rpts_hail.txt
/home1/lepique/storm_reports/stormreports_99/991204_rpts_hail.csv
/home1/lepique/stormreports_99_txt/000329_rpts_torn.txt
/home1/lepique/storm_reports/stormreports_99/000329_rpts_torn.csv
/home1/lepique/stormreports_99_txt/000315_rpts_wind.txt
/home1/lepique/storm_reports/stormreports_99/000315_rpts_wind.csv
/home1/lepique/stormreports_99_txt/000307_rpts_torn.txt
/home1/lepique/storm_reports/stormreports_99/000307_rpts_torn.csv
/home1/lepique/stormreports_99_txt/000302_rpts_wind.txt
/home1/lepique/storm_reports/stormreports_99/000302_rpts_wind.csv
/home1/lepique/stormreports_99_txt/000310_rpts_torn.txt
/home1/lepique/storm_reports/stormreports_99/000310_rpts_torn.csv
/home1/lepique/stormreports_99_txt/000320_rpts_hail.txt
/home1/lepique/storm_reports/stormreports_99/000320_rpts_hail.csv
/home1/lepique/stormreports_99_txt/000328_rpts_wind.txt
/home1/lepique/storm_reports/stormreports_99/000328_rpts_wind.csv
/home1/lepique/stormrepo

In [21]:
def _convert_report_file(input_path, output_path):
    """Convert one tab-delimited storm-report .txt file into a CSV file.

    The first non-blank line is taken as the header. In every data row the
    ``Lat`` and ``Lon`` fields are divided by 100 and written with two
    decimals; ``Lon`` is also negated (western hemisphere -> negative).
    Values that do not parse as numbers are left untouched. Rows with more
    fields than the header have the surplus folded into the last column
    (joined with spaces); rows with fewer fields are dropped.

    Parameters
    ----------
    input_path : str
        Path of the tab-delimited text file to read (UTF-8).
    output_path : str
        Path of the CSV file to create or overwrite (UTF-8).

    Returns
    -------
    tuple[int, int] or None
        ``(rows_written, rows_skipped)``, or ``None`` when the input has no
        non-blank lines, in which case no output file is written.

    Raises
    ------
    ValueError
        If the header has no ``Lat`` or no ``Lon`` column.
    """
    with open(input_path, "r", encoding="utf-8") as f:
        lines = [ln.rstrip("\n") for ln in f if ln.strip()]

    # No header to read: let the caller decide what to do instead of
    # failing on lines[0] with an IndexError.
    if not lines:
        return None

    header = lines[0].split("\t")

    # Find where Lat/Lon live; name the file so a bad input can be tracked down.
    try:
        lat_i = header.index("Lat")
        lon_i = header.index("Lon")
    except ValueError as e:
        raise ValueError(
            f"Could not find 'Lat' and/or 'Lon' columns in header of {input_path}."
        ) from e

    n_cols = len(header)
    rows_out = []
    rows_skipped = 0

    for ln in lines[1:]:
        parts = ln.split("\t")

        # If extra tabs get into the last column, fold them back into it
        if len(parts) > n_cols:
            parts = parts[:n_cols - 1] + [" ".join(parts[n_cols - 1:])]

        # Skip malformed (too short) lines, but count them
        if len(parts) != n_cols:
            rows_skipped += 1
            continue

        # Convert Lat/Lon in place: raw value / 100, two decimals
        try:
            parts[lat_i] = f"{float(parts[lat_i].strip())/100.0:.2f}"
        except ValueError:
            pass  # leave as-is if not numeric

        try:
            parts[lon_i] = f"{-float(parts[lon_i].strip())/100.0:.2f}"  # western hemisphere -> negative
        except ValueError:
            pass  # leave as-is if not numeric

        rows_out.append(parts)

    # newline="" lets the csv module control line endings itself.
    with open(output_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(rows_out)

    return len(rows_out), rows_skipped


# Convert every .txt report in input_dir into a same-named .csv in output_dir.
files_written = 0
rows_skipped_total = 0

for filename in txt_files:
    input_path = os.path.join(input_dir, filename)
    # splitext swaps only the final extension; str.replace(".txt", ".csv")
    # would also rewrite a ".txt" that appears earlier in the name.
    stem, _ext = os.path.splitext(filename)
    output_path = os.path.join(output_dir, stem + ".csv")

    result = _convert_report_file(input_path, output_path)
    if result is None:
        # One empty file should not abort the whole batch.
        print(f"Skipping {filename}: file has no non-blank lines")
        continue

    files_written += 1
    rows_skipped_total += result[1]

print(f"Wrote {files_written} files")
if rows_skipped_total:
    print(f"Skipped {rows_skipped_total} malformed line(s) across all files")

Wrote 124 files
