### Import Libraries

In [40]:
import pandas as pd
import numpy as np
import os
import glob

### Function Declarations

In [41]:
class Spectrum:
    """A named wavelength band used to carve a subset out of a spectral file.

    Attributes set by the constructor:
        rangeName: human-readable name of the band.
        ext:       file extension associated with the band, always ".sed".
        suffix:    text inserted in front of the extension of generated files.
        range:     ``(minimum, maximum)`` wavelength tuple, both ends inclusive
                   when lines are filtered against it.
    """

    def __init__(self, RangeName, Spectrum_suffix, WaveLength_MIN = 0.0, Wavelength_MAX = 99999.9):
        # The defaults describe an effectively unbounded band.
        bounds = (WaveLength_MIN, Wavelength_MAX)
        self.rangeName = RangeName
        self.ext = ".sed"
        self.suffix = Spectrum_suffix
        self.range = bounds
        
class SpectTools():
    """Stateless helpers that split a .sed spectral file into wavelength bands.

    Every method is a static method and is meant to be called through the
    class (``SpectTools.GetData(...)``).  The "data" strings handled here are
    the text that follows the last ``]`` of the file: one reading per line,
    each line starting with a wavelength followed by a tab.
    """

    @staticmethod
    def GetValueOfDataLine(line):
        """Return the leading number of a data line as a float.

        The number is everything before the first tab; when the line has no
        tab the whole line is parsed.  Raises ValueError when that text is
        not a valid number (for example on an empty line).
        """
        # float() already ignores surrounding whitespace (leading blank,
        # trailing newline), so no manual trimming is required.
        leading_field = str(line).partition("\t")[0]
        return float(leading_field)

    @staticmethod
    def GetFirstDataLine(data):
        """Return the first line of *data*, including its trailing newline.

        When *data* holds no newline at all, the whole string is the first
        (and only) line and is returned unchanged.
        """
        newline_at = data.find("\n")
        if newline_at == -1:
            return data
        return data[:newline_at + 1]

    @staticmethod
    def GetLastDataLine(data):
        """Return the last line of *data* (with its newline, if it has one)."""
        # The search stops one character short of the end so that a newline
        # terminating the final line is not mistaken for a line separator.
        start = data.rfind("\n", 0, len(data) - 1) + 1
        return data[start:]

    @staticmethod
    def DeleteFirstDataLine(data):
        """Return *data* without its first line."""
        return data[len(SpectTools.GetFirstDataLine(data)):]

    @staticmethod
    def GetDataRange(data):
        """Return ``(first, last)``: the leading values of the first and last lines."""
        first = SpectTools.GetValueOfDataLine(SpectTools.GetFirstDataLine(data))
        last = SpectTools.GetValueOfDataLine(SpectTools.GetLastDataLine(data))
        return (first, last)

    @staticmethod
    def GetData(source_target):
        """Read the file at *source_target* and return its data section.

        The data section starts two characters after the last ``]`` in the
        file, i.e. the bracket and the character following it are skipped.
        """
        with open(source_target) as handle:
            content = handle.read()
        data_start = content.rfind("]") + 2
        return content[data_start:]

    @staticmethod
    def GetHeaderAndFilename(spectrum, source_target, dataRange, FileExtensionType = ".sed"):
        """Build the output file name and rewritten header for one band.

        Args:
            spectrum: object exposing ``suffix``, ``rangeName`` and ``range``
                (a ``(min, max)`` tuple), e.g. a ``Spectrum``.
            source_target: path of the source file; it is read here.
            dataRange: ``(first, last)`` wavelengths present in the source data.
            FileExtensionType: extension used to locate and rename file names.

        Returns:
            ``(new_external_file_name, header_text)``.  When the band does not
            overlap *dataRange* the file names get a ".error" suffix and the
            header carries an error comment with a channel count of "0".

        The header is rebuilt by slicing the source text between the markers
        "Version", "Instrument", "Wavelength Range:", "Latitude:",
        "Channels: ", "Columns " and "Reflect. [1.0]".  NOTE(review): a
        missing marker makes ``str.find`` return -1 and the slices are then
        silently wrong; the source is presumably always a well-formed .sed
        file - confirm.
        """
        with open(source_target, "r") as handle:
            content = handle.read()
        extType = FileExtensionType

        # Name recorded inside the file: text between the last backslash of
        # the file and the first occurrence of the extension.
        filename_internal = content[content.rfind("\\")+1:content.find(extType)+len(extType)]
        # Name on disk: text between the last "/" of the path and the first
        # occurrence of the extension in the path.
        filename_external = source_target[source_target.rfind("/")+1:source_target.find(extType)+len(extType)]
        newFileName_internal = filename_internal.replace(extType, spectrum.suffix+extType)
        newFileName_external = filename_external.replace(extType, spectrum.suffix+extType)

        band_min, band_max = spectrum.range[0], spectrum.range[1]
        range_min = max(dataRange[0], band_min)
        range_max = min(dataRange[1], band_max)

        if dataRange[1] < band_min or dataRange[0] > band_max:
            # The band and the measured data do not overlap at all.
            WL_String = " ERROR - SPECTRUM OUT OF RANGE"
            channels_string = "0"
            comment = " ERROR - No readings have been found within \'"+source_target+"\' for the spectrum given! (" + spectrum.rangeName + " band: "+ str(band_min) + "nm to "+ str(band_max) +"nm)"
            newFileName_internal += ".error"
            newFileName_external += ".error"
        else:
            WL_String = str(int(range_min)) + "," + str(int(range_max))
            # NOTE(review): this count assumes one reading per unit of
            # wavelength between range_min and range_max - confirm.
            channels_string = str(int(range_max - range_min + 1))
            comment = " This file has been automatically generated based on \'" + filename_external + "\' to only include only the subset of measurements that occur within the user-defined \'"+spectrum.rangeName+"\' EM range, with wavelengths between "+str(band_min)+"nm and "+str(band_max)+"nm."

        comment_index = len("Comment:")
        version_index = content.find("Version")
        extension_index = content.rfind(filename_internal)
        instr_index = content.find("Instrument")
        wlr_index = content.find("Wavelength Range:") + len("Wavelength Range:") + 1
        wlr_index_end = content.find("Latitude:")
        Channels_index = content.find("Channels: ") + len("Channels: ")
        Channels_index_end = content.find("Columns ")
        header_end_index = content.find("Reflect. [1.0]\n") + len("Reflect. [1.0]\n")

        # Stitch the untouched header pieces around the regenerated values.
        header = "".join([
            content[0:comment_index], comment, "\n",
            content[version_index:extension_index], newFileName_internal, "\n",
            content[instr_index:wlr_index], WL_String, "\n",
            content[wlr_index_end:Channels_index], channels_string, "\n",
            content[Channels_index_end:header_end_index],
        ])

        return (newFileName_external, header)

    @staticmethod
    def LineIfWithinRange(line, dataRange):
        """Return *line* when its leading value lies in *dataRange* (inclusive), else ""."""
        number = SpectTools.GetValueOfDataLine(line)
        if dataRange[0] <= number <= dataRange[1]:
            return line
        return ""

    @staticmethod
    def GetAllLinesInRange(data, dataRange):
        """Return the lines of *data* whose leading value lies in *dataRange*.

        Lines keep their original order and line endings.  Blank lines are
        skipped, and a final line without a trailing newline is handled like
        any other line.
        """
        # Split on "\n" only and re-attach it, so each kept line is
        # byte-identical to the input.
        *complete, tail = data.split("\n")
        lines = [piece + "\n" for piece in complete]
        if tail:
            lines.append(tail)

        # One pass plus a single join, instead of repeatedly rewriting *data*.
        return "".join(
            SpectTools.LineIfWithinRange(line, dataRange)
            for line in lines
            if line.strip()
        )

    @staticmethod
    def ProcessSpectra(Spectra, SED_Target):
        """Split the file at *SED_Target* into one output per band in *Spectra*.

        Returns a list of ``(file_name, file_content)`` tuples, one per band,
        in the order of *Spectra*.  Nothing is written to disk here.
        """
        data = SpectTools.GetData(SED_Target)
        d_range = SpectTools.GetDataRange(data)

        modifiedFiles = []
        for spectrum in Spectra:
            fileName, fileHeader = SpectTools.GetHeaderAndFilename(spectrum, SED_Target, d_range)
            fileData = SpectTools.GetAllLinesInRange(data, spectrum.range)
            modifiedFiles.append((fileName, fileHeader + fileData))
        return modifiedFiles
    

    
class FileProcessing:
    """File-system front end for splitting a spectral file into bands."""

    @staticmethod
    def ReadAndWrite(spectra,source,target):
        """Split *source* by each band in *spectra* and write the results into *target*.

        Args:
            spectra: iterable of band descriptions passed to
                ``SpectTools.ProcessSpectra``.
            source: path of the file to split.
            target: output directory; it is created when missing.  A trailing
                path separator is optional.

        One file is written per band; existing files with the same name are
        overwritten.
        """
        os.makedirs(target, exist_ok=True)
        # ProcessSpectra yields (external file name, file contents) tuples.
        for file_name, file_content in SpectTools.ProcessSpectra(spectra, source):
            # The context manager guarantees the content is flushed and the
            # handle released even if a write fails.
            with open(os.path.join(target, file_name), "w") as writeFile:
                writeFile.write(file_content)

### Setting the Spectrum Range To Split The Files

In [42]:
# Bands used to split every file; each entry is (name, file suffix, min, max).
# Optional extras that can be added to the list:
#   Spectrum("Full-Spectrum", "_ALL")
#   Spectrum("Fake-Wavelength", "_none", 200,201)
spectra = [
    Spectrum("Visual Spectrum", "_VIS", 400.0, 750.0),
    Spectrum("Near Infrared", "_IR1", 750.0, 1400.0),
    Spectrum("Short-wavelength Infrared", "_IR2", 1400.0, 3000.0),
]

#### Below is an example to use for a single file

In [43]:
#source = "/Users/ankit/Desktop/ASTRO_0001_00001.sed"
#target = "/Users/ankit/Desktop/test/"
#FileProcessing.ReadAndWrite(spectra,source,target)

### Reading the folder containing all SED files

In [35]:
# Root folder that is searched, at any depth, for .sed files.
path = "../data"
sed_files = glob.glob(f"{path}/**/*.sed", recursive=True)

### Splitting all Files

In [44]:
# ReadAndWrite expects an output *directory*, not a file path.  Mirror each
# source file's folder under '/splitFiles' (only the first '/data' in the path
# is rewritten) and hand that directory over.
for sed_file in sed_files:
    source_dir = os.path.dirname(sed_file)
    # Joining with "" appends a trailing separator, so the directory can be
    # used directly as a prefix for the generated file names.
    target_dir = os.path.join(source_dir.replace('/data', '/splitFiles', 1), "")
    FileProcessing.ReadAndWrite(spectra, sed_file, target_dir)