In [20]:
import codecs
import os
import shutil

import numpy as np
import pandas as pd

### CTan Morphometry Results 2D
##### Author: Anna Valentine (annavalentine@mines.edu)
#### Date: 07/19/21

#### Purpose:  
Takes in CTan/batman files from microCT and concatenates the 2D results from the snowpit into one dataframe. This notebook is meant to be run on one snowpit at a time.

In [21]:
# Project root for this snowpit; the two locations below are derived from it
path = '/Users/annav/1_WORK_CRREL/CTan Project 1/'
files = f'{path}Data/*.txt'        # glob pattern handed to main() to find the input files
UTF8_folder = f'{path}D_UTF8/'     # folder that receives the UTF-8 converted copies

In [22]:
# General helper: takes a glob pattern and hands back every matching file name
def list_files_local(path):
    """Return the list of paths that match the glob pattern ``path``."""
    import glob as _glob

    matches = _glob.glob(path)
    return matches


In [23]:
# Converts files from the Windows ANSI code page to UTF-8, which Python can read
def UTF8_convert(list_files):
    """Write a UTF-8 copy of every file in ``list_files`` into ``UTF8_folder``.

    Parameters
    ----------
    list_files : iterable of str
        Paths of the source text files to re-encode.

    Returns
    -------
    None
        Each copy is written to disk as
        ``<first 16 characters of the source file name>_UTF8.txt``.

    Notes
    -----
    The source files are decoded with the "mbcs" codec, which Python only
    provides on Windows. Two source files whose names share the same first
    16 characters map to the same output file, and the later one overwrites
    the earlier one.
    """
    BLOCKSIZE = 300000  # chunk size handed to each read, roughly 300 kB

    # Create the destination folder on first use instead of failing in open().
    os.makedirs(UTF8_folder, exist_ok=True)

    for file in list_files:
        # Slice the base name rather than a fixed offset into the full path, so
        # the output name does not depend on the length of the directory part.
        short_name = os.path.basename(file)[:16]
        name_conv = os.path.join(UTF8_folder, short_name + '_UTF8.txt')
        with codecs.open(file, "r", "mbcs") as sourceFile:
            with codecs.open(name_conv, "w", "utf-8") as targetFile:
                # Stream the decoded text across in BLOCKSIZE-sized chunks.
                shutil.copyfileobj(sourceFile, targetFile, BLOCKSIZE)
    

In [24]:
# Gives the first depth of the scan range (e.g. 5.0 for "5-3cm") from the file name
def sample_height(file):
    """Return the leading number of the depth range encoded in a file name.

    Parameters
    ----------
    file : str
        Path whose base name looks like ``<pit>_<first>-<second>cm_...``,
        e.g. ``1s17_5-3cm_20um__UTF8.txt``.

    Returns
    -------
    float
        The number before the dash in the depth token (``5.0`` for ``5-3cm``).

    Raises
    ------
    IndexError
        If the base name contains no ``_``-separated second token.
    ValueError
        If the text before the dash is not a number.
    """
    # Only the base name is inspected: splitting the full path would make the
    # token index depend on how many underscores the directory names contain.
    depth_token = os.path.basename(file).split("_")[1]
    first_depth = depth_token.split("-")[0]
    return float(first_depth)

In [25]:
# Find the line number of a term in the file (used for knowing where to start the dataframe)
def find_term(term, file):
    """Return the 1-based number of the first line of ``file`` that contains ``term``.

    Parameters
    ----------
    term : str
        Text to look for; a plain substring match is used.
    file : str
        Path of the text file to scan.

    Returns
    -------
    int or None
        Line number of the first match (the first line is 1), or ``None`` when
        no line contains ``term``.
    """
    # The context manager closes the handle on every exit path, including the
    # early return on a match.
    with open(file) as file_o:
        for row, line in enumerate(file_o, start=1):
            if term in line:
                return row
    return None

In [26]:
# Loop through all of the files in the snowpit and concatenate them into one dataframe
def loop_files(files):
    """Read the 2D results table from every file and stack them into one DataFrame.

    Parameters
    ----------
    files : iterable of str
        Paths of the UTF-8 converted result files. Each base name is expected
        to look like ``<pit>_<depth range>_...`` (e.g. ``1s17_5-3cm_...``).

    Returns
    -------
    pandas.DataFrame
        The per-file tables concatenated in the order of ``files``, with an
        added ``'Scan Depth'`` column and the ``'Unnamed: 0'`` column renamed
        to ``'File Name'``.

    Raises
    ------
    ValueError
        If ``files`` is empty, or if a file lacks the "2D analysis" or
        "3D analysis" marker line.
    """
    # start and end terms
    start = "2D analysis"
    end = "3D analysis"
    frames = []

    for file in files:
        start_line = find_term(start, file)
        end_line = find_term(end, file)
        if start_line is None or end_line is None:
            raise ValueError(
                "loop_files: could not find the '" + start + "' / '" + end
                + "' markers in " + str(file)
            )

        # The table is read from 9 lines after the start marker up to 4 lines
        # before the end marker. NOTE(review): these offsets presumably match
        # the CTAn report layout; confirm against a sample file.
        start_row = start_line + 9
        end_row = end_line - 4
        nrow = end_row - start_row
        df_int = pd.read_csv(file, skiprows=start_row, nrows=nrow)

        # Depth range token (e.g. "5-3cm"), taken from the base name so that
        # underscores in the directory part cannot shift the token index.
        scan_depth = os.path.basename(file).split("_")[1]

        # A scalar is broadcast to every row, so this also works when fewer
        # than nrow rows were actually read.
        df_int['Scan Depth'] = scan_depth

        # Drop the first row of each table (presumably a units row; confirm).
        df_int = df_int.drop([df_int.index[0]])
        df_int = df_int.rename(columns={'Unnamed: 0': 'File Name'})

        frames.append(df_int)

    if not frames:
        raise ValueError("loop_files: no files were given, nothing to concatenate")

    return pd.concat(frames)
    
    

In [27]:
# Main calls all other functions; takes a glob pattern and yes/no if you want a .csv out
def main(files, to_csv):
    """Convert, sort and combine all result files of one snowpit.

    Parameters
    ----------
    files : str
        Glob pattern that matches the raw result files.
    to_csv : bool
        When true, the combined table is also written to
        ``M_RESULTS_2D<snowpit>.csv`` in the current working directory.

    Returns
    -------
    pandas.DataFrame
        The table returned by ``loop_files`` for the converted files.

    Raises
    ------
    FileNotFoundError
        If no file matches ``files``.
    """
    list_files = list_files_local(files)
    if not list_files:
        raise FileNotFoundError("No input files match pattern: " + str(files))

    # Snowpit name = first four characters of the first file's base name
    # (taken from the base name so the directory length does not matter).
    snowpit = os.path.basename(list_files[0])[:4]

    # Convert to UTF-8
    UTF8_convert(list_files)

    # Sort the converted files by the depth parsed from their names.
    # NOTE: every .txt in UTF8_folder is picked up, including files left over
    # from earlier runs.
    UTF8_files = list_files_local(os.path.join(UTF8_folder, '*.txt'))
    UTF8_files = sorted(UTF8_files, key=sample_height)

    # Loop through the files
    result = loop_files(UTF8_files)

    # If a .csv is wanted, export the dataframe
    if to_csv:
        result.to_csv("M_RESULTS_2D" + snowpit + ".csv", index=False)

    return result

In [28]:
# Run the whole pipeline on the configured file pattern and also write the .csv
main(files, to_csv=True)

Unnamed: 0,File Name,Pos.Z,Obj.N,T.Ar,Obj.Ar,Obj.Ar/T.Ar,T.Pm,Obj.Pm,Obj.Pm/Obj.Ar,Av.Obj.Ar,...,MMI(min),T.Or(phi),Ecc,St.Th(pl),St.Sp(pl),St.Li.Dn(pl),FD,i.Pm,Unnamed: 39,Scan Depth
1,1s17_5-3cm_20um_rec_voi_1040.bmp,21.41504,149.0,93.99345,30.53807,32.48957,36.26756,339.58716,11.12013,0.20495,...,,166.54467,0.36757,0.17985,0.37372,1.80644,1.54754,12.48324,,5-3cm
2,1s17_5-3cm_20um_rec_voi_1039.bmp,21.39445,151.0,93.99345,30.49222,32.44079,36.26756,339.39845,11.13066,0.20194,...,,166.41858,0.39134,0.17968,0.37420,1.80544,1.69069,12.83122,,5-3cm
3,1s17_5-3cm_20um_rec_voi_1038.bmp,21.37386,153.0,93.99345,30.61306,32.56936,36.26756,343.01012,11.20470,0.20009,...,,165.30597,0.40363,0.17850,0.36955,1.82465,1.55996,13.28716,,5-3cm
4,1s17_5-3cm_20um_rec_voi_1037.bmp,21.35327,152.0,93.99345,30.81860,32.78803,36.26756,341.37986,11.07707,0.20275,...,,164.40286,0.41216,0.18055,0.37011,1.81598,1.67885,13.16008,,5-3cm
5,1s17_5-3cm_20um_rec_voi_1036.bmp,21.33267,138.0,93.99345,31.11482,33.10318,36.26756,340.59603,10.94642,0.22547,...,,163.18175,0.41304,0.18271,0.36923,1.81181,1.68291,12.80607,,5-3cm
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
897,1s17_94-92cm_20um_rec_voi_0044.bmp,0.90602,262.0,136.94602,45.29154,33.07255,46.80068,562.56584,12.42099,0.17287,...,,137.80450,0.19643,0.16102,0.32584,2.05397,1.76204,0.08237,,94-92cm
898,1s17_94-92cm_20um_rec_voi_0043.bmp,0.88543,258.0,136.94602,45.42197,33.16779,46.80068,561.18839,12.35500,0.17605,...,,136.29174,0.22195,0.16188,0.32618,2.04894,1.82027,0.12355,,94-92cm
899,1s17_94-92cm_20um_rec_voi_0042.bmp,0.86484,253.0,136.94602,45.48435,33.21334,46.80068,563.00421,12.37798,0.17978,...,,137.22689,0.23615,0.16158,0.32491,2.05557,1.82110,0.18532,,94-92cm
900,1s17_94-92cm_20um_rec_voi_0041.bmp,0.84425,262.0,136.94602,45.32307,33.09557,46.80068,564.17989,12.44796,0.17299,...,,136.84910,0.22729,0.16067,0.32480,2.05986,1.82226,0.22651,,94-92cm
