In [1]:
import codecs
import os
import shutil
from glob import glob

import numpy as np
import pandas as pd

### CTan Morphometry Results
##### Author: Anna Valentine (annavalentine@mines.edu)
#### Date: 07/09/21

#### Purpose: 
This notebook takes in a folder of CTan .txt files, converts them to UTF-8 (because Python has trouble reading the ANSI-encoded originals), and compiles the morphometry results. After gathering all the morphometry results in one place, the specific surface area (SSA) is calculated. The morphometry results can be found in the .csv file called "M_RESULTS_PIT1s17.csv". 

In [2]:
# Write down my standard path here
path = '/Users/annav/1_WORK_CRREL/CTan Project 1/' 
files = path+'Data/*.txt'  
UTF8_folder = path + 'D_UTF8/'   # set up a folder for UTF8 conversion

In [3]:
def list_files_local(path):
    """ Get file list form local folder. """
    from glob import glob
    return glob(path)

list_files = list_files_local(files)  


In [4]:
BLOCKSIZE = 300000 # desired size in bytes, this is 300 kB, which is larger than biggest file 

for file in list_files: 
    name_conv = UTF8_folder + file[46:62] +'_UTF8.txt'   #naming convention and moves to folder
    with codecs.open(file, "r", "mbcs") as sourceFile:
        with codecs.open(name_conv, "w", "utf-8") as targetFile:     # convert to UTF-8
            while True:
                contents = sourceFile.read(BLOCKSIZE)
                if not contents:
                    break
                targetFile.write(contents)
                
#Let's grab the snowpit name while we are at it, but I dont know if all snowpits are 4 characters?
snowpit = list_files[0][46:50]

In [5]:
#Now let's look at our converted UTF8 files
UTF8_files = TK21.list_files_local(UTF8_folder+ '/*.txt')  


NameError: name 'TK21' is not defined

In [None]:
#We need to figrure out where to start our skiprows!
term = "MORPHOMETRY"

def find_start(term, file):
    row = 0
    file_o = open(file)
    for line in file_o:
        row += 1
        line.strip().split('/n')
        if term in line:
            return (row +1)
    file.close()

In [None]:
#Make a preliminary dataframe to append to (with description, a column with Units)
s_row1 = find_start(term, f1)
df1 = pd.read_csv(f1, skiprows= s_row1, nrows= 52)  #Read in the part of the file we want, "morphometry results"
### Description, Abbreviation, Value, Unit
df1.loc[-3] = ['Sample Name', np.nan, np.nan, np.nan]  # Row for Sample Name
df1.loc[-2] = ['Scan Depth', np.nan, np.nan, 'cm']  # Row for Scan Depth
df1.loc[-1] = ['Average Depth', np.nan, np.nan, 'cm']  # Row for Avg. Depth
df1.index = df1.index + 3  # shifting index
df1.sort_index(inplace=True) 

#Okay, our main dataframe just has the description column woohoo
df_main = pd.DataFrame(df1["Description"])
df_main["Unit"] = pd.DataFrame(df1["Unit"])


In [None]:
for file in UTF8_files:
    
    #Find start row, read in csv for Morpho Results
    row = find_start(term, file)
    df_int = pd.read_csv(file, skiprows= row, nrows= 52)
    
    #Find scan depth,
    sc = file.split("_") #This read from file name
    scan_depth = sc[4]

    #find average depth
    cutoff = scan_depth[0:-2]
    x = cutoff.split("-")
    hi = float(x[0])
    lo = float(x[1])
    avg_depth = (hi + lo)/2
   
    
    ### Add in some rows for this prelim info
    ### Description, Abbreviation, Value, Unit
    df_int.loc[-3] = ['Sample Name', np.nan, np.nan, np.nan]  # Row for Sample Name (which I don't know?)
    df_int.loc[-2] = ['Scan Depth', np.nan, scan_depth, 'cm']  # Row for Scan Depth
    df_int.loc[-1] = ['Average Depth', np.nan, avg_depth, 'cm']  # Row for Avg. Depth
    df_int.index = df_int.index + 3  # shifting index
    df_int.sort_index(inplace=True) 
    
    
    #Add the column we want ("Values") to the datafram
    df_main[avg_depth] = df_int["Value"]
    
    
    

In [None]:
# Take away the dang headers
SSA = ["SSA", "m^2/kg"]
SSA_int = df_main.loc[ 15, : ]  #Location of Object Surface/ Volume Ratio

# mm^2/mm^3 --> m^2/m^3
mm2m = 1000

# m^2/m^3 --> m^2/kg
m2kg = .001090513

#Initialize
SSA_i = 0

#Loop through and calculate
for i in range(len(SSA_int)-2):
    SSA_i = float(SSA_int[i+2])*mm2m*m2kg
    SSA.append(SSA_i) 


df_main.loc[55] = SSA    #Add to our main dataframe!
   

In [None]:
# Export our dataframe to a .csv

df_main.to_csv("M_RESULTS_PIT"+snowpit+".csv", index =False)

In [None]:
#Plotting
import matplotlib.pyplot as plt 

#Importing from our dataframe, I think the df's indexing is a little weird? 
depth = df_main.loc[2, :]
SSA = df_main.loc[55]

depth = depth[2:]
SSA = SSA[2:]

plt.scatter(SSA, depth)
plt.ylabel("Depth (cm)")
plt.xlabel("SSA (m^2/kg)")
plt.xlim(0, 25)