In [1]:
import numpy 
import json
import os
import pandas as pd
from pprint import pprint

Here we:
1. Read the txt files found in a folder
2. Parse the information in them into a dictionary
3. Save that dictionary into a JSON file

In [None]:
# Folder containing the .txt files to parse; can be changed to any path.
path = "/Users/lucab/Google Drive/Crispies/Method1/SSP_all/Output/"
# Make that folder the working directory: files are opened by bare name below,
# and anything written with a relative file name ends up in this folder as well.
os.chdir(path)
txt_files = [f for f in os.listdir(path) if f.endswith('txt')]
# Report how many files were found. This must count `txt_files`: the name `file`
# is only bound later by the parsing loop, so using it here fails on a fresh kernel.
print(f"Found {len(txt_files)} files")

# Structure of each text file, as read by the slicing below. The very first
# line of the file is skipped; after that every measurement takes 6 lines:
#   record line 1:  file name            -> characters [15:-6] are kept
#   record line 2:  scan size            -> characters [12:-4]
#   record line 3:  roughness            -> characters [13:-4]
#   record line 4:  correlation length   -> characters [21:-4]
#   record line 5:  alpha                -> characters [20:-1]
#   record line 6:  not read (presumably an empty separator line - TODO confirm)

data = {}

for file in txt_files:

    # Only the read needs the file handle; parsing works on the in-memory list
    with open(file) as f:
        lines = f.readlines()

    lines.pop(0) # skip the leading line of the file (expected to be empty)
    n_measurements = len(lines) // 6 # number of complete 6-line records in this txt file

    for i in range(n_measurements):
        start = i*6 # position of the first line of record i
        filename = lines[start][15:-6]
        scansize = lines[start+1][12:-4]
        roughness = lines[start+2][13:-4]
        corr_len = lines[start+3][21:-4]
        alpha = lines[start+4][20:-1]

        # Characters 2-4 of the file name are used as the temperature;
        # the code 'asd' is replaced by the value '25'
        temperature = filename[2:5]
        if temperature == 'asd':
            temperature = '25'

        # All values are still strings at this point; one entry per file name
        data[filename] = {
            'temp': temperature,
            'L': scansize,
            'xi': corr_len,
            'rms': roughness,
            'a': alpha,
        }

In [None]:
# with open('json_example.json', 'w') as outfile:
#     json.dump(data, outfile, indent=3) # saves to a JSON FILE

In [None]:
# pandas dataframe for analysis, built in a single chain:
#   1. one row per entry of `data`, with the file name as the index
#   2. every parsed column converted from string to float
#   3. the index moved into a regular column ...
#   4. ... which is then renamed from 'index' to 'name'
numeric_columns = ['temp', 'L', 'xi', 'a', 'rms']
df = (
    pd.DataFrame.from_dict(data, orient='index')
    .astype({column: 'float' for column in numeric_columns})
    .reset_index()
    .rename(columns={'index': 'name'})
)
# df.head(5)

In [None]:
## A DataFrame column can be taken with dot notation ...
all_filenames = df.name
## ... or, equivalently, with bracket notation - the result is exactly the same:
all_filenames = df['name']
# print(all_filenames)
# print(type(all_filenames))

## A column is a pandas Series, which offers methods such as .mean() or .std()
xi_mean = df.xi.mean()
xi_std = df.xi.std()
print(f"\nAverage correlation length = {xi_mean:.3} ± {xi_std:.2}")

In [None]:
## A comparison on a column tells which rows satisfy a condition, e.g.
# print(df.xi==24.2)
## Here such a condition selects the rows measured at each temperature.
temperatures = df.temp.unique()

AvCorrLen = {}
Err_AvCorrLen = {}

# Average correlation length, and its standard deviation, for every temperature
for t in temperatures:
    xi_at_t = df.loc[df.temp == t, 'xi'] # xi values of the rows at this temperature
    corr = xi_at_t.mean()
    err_corr = xi_at_t.std()
    AvCorrLen[t] = {'AvCorr': corr}
    Err_AvCorrLen[t] = {'AvCorrError': err_corr}
    
# # Based on that result, we can slect rows in the original dataframe, making a sort of 'filter'
#     tAsD = df[df.temp==25]
#     t250 = df[df.temp==250]
#     t500 = df[df.temp==500]
#     t600 = df[df.temp==600] # and we assign this to a new DataFrame
#     t700 = df[df.temp==700]
#     t800 = df[df.temp==800]

# corr1 = tAsD.corrlen.mean()
# corr2 = t250.corrlen.mean()
# corr3 = t500.corrlen.mean()
# corr4 = t600.corrlen.mean()
# corr5 = t700.corrlen.mean()
# corr6 = t800.corrlen.mean()

# Store the mean and its error together, per temperature, as ONE JSON document.
# Dumping the two dictionaries one after the other into the same file (the second
# one in append mode) leaves two concatenated documents in it, which json.load()
# cannot read back ("Extra data").
AvCorrLen_with_error = {
    t: {**AvCorrLen[t], **Err_AvCorrLen.get(t, {})}
    for t in AvCorrLen
}

with open('Ru_H_AvCorrLen.json', 'w') as outfile:
    json.dump(AvCorrLen_with_error, outfile, indent=3) # saves to a JSON FILE

    