In [32]:
import numpy 
import json
import os
import pandas as pd
from pprint import pprint

In [33]:
path = "/Users/sfiligoj/Desktop/clean/Low-High_Density_Ru/"

# The measurement files are opened by bare name below, so they are resolved
# against the working directory set here. Can be changed to any path.
os.chdir(path)

# Require the full '.txt' extension: a bare 'txt' suffix would also match
# names that merely end in those three letters.
files = [f for f in os.listdir(path) if f.endswith('.txt')]
print(f"Found {len(files)} files")

# Layout of each file: one leading empty line, then repeated 4-line records:
# line 1:  Filename = XXXXXXXXX.tif
# line 2:  ScanSize = 1180 nm
# line 3:  Correlation length = XX.x nm
# line 4:  (empty)

# Maps each image filename to {'temp', 'scansize', 'corrlen'}; every value is
# still a string at this stage.
data = {}

for file in files:
    # Read everything up front so the handle is closed before parsing starts.
    with open(file) as f:
        lines = f.readlines()

    if not lines:
        continue  # an empty file has no leading line to drop and no records

    lines.pop(0)  # first line is empty
    n_measurements = len(lines) // 4  # only complete 4-line records are parsed

    for i in range(n_measurements):
        base = i * 4
        # Fixed character offsets strip the label prefix and the trailing
        # extension/unit plus line ending.
        # NOTE(review): assumes the exact label widths shown above - confirm
        # against a real file before changing the offsets.
        filename = lines[base + 0][12:-6]
        temperature = filename[3:6]
        # 'asd' is mapped to 25; presumably it marks as-deposited samples
        # measured at room temperature - TODO confirm.
        if temperature == 'asd':
            temperature = '25'
        scansize = lines[base + 1][12:-5]
        corr_len = lines[base + 2][22:-5]
        data[filename] = {
            'temp': temperature,
            'scansize': scansize,
            'corrlen': corr_len,
        }

Found 6 files


In [26]:
# with open('json_example.json', 'w') as outfile:
#     json.dump(data, outfile, indent=3) # saves to a JSON FILE

In [34]:
# Build the analysis DataFrame in a single chain: one row per parsed image,
# numeric columns converted from their string form, and the filename moved
# out of the index into an ordinary column.
df = (
    pd.DataFrame.from_dict(data, orient='index')
    # string to float conversion for every measured quantity
    .astype({'temp': 'float', 'corrlen': 'float', 'scansize': 'float'})
    # the filename index becomes a regular column, which pandas calls 'index'
    .reset_index()
    # give that column a meaningful name
    .rename(columns={'index': 'name'})
)
# df.head(5)

In [35]:
## DataFrames are convenient because a whole column can be selected by name.
## Bracket notation (df['name']) and dot notation (df.name) return the same column.
all_filenames = df['name']
# print(all_filenames)
# print(type(all_filenames))

## A column is a pandas.Series, so methods like .mean() or .std() can be called on it.
## Fixed-point formatting is used for both numbers: the general format with two
## significant digits rendered the standard deviation in scientific notation ('2e+01').
print(f"\nAverage correlation length = {df.corrlen.mean():.1f} ± {df.corrlen.std():.1f}")


Average correlation length = 22.5 ± 2e+01


In [36]:
## Comparing a column with a value yields a boolean mask (one True/False per row),
## e.g. df.corrlen == 24.2. Indexing the DataFrame with such a mask keeps only the
## matching rows, which works as a filter: df[df.temp == 25] holds just the rows
## recorded at temperature 25.

# Distinct temperatures, in order of first appearance in the DataFrame.
temperatures = df['temp'].unique()

# Average correlation length per temperature: filter the rows belonging to each
# temperature with a mask and take the mean of their 'corrlen' values.
AvCorrLen = {
    temp_value: {'AvCorr': df.loc[df['temp'] == temp_value, 'corrlen'].mean()}
    for temp_value in temperatures
}

# Save the per-temperature averages to a JSON file in the working directory.
with open('json_example.json', 'w') as json_file:
    json.dump(AvCorrLen, json_file, indent=3)