# This notebook does the following things:
# 1. Extract rW and the relative phase content from PDFgui project files (.ddp or .ddp3)
# 2. Calculate the Pearson correlation coefficient

In [1]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from copy import deepcopy
from diffpy.pdfgui.tui import LoadProject
from diffpy.pdfgui.control.pdfguicontrol import PDFGuiControl
from diffpy.pdffit2.pdffit2 import calculationError

from IPython.display import clear_output
# from tqdm.auto import tqdm
import time
import shutil
import yaml

In [2]:
def get_uid_by_fn(fn, uid_idx=-4, sep='_'):
    """Pick the uid token out of a data file name.

    The directory part of ``fn`` is discarded, the remaining file name is
    split on ``sep`` and the piece at position ``uid_idx`` is returned.

    Parameters
    ----------
    fn : str or path-like
        File name or full path; it is converted with ``str()`` first.
    uid_idx : int
        Index of the uid among the ``sep``-separated pieces of the file name.
    sep : str
        Separator used inside the file name.

    Returns
    -------
    str
        The selected piece of the file name.
    """
    _, file_name = os.path.split(str(fn))
    return file_name.split(sep)[uid_idx]

In [3]:
def get_item_string(str_obj, sep='  '):
    """Split ``str_obj`` on ``sep`` and return the non-empty pieces.

    Empty pieces (produced by consecutive separators) are dropped and any
    leading spaces left on a piece are removed. Trailing spaces are kept.
    """
    item_list = []
    for piece in str_obj.split(sep):
        if piece == '':
            continue
        item_list.append(piece.lstrip(' '))
    return item_list

def get_phase_content(fitting_obj, num_phase=3, content_key='Relative phase content in terms of'):
    """Parse the relative phase-content table from a fit result text.

    Parameters
    ----------
    fitting_obj : object with a ``res`` attribute
        For example ``xxxx.getFits()[i]`` when a ddp file was read, or
        ``xxxx.fits[i]`` when the project was created from ``PDFGuiControl()``.
        ``res`` must be the result text of the fit.
    num_phase : int
        Number of phase rows to read below the table header.
    content_key : str
        Text identifying the line that announces the table. The line right
        after it is read as the column titles, the ``num_phase`` lines after
        that as one row per phase.

    Returns
    -------
    dict
        Maps every column title to a list holding one ``np.float32`` value
        per phase, in row order.

    Raises
    ------
    ValueError
        If ``content_key`` does not occur in the result text.
    """
    res_split = fitting_obj.res.split('\n')

    matching_lines = [line for line in res_split if content_key in line]
    if not matching_lines:
        # Previously this surfaced as an UnboundLocalError on ``s_idx``.
        raise ValueError(f'{content_key!r} was not found in the fit result text.')
    # Same lookup as before: position of the first line whose text equals
    # the last matching line.
    s_idx = res_split.index(matching_lines[-1])

    # Column titles are separated by runs of spaces; splitting on a double
    # space keeps multi-word titles in one piece.
    key_list = get_item_string(res_split[s_idx + 1], sep='  ')

    content_list = []
    for i in range(num_phase):
        phase_i = get_item_string(res_split[s_idx + 2 + i], sep=' ')
        # Tokens 0-2 are skipped as the row label. If the first value token is
        # exactly '0', every remaining token is read as a value; otherwise
        # every second token is read (presumably skipping an uncertainty
        # printed after each value -- confirm against the PDFfit2 output).
        if phase_i[3] == '0':
            phase_ii = [float(j) for j in phase_i[3:]]
        else:
            phase_ii = [float(j) for j in phase_i[3::2]]
        content_list.append(phase_ii)

    content_array = np.asarray(content_list, dtype=np.float32)

    # Pair each column title with its column. The number of columns is
    # independent of ``num_phase`` (the number of rows), so iterate over the
    # columns themselves rather than over ``range(num_phase)``.
    return {key: list(column) for key, column in zip(key_list, content_array.T)}


def get_rw_in_result(fitting_obj, content_key='Rw - value'):
    """Return the Rw value printed in a fit result text.

    Parameters
    ----------
    fitting_obj : object with a ``res`` attribute
        ``res`` must be the result text of the fit.
    content_key : str
        Text identifying the line that carries the Rw value. The value is
        taken from the last whitespace-separated token of that line.

    Returns
    -------
    float
        The parsed Rw value. If several lines contain ``content_key`` the
        last one is used.

    Raises
    ------
    ValueError
        If ``content_key`` does not occur in the result text, or the last
        token of the matching line is not a number.
    """
    matching_lines = [
        line for line in fitting_obj.res.split('\n') if content_key in line
    ]
    if not matching_lines:
        # Previously this surfaced as an UnboundLocalError on ``s_idx``.
        raise ValueError(f'{content_key!r} was not found in the fit result text.')

    # split() without arguments ignores trailing blanks and '\r', which made
    # the former split(' ')[-1] return an empty or unparsable token.
    return float(matching_lines[-1].split()[-1])
    

In [4]:
def index_uid_list(uid, uid_array, print_info=True):
    """Find the positions in ``uid_array`` whose entry contains ``uid``.

    Parameters
    ----------
    uid : str
        The (possibly shortened) uid to look for.
    uid_array : array-like
        Candidate uids; converted with ``np.asarray``. An entry matches when
        ``uid in entry`` is true.
    print_info : bool
        When true, print whether zero, one or several matches were found.

    Returns
    -------
    list of int
        Indices of all matching entries, in ascending order.
    """
    candidates = np.asarray(uid_array)
    idx_list = [
        idx for idx in range(candidates.shape[0]) if uid in candidates[idx]
    ]

    if not print_info:
        return idx_list

    n_found = len(idx_list)
    if n_found == 0:
        print('Cannot find the corresponding uid. Need further check.')
    elif n_found == 1:
        # The name ``idx_list`` is part of the printed text via ``{... = }``.
        print(f'Only found one corresponding uid {idx_list = }. It is good.')
    else:
        print('Found more than one coorespnding uids. Need further check.')

    return idx_list


## Get uids from an Excel spreadsheet

In [5]:
# Location of the input spreadsheet that lists the sample UIDs.
xlsx_dir = '/Users/cheng-hunglin/Library/CloudStorage/OneDrive-BrookhavenNationalLaboratory/LDRD_PQDs_log'
xlsx_name = 'blop_test_20250724.xlsx'
# Full path of the input workbook; read by the next cell.
xlsx_fn = os.path.join(xlsx_dir, xlsx_name)

In [6]:
# Read the beamtime sheet; header=1 takes the column labels from the second
# row of the sheet (0-based row 1).
xlsx = pd.read_excel(xlsx_fn, sheet_name='20250606_XPD_beamtime', header=1, )
xlsx

Unnamed: 0,Cs-rich 33mM,TOABr 66 mM,Pb-rich 33mM,20%OLA 66mM,Unnamed: 4,PLQY,Peak (nm),FWHM (nm),CsBr.gr correlation,Cs4PbBr6.gr correlation,CsPbBr3.gr correlation,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,UID
0,20.00,40.00,180.00,0.00,,0.30,512.51,24.24,0.355,0.391,0.548,,,,,,c1a394d2-4436-404c-8cf7-7ea7d886b27e
1,180.00,40.00,20.00,0.00,,0.13,512.88,28.38,0.769,0.515,0.281,,,,,,9e13eba9-85a6-420b-bbf2-92d035c8c4ec
2,80.00,80.00,80.00,0.00,,0.26,513.80,25.20,0.412,0.475,0.645,,,,,,d03d556a-29e5-4fbb-96eb-71cc843f4a3a
3,20.00,80.00,140.00,0.00,,0.20,514.93,23.33,0.254,0.376,0.648,,,,,,a26ebc5e-387e-45ac-b555-d0889ae15126
4,140.00,80.00,20.00,0.00,,0.24,514.28,25.27,0.822,0.540,0.338,,,,,,2047e3cc-0b3f-4556-af59-d1f4558de3a2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,20.00,70.00,22.00,0.00,,0.14,516.26,21.64,0.249,0.601,0.637,,,,,,a9aa8a79
90,35.45,162.02,83.42,37.64,,,,,,,,,,,,,
91,18.00,80.00,40.00,18.00,,,,,,,,,,,,,
92,30.00,90.00,50.00,5.00,,,,,,,,,,,,,


In [7]:
xlsx['UID'].shape

(94,)

In [8]:
# Keep only the rows that have a UID: dropna() removes the empty cells, so
# positions in this array index the remaining UIDs, not the original rows.
xlsx_uid_array = xlsx['UID'].dropna().to_numpy()
xlsx_uid_array.shape

(90,)

## Load ddp project files to get rW and phase scale

In [9]:
# Directory and file names of the three PDFgui project files.
ddp_dir = '/Users/cheng-hunglin/Documents/Data_LDRD'
day1_name = 'Cs_x-Pb_y-B_z_serial_bkg_10min_22-1.ddp.ddp3'
day3_name = 'Cs_x-Pb_y-B_z_serial_bkg_day3_22-1.ddp.ddp3'
day4_name = 'Cs_x-Pb_y-B_z_serial_bkg_day4_22-1.ddp.ddp3'

# Full paths, built in the same order as the names above.
fn_10min, fn_day3, fn_day4 = (
    os.path.join(ddp_dir, project_name)
    for project_name in (day1_name, day3_name, day4_name)
)

In [10]:
# Load the three PDFgui project files; each returned object is queried with
# getFits() in the cells below.
ddp_10min = LoadProject(fn_10min)
ddp_day3 = LoadProject(fn_day3)
ddp_day4 = LoadProject(fn_day4)

In [11]:
# Spot check: read the 'rw' data item of the third fit in the 10-min project.
rw = ddp_10min.getFits()[2].getData('rw')

In [12]:
rw

0.45467085458089257

In [13]:
# Spot check: first key of the second fit's datasets; used below as the data
# file name (presumably keys() returns an indexable list here -- confirm
# against diffpy.pdfgui).
fn = ddp_10min.getFits()[1].datasets.keys()[0]

In [14]:
type(ddp_10min.getFits())

list

In [15]:
get_uid_by_fn(fn)

'91db25'

In [16]:
len(ddp_10min.getFits())

38

In [17]:
from numpy.dtypes import StringDType

# Every result array gets one slot per UID kept from the spreadsheet. Slots
# that no fit is matched to are never written and keep their initial value.
out_shape = xlsx_uid_array.shape

# Numeric results: Rw and the three per-phase values.
rw_array, CsBr_array, Cs4PbBr6_array, CsPbBr3_array = (
    np.zeros(out_shape) for _ in range(4)
)

# Text results: uid parsed from the data file name, and the file name itself.
ddp_uid_array, fn_array = (
    np.empty(out_shape, dtype=StringDType()) for _ in range(2)
)

In [18]:
rw_array.shape

(90,)

In [19]:
# uids parsed from fit file names that have no match in the spreadsheet.
check_uid = []
# All fits of the three projects, in the order 10 min, day 3, day 4.
total_pdffit2 = ddp_10min.getFits() + ddp_day3.getFits() + ddp_day4.getFits()

for i, fit in enumerate(total_pdffit2):
    # The first dataset key of the fit is used as its data file name.
    fn = fit.datasets.keys()[0]

    rw = get_rw_in_result(fit)
    phase_content = get_phase_content(fit)

    print(fn, rw)

    uid_fit = get_uid_by_fn(fn)
    idx_list = index_uid_list(uid_fit, xlsx_uid_array)

    print('\n')

    if not idx_list:
        # No spreadsheet row for this fit: remember the uid and move on.
        check_uid.append(uid_fit)
        continue

    # When several rows match, only the first one receives the results.
    row = idx_list[0]
    fn_array[row] = fn
    rw_array[row] = rw
    ddp_uid_array[row] = uid_fit

    # Entries of the 'mass' column, in phase-row order 0, 1, 2.
    mass_column = phase_content['mass']
    CsBr_array[row] = mass_column[0]
    Cs4PbBr6_array[row] = mass_column[1]
    CsPbBr3_array[row] = mass_column[2]

Cs_060_Br_120_Pb_060_PF_020_Tol_1200_20250606-233737_992618_scattering-1_mean_q.gr 0.262066
Only found one corresponding uid idx_list = [5]. It is good.


Cs_000_Br_100_Pb_100_OLA_020_PF_018_Tol_1100_20250607-195546_91db25_scattering-1_mean_q.gr 0.453689
Only found one corresponding uid idx_list = [20]. It is good.


Cs_000_Br_120_Pb_060_OLA_005_PF_015_Tol_925_20250607-210829_ee525f_scattering-1_mean_q.gr 0.454671
Only found one corresponding uid idx_list = [22]. It is good.


Cs_000_Br_120_Pb_120_PF_020_Tol_1200_20250607-021742_d0b4bc_scattering-1_mean_q.gr 0.451138
Only found one corresponding uid idx_list = [9]. It is good.


Cs_000_Br_180_Pb_060_PF_020_Tol_1200_20250607-023725_6129b4_scattering-1_mean_q.gr 0.609117
Only found one corresponding uid idx_list = [10]. It is good.


Cs_005_Br_040_Pb_010_OLA_000_PF_005_Tol_275_20250607-220713_6c0aea_scattering-1_mean_q.gr 0.305446
Only found one corresponding uid idx_list = [23]. It is good.


Cs_010_Br_080_Pb_110_OLA_010_PF_018_Tol_1050

In [20]:
rw_array

array([0.498661, 0.421203, 0.326182, 0.376581, 0.233113, 0.262076,
       0.267686, 0.171292, 0.126442, 0.451138, 0.609117, 0.11297 ,
       0.116686, 0.427648, 0.290175, 0.49738 , 0.443992, 0.362031,
       0.39817 , 0.358669, 0.453689, 0.236434, 0.454671, 0.305446,
       0.205521, 0.228118, 0.357139, 0.325553, 0.323024, 0.297829,
       0.321884, 0.348258, 0.215812, 0.251361, 0.257464, 0.359031,
       0.776852, 0.256612, 0.29968 , 0.581877, 0.199612, 0.75394 ,
       0.540659, 0.566139, 0.364869, 0.356048, 0.378455, 0.430649,
       0.336232, 0.358438, 0.311056, 0.393756, 0.420404, 0.526983,
       0.421198, 0.493059, 0.481528, 0.286205, 0.270221, 0.27442 ,
       0.303323, 0.4479  , 0.404083, 0.365978, 0.338735, 0.288902,
       0.222444, 0.205972, 0.199022, 0.203475, 0.222852, 0.200619,
       0.232199, 0.193461, 0.243753, 0.223023, 0.171194, 0.157225,
       0.191966, 0.206643, 0.191396, 0.186939, 0.157742, 0.456707,
       0.421519, 0.292649, 0.257327, 0.408539, 0.356649, 0.283

In [21]:
ddp_uid_array

array(['c1a394', '9e13eb', 'd03d55', 'a26ebc', '2047e3', '992618',
       '4d8520', 'e4b5eb', 'db00fc', 'd0b4bc', '6129b4', '854c30',
       '860dbf', '99c5e0', '7ac162', '8b795a', '50fbde', '2ea4da',
       '26d97d', 'ed358a', '91db25', '61ff18', 'ee525f', '6c0aea',
       'ca1d54', '5cd92f', '620d4e', 'e65c02', 'd0803c', '0bfd55',
       '629b0d', 'e27e88', 'c172aa', '08e90c', '4cd305', 'a5e8e9',
       '4e1986', '717c6e', 'ded9a3', '3486c9', 'dc413d', '9d3865',
       'e5c3d6', '938911', 'b5be4a', '9f5fa3', '3acff7', '4a9843',
       '804aeb', '70c3f8', '2a946a', '6b21cd', '4208be', '3d7ab3',
       '15626c', 'f353ff', '0b6ae6', '8364d6', 'd95074', '157555',
       'c202cd', '757e04', 'd50931', '5d181d', '95b2c2', '12cfba',
       '44c93f', '291d1d', '2e9a36', '746b39', 'fc4b69', 'fcded6',
       '1ec05d', 'bf8e84', 'ea83af', '315214', '86af6b', '1b0770',
       'ff2a42', '922f95', '1e9846', 'c2b71e', '8d176d', 'c8114c',
       '0718dd', 'acc484', '58afe3', 'bf9914', '1785b6', 'a9aa

In [22]:
check_uid

['3ae92b', 'cd7e20']

In [24]:
# Collect the per-UID fit results into one table; the column order follows
# the insertion order of the dict.
xlsx_fit = pd.DataFrame(
    {
        'rw': rw_array,
        'CsBr wt%': CsBr_array,
        'Cs4PbBr6 wt%': Cs4PbBr6_array,
        'CsPbBr3 wt%': CsPbBr3_array,
        'uid_in_ddp': ddp_uid_array,
        'chi_name': fn_array,
    }
)

In [25]:
# Write the fit summary to a NEW workbook.
# The export used to pass the stale ``xlsx_fn`` (the 20250724 input workbook
# read at the top of the notebook), so the input file was overwritten and the
# 20250725 name defined here was never used. A separate variable now holds
# the output path so the input path stays untouched.
xlsx_dir = '/Users/cheng-hunglin/Library/CloudStorage/OneDrive-BrookhavenNationalLaboratory/LDRD_PQDs_log'
xlsx_name = 'blop_test_20250725.xlsx'
xlsx_out_fn = os.path.join(xlsx_dir, xlsx_name)
xlsx_fit.to_excel(xlsx_out_fn, sheet_name='20250725_pdffit2')