# This notebook does the following things:
# 1. Extract rW from PDFgui project file (.ddp or .ddp3)
# 2. Calculate the Pearson correlation coefficient

In [15]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from copy import deepcopy
from diffpy.pdfgui.tui import LoadProject
from diffpy.pdfgui.control.pdfguicontrol import PDFGuiControl
from diffpy.pdffit2.pdffit2 import calculationError

from IPython.display import clear_output
# from tqdm.auto import tqdm
import time
import shutil
import yaml

In [154]:
def get_uid_by_fn(fn, uid_idx=-4, sep='_', ):
    """Return one ``sep``-delimited token of a file's base name.

    Parameters
    ----------
    fn : str or path-like
        File name or path; it is converted with ``str()`` and only the
        base name (the part after the last path separator) is used.
    uid_idx : int, optional
        Position of the wanted token in the split base name. Negative
        values count from the end.
    sep : str, optional
        Delimiter used to split the base name.

    Returns
    -------
    str
        The token found at ``uid_idx``.
    """
    return os.path.basename(str(fn)).split(sep)[uid_idx]

In [156]:
def get_item_string(str_obj, sep='  '):
    """Split ``str_obj`` on ``sep`` and return the non-empty fragments.

    Leading spaces are removed from every fragment. Fragments that are
    empty, or that contain nothing but spaces, are dropped, so the result
    never contains an empty string.

    Parameters
    ----------
    str_obj : str
        Text to split.
    sep : str, optional
        Separator passed to ``str.split`` (two spaces by default).

    Returns
    -------
    list of str
        The remaining fragments, in their original order.
    """
    # Strip first, then filter: a fragment such as ' ' (left over when the
    # run of spaces has odd length) must not survive as ''.
    stripped = (piece.lstrip(' ') for piece in str_obj.split(sep))
    return [piece for piece in stripped if piece != '']


def get_phase_content(fitting_obj, num_phase=3, content_key='Relative phase content in terms of'):
    """Parse the relative-phase-content table from a fit's text report.

    ``fitting_obj.res`` is scanned for the first line containing
    ``content_key``. The line right after it is read as the column headers
    and the ``num_phase`` lines after that are read as one row per phase.

    Parameters
    ----------
    fitting_obj : object
        Object exposing the report as a string attribute ``res``, e.g.
        ``xxxx.getFits()[i]`` when a ddp file was read, or ``xxxx.fits[i]``
        when created from ``PDFGuiControl()``.
    num_phase : int, optional
        Number of phase rows to read below the header line.
    content_key : str, optional
        Text that identifies the line preceding the table.

    Returns
    -------
    dict
        Maps each column header to a list of ``numpy.float32`` values, one
        per phase, in row order.

    Raises
    ------
    ValueError
        If no line of the report contains ``content_key``.
    """
    report_lines = fitting_obj.res.split('\n')

    # Locate the table once; a missing key is reported explicitly instead of
    # surfacing later as an unbound local variable.
    key_idx = next(
        (idx for idx, line in enumerate(report_lines) if content_key in line),
        None,
    )
    if key_idx is None:
        raise ValueError(f'{content_key!r} not found in the fitting result')

    key_list = get_item_string(report_lines[key_idx + 1], sep='  ')

    content_list = []
    for row in range(num_phase):
        tokens = get_item_string(report_lines[key_idx + 2 + row], sep=' ')
        # The first three tokens are skipped (presumably the row label,
        # e.g. 'Phase 1 :' -- confirm against the report format).
        if tokens[3] == '0':
            # A row starting with a bare '0': every remaining token is a value.
            values = tokens[3:]
        else:
            # Otherwise every second token is taken (assumes each value is
            # followed by one extra token, e.g. its uncertainty -- confirm).
            values = tokens[3::2]
        content_list.append([float(v) for v in values])

    content_array = np.asarray(content_list, dtype=np.float32)

    # One entry per table column. Pairing headers with the array's columns
    # keeps this independent of ``num_phase``, which is the number of rows.
    return {key: list(column) for key, column in zip(key_list, content_array.T)}

## Get uids from an Excel spreadsheet

In [2]:
# Spreadsheet file name and the folder that contains it.
xlsx_name = 'blop_test_20250724.xlsx'
xlsx_dir = '/Users/cheng-hunglin/Library/CloudStorage/OneDrive-BrookhavenNationalLaboratory/LDRD_PQDs_log'

# Full path of the spreadsheet.
xlsx_fn = os.path.join(xlsx_dir, xlsx_name)

In [5]:
# Read one sheet of the spreadsheet; header=1 takes the column labels from
# the sheet's second row (row index 1).
xlsx = pd.read_excel(
    xlsx_fn,
    sheet_name='20250606_XPD_beamtime',
    header=1,
)
xlsx

Unnamed: 0,Cs-rich 33mM,TOABr 66 mM,Pb-rich 33mM,20%OLA 66mM,Unnamed: 4,PLQY,Peak (nm),FWHM (nm),CsBr.gr correlation,Cs4PbBr6.gr correlation,CsPbBr3.gr correlation,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,UID
0,20.00,40.00,180.00,0.00,,0.30,512.51,24.24,0.355,0.391,0.548,,,,,,c1a394d2-4436-404c-8cf7-7ea7d886b27e
1,180.00,40.00,20.00,0.00,,0.13,512.88,28.38,0.769,0.515,0.281,,,,,,9e13eba9-85a6-420b-bbf2-92d035c8c4ec
2,80.00,80.00,80.00,0.00,,0.26,513.80,25.20,0.412,0.475,0.645,,,,,,d03d556a-29e5-4fbb-96eb-71cc843f4a3a
3,20.00,80.00,140.00,0.00,,0.20,514.93,23.33,0.254,0.376,0.648,,,,,,a26ebc5e-387e-45ac-b555-d0889ae15126
4,140.00,80.00,20.00,0.00,,0.24,514.28,25.27,0.822,0.540,0.338,,,,,,2047e3cc-0b3f-4556-af59-d1f4558de3a2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,20.00,70.00,22.00,0.00,,0.14,516.26,21.64,0.249,0.601,0.637,,,,,,a9aa8a79
90,35.45,162.02,83.42,37.64,,,,,,,,,,,,,
91,18.00,80.00,40.00,18.00,,,,,,,,,,,,,
92,30.00,90.00,50.00,5.00,,,,,,,,,,,,,


In [8]:
# Size of the 'UID' column as read from the sheet, before any missing
# entries are dropped.
xlsx['UID'].shape

(94,)

In [22]:
# Keep only the rows that have a UID and hand them over as a NumPy array.
xlsx_uid_array = (
    xlsx['UID']
    .dropna()
    .to_numpy()
)
xlsx_uid_array.shape

(90,)

## Load ddp project files to get rW and phase scale

In [16]:
# Folder with the PDFgui project files and the three project file names
# (labelled 10min, day3 and day4 in the file names).
ddp_dir = '/Users/cheng-hunglin/Documents/Data_LDRD'
day1_name, day3_name, day4_name = (
    'Cs_x-Pb_y-B_z_serial_bkg_10min_22-1.ddp.ddp3',
    'Cs_x-Pb_y-B_z_serial_bkg_day3_22-1.ddp.ddp3',
    'Cs_x-Pb_y-B_z_serial_bkg_day4_22-1.ddp.ddp3',
)

# Full paths, in the same order as the names above.
fn_10min, fn_day3, fn_day4 = (
    os.path.join(ddp_dir, name) for name in (day1_name, day3_name, day4_name)
)

In [17]:
# Load each project file with LoadProject (imported from diffpy.pdfgui.tui).
# The three loads are kept as separate statements so that a failure on one
# file leaves the projects loaded before it available.
ddp_10min = LoadProject(fn_10min)
ddp_day3 = LoadProject(fn_day3)
ddp_day4 = LoadProject(fn_day4)

In [18]:
# Spot check: read the 'rw' entry of the fit at index 1 in the 10min project.
rw = ddp_10min.getFits()[1].getData('rw')

In [19]:
# Display the value stored in rw.
rw

0.4536889791508549

In [23]:
# First dataset key of the fit at index 1 (presumably the data file name --
# confirm). NOTE(review): indexing keys() with [0] requires keys() to return
# a list-like object rather than a plain dict view.
fn = ddp_10min.getFits()[1].datasets.keys()[0]

In [28]:
# Extract the uid token from fn with the default token position and separator.
get_uid_by_fn(fn)

'91db25'

In [63]:
# Number of fits returned by the 10min project.
len(ddp_10min.getFits())

38

In [157]:
# Four independent, initially empty lists (one for rw, one per phase name).
rw_list, CsBr_list, Cs4PbBr6_list, CsPbBr3_list = [], [], [], []