In [2]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import statsmodels.api as sm
import os
from sklearn.linear_model import LinearRegression
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statannotations.Annotator import Annotator

# statsmodels and sklearn are currently not used. Both packages can be omitted for now, but sklearn will
# very likely be used in the future

%matplotlib inline

# --- Input configuration ---------------------------------------------------
# Directory that holds the QUS result files (location used for the UITC 2023 talk)
data_dir = r'C:\Users\acm4005\Box\WCM_Tulane_Shared_Folder\Preeclampsia Model'

# CSV file with the QUS results to analyse
# (earlier data set, kept for reference: r'2023-03-07_GD14-18.csv')
data_fid = r'2023-07-28_segments.csv'

# Abbreviated QUS parameter names, collected once so they can be reused later
qus_params = [
    'ESD', 'EAC', 'MBF', 'SS', 'I0',
    'Naka Omega', 'Naka m',
    'HK k', 'HK alpha',
    'Burr b', 'Burr lambda',
]

# Make the data folder the working directory, then load the .csv by its
# bare file name into a DataFrame
os.chdir(data_dir)
in_data = pd.read_csv(data_fid)

# Map the verbose column headers found in the .csv onto the short
# parameter abbreviations used throughout the analysis
new_names = {
    'HK Structure Param': 'HK k',
    'HK Scatterer Clustering Param': 'HK alpha',
    'Nak Shape Param': 'Naka m',
    'Nak Scale Factor': 'Naka Omega',
    'Spectral Slope': 'SS',
    'Intercept': 'I0',
    'Midband Fit': 'MBF',
    'Effective Scatterer Size': 'ESD',
    'Acoustic Concentration': 'EAC',
    'Burr_b': 'Burr b',
    'Burr_lambda': 'Burr lambda',
}
in_data.rename(columns=new_names, inplace=True)

# ESD is stored in m; scale by 1e6 to express it in um
in_data['ESD'] = in_data['ESD'] * 1e6

# Nakagami Omega and HK alpha are log-compressed (base 10)
for log_param in ('Naka Omega', 'HK alpha'):
    in_data[log_param] = np.log10(in_data[log_param])

# Precaution: discard every row that has a NaN in any column
in_data.dropna(how='any', inplace=True)

# Snapshot of the cleaned table taken before the ID column is split up
in_data_copy = in_data.copy()

# Based on the ID string, split into new columns with ID, Day, and Frame #.
# The split limit is passed as the keyword `n`: pandas 2.0 made every argument
# after `pat` keyword-only, so the positional form `split('_', 2, ...)` raises
# a TypeError there. At most two splits are made, so anything after the second
# underscore stays together in 'Frame'.
in_data[['ID','Day','Frame']] = in_data['ID'].str.split('_', n=2, expand=True)

# Convert Day column into integer data type (instead of string)
in_data['Day'] = in_data['Day'].astype(int)

# # Keep only the first 5 characters from the ID - the last specifies placenta (not animal) and is not needed at this time
# # This will make it easier to separate the control and treat groups
# in_data['ID'] = in_data['ID'].str[:5]

# Remove any rows where the ESD is zero or negative
# This means QUS estimation failed
remove_idx = (in_data['ESD'] <= 0)
in_data.drop(in_data.loc[remove_idx].index,inplace=True)

# Number of remaining (non-null) entries per column for every ID/Day pair
sum_data = in_data.groupby(['ID','Day'],as_index=False).count()

# Placentas which do not meet the criteria for inclusion in this study
# (exclusion is currently disabled; list kept for reference)
#drop_placentas = {'RTG341_18_1','RTG344_18_1','RTG345_18_1','RTG361_18_1','RTG362_18_1','RTG363_18_1','RTG364_18_1','RTG371_18_1','RTG372_18_1','RTG375_18_1','RTG435_18_1','RTG441_18_1','RTG442_18_1','RTG445_18_1','RTG461_18_1','RTG462_18_1','RTG463_18_1','RTG465_18_1'}

#for drop_p in drop_placentas:
#    in_data_copy.drop(in_data_copy.loc[in_data_copy['ID'] == drop_p].index,inplace=True)


# Label every row as 'Control' or 'RUPP' according to the first five
# characters of its ID; rows whose prefix is not listed are left untouched
group_by_prefix = {
    'RTG11': 'Control',
    'RTG36': 'Control',
    'RTG37': 'Control',
    'RTG43': 'Control',
    'RTG34': 'RUPP',
    'RTG44': 'RUPP',
    'RTG46': 'RUPP',
}
id_prefix = in_data['ID'].str[:5]
for prefix, group_label in group_by_prefix.items():
    in_data.loc[id_prefix == prefix, 'Group'] = group_label

# Extract the Day-18 rows and write them to a .csv next to the input data.
# The Day and Frame columns are dropped with a non-in-place `drop`, which
# returns a new DataFrame; dropping in place on the boolean-indexed selection
# would trigger pandas' SettingWithCopyWarning.
in_data_18 = in_data.loc[in_data['Day'] == 18].drop(columns=['Day', 'Frame'])
#in_data_18 = in_data_18.groupby(['ID'],as_index=False)

# Output file: <data_dir>/day_18_data_<input file name without extension>.csv
save_csv = os.path.join(
    data_dir,
    'day_18_data_' + os.path.splitext(data_fid)[0] + '.csv',
)

#with pd.ExcelWriter(save_xls) as writer:
#     in_data_18.to_excel(writer)

# The DataFrame index is written as the first column (to_csv default)
in_data_18.to_csv(save_csv)
