# Trying to write some code to pull parameters out of PCA files from the GE CT scanner
## Jaimi Gray 

I have multiple PCA files in the working directory. I am going to attempt to write some code that will pull out certain parameters and list them in a data frame.

In [1]:
# get libraries that I need

import pandas as pd # pandas from data frames
import os # can deal with multiple files paths

In [25]:
# Folder that holds the PCA files; the cells below read from this location
directory = "/blue/practicum-ai/jaimigray/python_for_ai/PCA parser/PCA/"

# Names of every entry in that folder (not only .pca files), in the order the OS returns them
file_list = os.listdir(path=directory)

print(file_list)

['UF-herp-79994-head.pca', 'MZUC-32032.pca', 'VIMS-40878-diceCT.pca', 'UF-herp-84427-diceCT.pca', 'UF-herp-142512-head.pca', 'CAS-AMP-93870.pca', 'ANSP-126326-diceCT.pca', 'UF-bird-52652-diceCT.pca', 'UF-herp-79994-body.pca', 'YPM-ICH-024741-diceCT.pca', '.ipynb_checkpoints', 'YPM-MAM-005706-diceCT.pca']


In [27]:
# Keys of the PCA settings we intend to collect, one per output column
parameters_of_interest = [
    'VoxelSizeX',
    'TimingVal',
    'Voltage',
    'Current',
    'Filter',
    'Avg',
    'Skip',
]

# Row-less frame: a 'File' column followed by one column per parameter
column_names = ['File', *parameters_of_interest]
results_df = pd.DataFrame(columns=column_names)

print(results_df)

Empty DataFrame
Columns: [File, VoxelSizeX, TimingVal, Voltage, Current, Filter, Avg, Skip]
Index: []


In [40]:
import re

# Regex capturing the decimal number that follows "VoxelSizeX="
search_pattern = r'VoxelSizeX=(\d+\.\d+)'

# Walk through the folder and report the captured numbers for each PCA file
for entry in os.listdir(directory):
    # Anything that does not end in .pca is ignored
    if not entry.endswith('.pca'):
        continue

    # Read the whole file as text; the handle is closed before searching
    with open(os.path.join(directory, entry), 'r') as handle:
        text = handle.read()

    # Convert every captured string to a float, keeping the order of appearance
    numbers = [float(hit) for hit in re.findall(search_pattern, text)]

    # Files without a match print nothing
    if numbers:
        joined = ', '.join(str(number) for number in numbers)
        print(f"Values found in file {entry}: {joined}")

Values found in file UF-herp-79994-head.pca: 0.01955444
Values found in file MZUC-32032.pca: 0.02183344
Values found in file VIMS-40878-diceCT.pca: 0.02071334
Values found in file UF-herp-84427-diceCT.pca: 0.04222976
Values found in file UF-herp-142512-head.pca: 0.02116693
Values found in file CAS-AMP-93870.pca: 0.10627983
Values found in file ANSP-126326-diceCT.pca: 0.03537129
Values found in file UF-bird-52652-diceCT.pca: 0.0191472
Values found in file UF-herp-79994-body.pca: 0.04212027
Values found in file YPM-ICH-024741-diceCT.pca: 0.02515803
Values found in file YPM-MAM-005706-diceCT.pca: 0.03718126


In [41]:
# Collects one dict per PCA file; converted to a DataFrame after the loop
data = []

# Regex capturing the decimal number that follows "VoxelSizeX="
search_pattern = r'VoxelSizeX=(\d+\.\d+)'

for entry in os.listdir(directory):
    # Anything that does not end in .pca is ignored
    if not entry.endswith('.pca'):
        continue

    # Read the whole file as text; the handle is closed before searching
    with open(os.path.join(directory, entry), 'r') as handle:
        text = handle.read()

    found = re.findall(search_pattern, text)

    # A file with no match contributes no row at all
    if not found:
        continue

    # 'File' comes first, then Value1, Value2, ... for each match in order of appearance
    row = {'File': entry}
    row.update({f'Value{n}': float(hit) for n, hit in enumerate(found, start=1)})
    data.append(row)

# Build the table once, from the accumulated rows
df = pd.DataFrame(data)

# Show the resulting table
print(df)

                         File    Value1
0      UF-herp-79994-head.pca  0.019554
1              MZUC-32032.pca  0.021833
2       VIMS-40878-diceCT.pca  0.020713
3    UF-herp-84427-diceCT.pca  0.042230
4     UF-herp-142512-head.pca  0.021167
5           CAS-AMP-93870.pca  0.106280
6      ANSP-126326-diceCT.pca  0.035371
7    UF-bird-52652-diceCT.pca  0.019147
8      UF-herp-79994-body.pca  0.042120
9   YPM-ICH-024741-diceCT.pca  0.025158
10  YPM-MAM-005706-diceCT.pca  0.037181


In [42]:
# Give the extracted-value column a descriptive header; other columns are untouched
df = df.rename({'Value1': 'Voxel size (mm)'}, axis='columns')

# Show the table with its new header
print(df)

                         File  Voxel size (mm)
0      UF-herp-79994-head.pca         0.019554
1              MZUC-32032.pca         0.021833
2       VIMS-40878-diceCT.pca         0.020713
3    UF-herp-84427-diceCT.pca         0.042230
4     UF-herp-142512-head.pca         0.021167
5           CAS-AMP-93870.pca         0.106280
6      ANSP-126326-diceCT.pca         0.035371
7    UF-bird-52652-diceCT.pca         0.019147
8      UF-herp-79994-body.pca         0.042120
9   YPM-ICH-024741-diceCT.pca         0.025158
10  YPM-MAM-005706-diceCT.pca         0.037181


In [53]:
import os
import re
import pandas as pd

# Map each output column name to the regex that captures its value in a PCA file.
# Every pattern has exactly one capture group, holding the number to extract.
#
# NOTE(review): the numeric parts are fixed-width on purpose or by accident - a
# whole-number VoxelSizeX/TimingVal, a Voltage/Current outside 2-3 digits, or an
# Avg of more than one digit will not match, and that cell is left empty for the
# file. Confirm this is the intended filter before relying on the output.
search_patterns = {
    'Voxel size (mm)': r'VoxelSizeX=(\d+\.\d+)',
    'Exposure (ms)': r'TimingVal=(\d+\.\d+)',
    # searching for 2 or 3 digit numbers
    'Voltage (kV)': r'\nVoltage=\b(\d{2,3})\b', # the leading \n only matches the key at the start of a line
    'Current (uA)': r'\nCurrent=\b(\d{2,3})\b',
    'Frame averaging': r'Avg=\b(\d{1})\b'
}


def _extract_parameters(file_contents, patterns):
    """Return {column name: value} for every pattern that matches file_contents.

    Parameters
    ----------
    file_contents : str
        Full text of one PCA file.
    patterns : dict
        Maps a column name to a regex with a single capture group.

    Returns
    -------
    dict
        A float when the pattern matched once, a list of floats (in order of
        appearance) when it matched several times. Patterns without a match
        are left out.
    """
    extracted = {}
    for pattern_name, pattern in patterns.items():
        values = [float(match) for match in re.findall(pattern, file_contents)]
        if len(values) == 1:
            extracted[pattern_name] = values[0]
        elif values:
            extracted[pattern_name] = values
    return extracted


# One dict per PCA file; converted to a DataFrame once the loop is done
data = []

# Iterate over each entry in the directory, keeping only PCA files
for file_name in os.listdir(directory):
    if file_name.endswith('.pca'):
        file_path = os.path.join(directory, file_name)

        # Read the whole file as text; the handle is closed before searching
        with open(file_path, 'r') as file:
            file_contents = file.read()

        # Every PCA file gets a row, even when none of the patterns matched
        file_data = {'File': file_name}
        file_data.update(_extract_parameters(file_contents, search_patterns))
        data.append(file_data)

# Passing the column list explicitly keeps the table layout fixed: a parameter
# that matched in no file still appears (as an empty column), an empty folder
# still yields the expected headers, and the column order does not depend on
# which file happened to be read first.
df = pd.DataFrame(data, columns=['File', *search_patterns])

# Print the resulting DataFrame
print(df)


                         File  Voxel size (mm)  Exposure (ms)  Voltage (kV)  \
0      UF-herp-79994-head.pca         0.019554        200.098         100.0   
1              MZUC-32032.pca         0.021833        200.098          80.0   
2       VIMS-40878-diceCT.pca         0.020713        200.098         100.0   
3    UF-herp-84427-diceCT.pca         0.042230        250.097         130.0   
4     UF-herp-142512-head.pca         0.021167        200.098         100.0   
5           CAS-AMP-93870.pca         0.106280        200.098         120.0   
6      ANSP-126326-diceCT.pca         0.035371        200.098         130.0   
7    UF-bird-52652-diceCT.pca         0.019147        200.098         100.0   
8      UF-herp-79994-body.pca         0.042120        200.098         120.0   
9   YPM-ICH-024741-diceCT.pca         0.025158        200.098         120.0   
10  YPM-MAM-005706-diceCT.pca         0.037181        200.098         130.0   

    Current (uA)  Frame averaging  
0          200.

In [54]:
# Save the table of scanning parameters (`df`) as a CSV file.

# Derive the output path from `directory` (the PCA folder set earlier in the
# notebook) instead of repeating the absolute path, so that pointing the
# notebook at a different folder also moves the output file with it.
csv_file_path = os.path.join(directory, 'scanning_parameters.csv')

# index=False leaves the DataFrame's row index out of the file
df.to_csv(csv_file_path, index=False)