## Add New Data to the big DataLibrary and update information in Summary sheet

### Extract data from an Excel file and create a new data frame with the information

    Parameters to change:
    1) newdata_path = path to the folder that contains the new data
    2) library_filename = 'RX_Data_Library.xlsx'  # big data library
    3) data_filename = 'position.xlsx'  # Excel file describing the new data
    4) lib_name = 'Lib6'  # name of the library to assign the new data to (new or existing)
    5) study_date = '5/10/2019'  # date of the study, written to the Summary sheet
    
    

In [18]:
import pandas as pd
from openpyxl import load_workbook
import os 
from pathlib import Path

def getData(newdata_path, linuxPath, DataLib_filename, data_filename, lib_name, sampleNum=None):
    """
    Build the rows to append to the big Data Library for one scan folder.

    Reads the 'DataLibrary' sheet of ``DataLib_filename`` (for its column
    headings and its current number of rows) and the 'scan_position_tag' and
    'scan_info' sheets of ``data_filename`` inside ``newdata_path``. The
    information is reformatted into a new data frame that has the same column
    headings as the big Data Library, with one row per row of the
    'scan_position_tag' sheet.

    Parameters
    ----------
    newdata_path : str or os.PathLike
        Folder where the new data is; its last component is stored in the
        'DataFolderName' column.
    linuxPath : str or os.PathLike
        Path of the same folder on the Linux machine, stored in the
        'Folder_abspath' column as a forward-slash string.
    DataLib_filename : str or os.PathLike
        Workbook that holds the big Data Library.
    data_filename : str
        Name of the workbook inside ``newdata_path`` describing the new data.
    lib_name : str
        Library name stored in the 'LibraryName' column.
    sampleNum : int, optional
        Value stored in the 'SampleNum' column; left empty when omitted.

    Returns
    -------
    pandas.DataFrame
        The new rows. 'Row' continues the numbering of the existing library:
        the first new row is numbered (number of existing rows + 1).
    """
    # Assuming the data are all in 1 folder.
    DataLibrary_df = pd.read_excel(DataLib_filename, sheet_name='DataLibrary')  # big data file
    first_row_num = DataLibrary_df.shape[0] + 1  # starting Row number of the new data

    scan_workbook = os.path.join(newdata_path, data_filename)
    positions_df = pd.read_excel(scan_workbook, sheet_name='scan_position_tag')  # info from 3D scan
    scaninfo_df = pd.read_excel(scan_workbook, sheet_name='scan_info')

    # Pre-size the frame to the scan positions so every later scalar
    # assignment broadcasts over all new rows.
    newdata_df = pd.DataFrame(columns=DataLibrary_df.columns, index=positions_df.index)

    for column in ('filename', 'Axis0', 'Axis1', 'Axis2', 'Axis3', 'Axis4'):
        newdata_df[column] = positions_df[column]
    newdata_df['LibraryName'] = lib_name

    # One vectorised assignment instead of a cell-by-cell .loc loop.
    newdata_df['Row'] = list(range(first_row_num, first_row_num + len(positions_df)))

    # Store plain text with forward slashes: openpyxl cannot write Path
    # objects, and a Linux path built on Windows comes out with backslashes.
    newdata_df['Folder_abspath'] = os.fspath(linuxPath).replace('\\', '/')

    needle_id = scaninfo_df.iloc[2, 1]
    newdata_df['US_machine'] = scaninfo_df.iloc[0, 1]
    newdata_df['NeedleID'] = needle_id
    transLen = 0.6 if needle_id == 'PGE2A616' else 1

    # other info for user to set:
    newdata_df['StudyName'] = 'RX_3D_WaterTank'
    newdata_df['Owner'] = 'Bosheng Wu'
    newdata_df['Medium'] = 'Water'
    newdata_df['AutomaticTankTest'] = 'Y'
    newdata_df['TransducerLength_mm'] = transLen
    newdata_df['PicoSeries'] = 5000
    newdata_df['SamplingRate_MSps'] = 125
    newdata_df['SampleInterval_ns'] = 16
    newdata_df['GainSetting_dB'] = 10
    newdata_df['TestType'] = 'Receiving Test'
    newdata_df['NeedleType'] = 'PZT'
    # normpath drops a trailing separator, which would otherwise make the
    # folder name come out empty.
    newdata_df['DataFolderName'] = os.path.basename(os.path.normpath(newdata_path))
    newdata_df['SampleNum'] = sampleNum

    return newdata_df

## Main code for adding new data to the data library, one folder at a time

In [12]:
# Add the data of ONE scan folder to the library workbook:
#   1) build the new rows with getData,
#   2) append them to the 'DataLibrary' sheet,
#   3) append one describing row to the 'Summary' sheet.

# for user to set:

newdata_path = Path('Y:/Beacon_Datalibrary/RX_3D_WaterTank/3D_GE_PGE2A616_400ms')
# Kept as a plain string: this value is written into worksheet cells, and
# openpyxl cannot convert Path objects.
linuxPath = '/home/ai1/beacon/3D_Position_Data_Bosheng/3D_GE_PGE2A616_400ms'
library_filename = 'RX_Data_Library.xlsx'  # change this
data_filename = 'position.xlsx'  # change this
lib_name = 'Lib7'  # change this
study_date = '5/10/2019'  # change this
# change this -- NOTE(review): 25000000 is the value the batch cell uses for
# the Lib7 folders; confirm it matches this acquisition.
sampleNum = 25000000

# STEP 1: GO TO DATA AND EXTRACT RELEVANT INFO TO CREATE A NEW DF

# check whether the target library is new or already exists
summary_df = pd.read_excel(library_filename, sheet_name='Summary')
if lib_name in set(summary_df['LibraryName']):
    print('{s} already exist, new data will be appended onto {s}.'.format(s=lib_name))
else:
    print('{s} is created. New data will be added to {s}.'.format(s=lib_name))

# create new data frame
newdata_df = getData(newdata_path, linuxPath, library_filename, data_filename, lib_name, sampleNum)
if newdata_df.empty:
    # Stop before touching the workbook: the Summary row below needs a
    # first and a last data row.
    raise ValueError('No scan positions found in {}'.format(os.path.join(newdata_path, data_filename)))

# STEP 2: UPLOAD ONTO DATA LIBRARY SHEET
print('Appending data onto DataLibrary sheet...')
book = load_workbook(library_filename)  # open excel workbook
library_ws = book['DataLibrary']
for _, row in newdata_df.iterrows():
    library_ws.append(row.tolist())  # add new data

# STEP 3: ADD ROW INFO INTO SUMMARY SHEET
first_row = newdata_df.iloc[0]
last_row = newdata_df.iloc[-1]
summary_row = [
    lib_name,
    '{}({} - {}dB - {})'.format(first_row['StudyName'], first_row['US_machine'],
                                first_row['GainSetting_dB'], first_row['Medium']),
    linuxPath,
    first_row['Owner'],
    '({} - {})'.format(first_row['Row'], last_row['Row']),
    study_date,
]

book['Summary'].append(summary_row)
book.save(library_filename)
print('Done!')

Lib7 is created. New data will be added to Lib7.
Appending data onto DataLibrary sheet...
Done!


## Walking the directory tree to add all folders at once
This cell is run from Windows, but the folder path recorded in the library is the Linux path.

If running this from Linux instead, set `windowRoot = linuxRoot`.


In [23]:
# Add every scan folder directly under windowRoot whose name starts with '3D'
# to the library workbook, one folder at a time. For each folder:
#   1) build the new rows with getData,
#   2) append them to the 'DataLibrary' sheet,
#   3) append one describing row to the 'Summary' sheet,
# and save the workbook, because getData re-reads it for the next folder.

# Plain string: the Linux path is written into worksheet cells and must keep
# forward slashes even when this cell runs on Windows.
linuxRoot = '/home/ai1/beacon/3D_Position_Data_Bosheng'
windowRoot = Path('Y:/Beacon_Datalibrary/RX_3D_WaterTank')
lib = ['Lib6', 'Lib6', 'Lib7', 'Lib7', 'Lib8', 'Lib8', 'Lib9', 'Lib9']  # library names to assign to data from each folder
library_filename = 'RX_Data_Library.xlsx'
sampleCalc = [18750000, 18750000, 25000000, 25000000, 12500000, 12500000, 12500000, 12500000]
study_date = '5/10/2019'
data_filename = 'position.xlsx'

# lib and sampleCalc are matched to folders BY POSITION, so the folder order
# has to be deterministic: direct children only, case-insensitive name order.
scan_folders = sorted(
    (entry.name for entry in Path(windowRoot).iterdir()
     if entry.is_dir() and entry.name.startswith('3D')),  # skip unwanted folders
    key=str.lower,
)

# Fail before anything is written rather than part-way through the batch.
if len(lib) != len(sampleCalc):
    raise ValueError('lib has {} entries but sampleCalc has {}'.format(len(lib), len(sampleCalc)))
if len(scan_folders) > len(lib):
    raise ValueError('Found {} scan folders but only {} library names'.format(len(scan_folders), len(lib)))

folderCount = 0
for folderName, lib_name, sampleNum in zip(scan_folders, lib, sampleCalc):
    linuxPath = '{}/{}'.format(str(linuxRoot).replace('\\', '/').rstrip('/'), folderName)
    newdata_path = os.path.join(windowRoot, folderName)
    folderCount += 1

    # STEP 1: GO TO DATA AND EXTRACT RELEVANT INFO TO CREATE A NEW DF

    # check whether the target library is new or already exists
    summary_df = pd.read_excel(library_filename, sheet_name='Summary')
    if lib_name in set(summary_df['LibraryName']):
        print('{s} already exist, new data will be appended onto {s}.'.format(s=lib_name))
    else:
        print('{s} is created. New data will be added to {s}.'.format(s=lib_name))

    # create new data frame
    newdata_df = getData(newdata_path, linuxPath, library_filename, data_filename, lib_name, sampleNum)
    if newdata_df.empty:
        # The Summary row below needs a first and a last data row.
        raise ValueError('No scan positions found in {}'.format(os.path.join(newdata_path, data_filename)))

    # STEP 2: UPLOAD ONTO DATA LIBRARY SHEET
    print('Appending data onto DataLibrary sheet...')
    book = load_workbook(library_filename)  # open excel workbook
    library_ws = book['DataLibrary']
    for _, row in newdata_df.iterrows():
        library_ws.append(row.tolist())  # add new data

    # STEP 3: ADD ROW INFO INTO SUMMARY SHEET
    first_row = newdata_df.iloc[0]
    last_row = newdata_df.iloc[-1]
    summary_row = [
        lib_name,
        '{}({} - {}dB - {})'.format(first_row['StudyName'], first_row['US_machine'],
                                    first_row['GainSetting_dB'], first_row['Medium']),
        linuxPath,
        first_row['Owner'],
        '({} - {})'.format(first_row['Row'], last_row['Row']),
        study_date,
    ]

    book['Summary'].append(summary_row)
    book.save(library_filename)

    print(folderName, 'successfully added to ', lib_name)

Lib6 is created. New data will be added to Lib6.
Appending data onto DataLibrary sheet...
3D_eZono_PGE2A1234_300ms successfully added to  Lib6
Lib6 already exist, new data will be appended onto Lib6.
Appending data onto DataLibrary sheet...
3D_eZono_PGE2A616_300ms successfully added to  Lib6
Lib7 is created. New data will be added to Lib7.
Appending data onto DataLibrary sheet...
3D_GE_PGE2A1234_400ms successfully added to  Lib6
Lib7 already exist, new data will be appended onto Lib7.
Appending data onto DataLibrary sheet...
3D_GE_PGE2A616_400ms successfully added to  Lib6
Lib8 is created. New data will be added to Lib8.
Appending data onto DataLibrary sheet...
3D_SonoSite_PGE2A1234_200ms successfully added to  Lib6
Lib8 already exist, new data will be appended onto Lib8.
Appending data onto DataLibrary sheet...
3D_SonoSite_PGE2A616_200ms successfully added to  Lib6
Lib9 is created. New data will be added to Lib9.
Appending data onto DataLibrary sheet...
3D_Toshiba_PGE2A1234_200ms succ

In [22]:

# Re-print the status line for the folder/library pair left over from the
# last iteration of the batch loop.
status_fields = (folderName, 'successfully added to ', lib_name)
print(*status_fields)

3D_eZono_PGE2A1234_300ms successfully added to  Lib6
