# Data Directory Building
This notebook contains the code used to create a directory csv file for every point cloud.

In [2]:
# Import Libraries
import os
import pandas as pd
import glob

In [3]:
# Trial dirs
cf = 'Cucumber_Fertilizer_Trial'
gp = 'Garden_Plots'
pv = 'Pepper_Variety_Trial'
pw = 'Personal-Sized_Watermelons'
sw = 'Seedless_Watermelon_Variety_Trial'
tf = 'Tomato_Fertilizer_Trial'
ta = 'Transplant-Age_Plants'
wi = 'Watermelon_Irrigation_Trial'
all_dirs = [cf, gp, pv, pw, sw, tf, ta, wi]

In [4]:
codes = ['cf', 'gp', 'pv', 'pw', 'sw', 'tf', 'ta', 'wi']

In [5]:
# Sampling date collector

def get_names_with_strings(dir_path):
    full_list = os.listdir(dir_path)
    kw = 'Sampling'
    sampling_list = [v for v in full_list if kw in v]
    
    return sampling_list

In [6]:
# Get sampling date lists
cfd = get_names_with_strings(cf)
gpd = get_names_with_strings(gp)
pvd = get_names_with_strings(pv)
pwd = get_names_with_strings(pw)
swd = get_names_with_strings(sw)
tfd = get_names_with_strings(tf)
tad = get_names_with_strings(ta)
wid = get_names_with_strings(wi)
date_list=[cfd, gpd, pvd, pwd, swd, tfd, tad, wid]

In [7]:
# Sample directory builder

def directory_builder(code, trial, sampling):
    # create an empty df
    df = pd.DataFrame(columns=['plot_id', 'date_dir', 'trial_dir'])
    # loop sampling dirs:
        # list sample folders
        # for sample folder, add row to df with name, sampling dir, trial
    for i in sampling:
        plot_list = os.listdir(trial+'/'+i)
        for j in plot_list:
            df.loc[len(df.index)]=[j, i, trial]
    # add col for whole path including las file
    df['file_path'] = df.apply(lambda row:
                               '{a}\{b}\{c}\{c}.las'.format(a=row['trial_dir'], 
                                                            b=row['date_dir'], 
                                                            c=row['plot_id']),
                              axis=1)
    # add col for just sampling date itself, no path
    df['date'] = df.apply(lambda row: row['date_dir'].replace('Sampling_',''),
                         axis=1)
    # add column with unique id for trial, date, and sample id
    df['sample_id']=df.apply(lambda row: '{d}_{e}_{f}'.format(d=code,
                                                             e=row['plot_id'],
                                                             f=row['date']), 
                            axis=1)
    # return df
    print(df)
    return df

cfdf = directory_builder(codes[0], all_dirs[0], date_list[0])

   plot_id             date_dir                  trial_dir  \
0     5016  Sampling_07_23_2023  Cucumber_Fertilizer_Trial   
1     5027  Sampling_07_23_2023  Cucumber_Fertilizer_Trial   
2     5035  Sampling_07_23_2023  Cucumber_Fertilizer_Trial   
3     5044  Sampling_07_23_2023  Cucumber_Fertilizer_Trial   
4     5051  Sampling_07_23_2023  Cucumber_Fertilizer_Trial   
5     5062  Sampling_07_23_2023  Cucumber_Fertilizer_Trial   
6     5073  Sampling_07_23_2023  Cucumber_Fertilizer_Trial   
7     6016  Sampling_07_23_2023  Cucumber_Fertilizer_Trial   
8     6022  Sampling_07_23_2023  Cucumber_Fertilizer_Trial   
9     6031  Sampling_07_23_2023  Cucumber_Fertilizer_Trial   
10    6045  Sampling_07_23_2023  Cucumber_Fertilizer_Trial   
11    6054  Sampling_07_23_2023  Cucumber_Fertilizer_Trial   
12    6067  Sampling_07_23_2023  Cucumber_Fertilizer_Trial   
13    6073  Sampling_07_23_2023  Cucumber_Fertilizer_Trial   
14    5016  Sampling_08_10_2023  Cucumber_Fertilizer_Trial   
15    50

In [8]:
# Repeat
gpdf = directory_builder(codes[1], all_dirs[1], date_list[1])
pvdf = directory_builder(codes[2], all_dirs[2], date_list[2])
pwdf = directory_builder(codes[3], all_dirs[3], date_list[3])
swdf = directory_builder(codes[4], all_dirs[4], date_list[4])
tfdf = directory_builder(codes[5], all_dirs[5], date_list[5])
tadf = directory_builder(codes[6], all_dirs[6], date_list[6])
widf = directory_builder(codes[7], all_dirs[7], date_list[7])

   plot_id             date_dir     trial_dir  \
0       01  Sampling_06_24_2023  Garden_Plots   
1       02  Sampling_06_24_2023  Garden_Plots   
2       03  Sampling_06_24_2023  Garden_Plots   
3       04  Sampling_06_24_2023  Garden_Plots   
4       05  Sampling_06_24_2023  Garden_Plots   
..     ...                  ...           ...   
85      20  Sampling_10_02_2023  Garden_Plots   
86      01  Sampling_10_04_2023  Garden_Plots   
87      03  Sampling_10_04_2023  Garden_Plots   
88      04  Sampling_10_04_2023  Garden_Plots   
89      16  Sampling_10_04_2023  Garden_Plots   

                                     file_path        date         sample_id  
0   Garden_Plots\Sampling_06_24_2023\01\01.las  06_24_2023  gp_01_06_24_2023  
1   Garden_Plots\Sampling_06_24_2023\02\02.las  06_24_2023  gp_02_06_24_2023  
2   Garden_Plots\Sampling_06_24_2023\03\03.las  06_24_2023  gp_03_06_24_2023  
3   Garden_Plots\Sampling_06_24_2023\04\04.las  06_24_2023  gp_04_06_24_2023  
4   Garden_Plots

    plot_id             date_dir             trial_dir  \
0      R1C4  Sampling_06_22_2023  Pepper_Variety_Trial   
1     R1C41  Sampling_06_22_2023  Pepper_Variety_Trial   
2     R1C42  Sampling_06_22_2023  Pepper_Variety_Trial   
3     R1C43  Sampling_06_22_2023  Pepper_Variety_Trial   
4     R1C44  Sampling_06_22_2023  Pepper_Variety_Trial   
..      ...                  ...                   ...   
107    R2C7  Sampling_10_20_2023  Pepper_Variety_Trial   
108    R3C4  Sampling_10_20_2023  Pepper_Variety_Trial   
109    R3C7  Sampling_10_20_2023  Pepper_Variety_Trial   
110    R4C4  Sampling_10_20_2023  Pepper_Variety_Trial   
111    R4C7  Sampling_10_20_2023  Pepper_Variety_Trial   

                                             file_path        date  \
0    Pepper_Variety_Trial\Sampling_06_22_2023\R1C4\...  06_22_2023   
1    Pepper_Variety_Trial\Sampling_06_22_2023\R1C41...  06_22_2023   
2    Pepper_Variety_Trial\Sampling_06_22_2023\R1C42...  06_22_2023   
3    Pepper_Variety_Tri

In [9]:
# Concat Dataframes
df_list = [cfdf, gpdf, pvdf, pwdf, swdf, tfdf, tadf, widf]
all_df = pd.concat(df_list)

In [10]:
all_df.info

<bound method DataFrame.info of    plot_id             date_dir                    trial_dir  \
0     5016  Sampling_07_23_2023    Cucumber_Fertilizer_Trial   
1     5027  Sampling_07_23_2023    Cucumber_Fertilizer_Trial   
2     5035  Sampling_07_23_2023    Cucumber_Fertilizer_Trial   
3     5044  Sampling_07_23_2023    Cucumber_Fertilizer_Trial   
4     5051  Sampling_07_23_2023    Cucumber_Fertilizer_Trial   
..     ...                  ...                          ...   
4     t5-2  Sampling_06_30_2023  Watermelon_Irrigation_Trial   
5     t5-3  Sampling_06_30_2023  Watermelon_Irrigation_Trial   
6     t8-1  Sampling_06_30_2023  Watermelon_Irrigation_Trial   
7     t8-2  Sampling_06_30_2023  Watermelon_Irrigation_Trial   
8     t8-3  Sampling_06_30_2023  Watermelon_Irrigation_Trial   

                                            file_path        date  \
0   Cucumber_Fertilizer_Trial\Sampling_07_23_2023\...  07_23_2023   
1   Cucumber_Fertilizer_Trial\Sampling_07_23_2023\...  07_23_

In [11]:
all_df.to_csv('pc_directory.csv', index=False)