In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## 1. Preprocessing

### Weight Data

In [7]:
## Load the data
df = pd.read_csv('../materials/raw_csv/weight_data.csv')
df.head()

Unnamed: 0,No.,Treatment,Rep,Day 0,Day 1,Day 2,Day 3,Day 4,Day 5,Day 6,Day 7,Day 8
0,1.0,5oC,1.0,158.9,154.5,149.7,148.4,147.5,147.2,146.5,145.8,144.8
1,,,2.0,153.9,150.3,145.0,144.0,143.3,142.8,142.2,141.6,140.5
2,,,3.0,168.6,164.2,159.8,158.5,157.5,157.2,156.0,155.1,153.8
3,,,4.0,110.7,108.1,106.1,105.5,104.8,104.5,103.5,102.5,101.8
4,,,5.0,166.5,162.6,160.4,158.8,157.9,157.2,156.0,154.9,153.9


In [8]:
## Normalize columns names
df.columns = df.columns.str.strip()

In [9]:
## Drop the 'No. ' column
df.drop(['No.'],axis=1,inplace=True)
df.dropna(subset=['Day 0'], axis=0, inplace=True)
df.head()

Unnamed: 0,Treatment,Rep,Day 0,Day 1,Day 2,Day 3,Day 4,Day 5,Day 6,Day 7,Day 8
0,5oC,1.0,158.9,154.5,149.7,148.4,147.5,147.2,146.5,145.8,144.8
1,,2.0,153.9,150.3,145.0,144.0,143.3,142.8,142.2,141.6,140.5
2,,3.0,168.6,164.2,159.8,158.5,157.5,157.2,156.0,155.1,153.8
3,,4.0,110.7,108.1,106.1,105.5,104.8,104.5,103.5,102.5,101.8
4,,5.0,166.5,162.6,160.4,158.8,157.9,157.2,156.0,154.9,153.9


In [10]:
## Fill the 'Treatment' column
df['Treatment'] = df['Treatment'].ffill()
df.tail()

Unnamed: 0,Treatment,Rep,Day 0,Day 1,Day 2,Day 3,Day 4,Day 5,Day 6,Day 7,Day 8
35,20oC,6.0,119.1,110.8,109.8,105.7,104.0,99.2,79.9,,
36,20oC,7.0,117.7,111.1,109.8,106.6,104.8,100.6,87.3,,
37,20oC,8.0,124.1,116.7,115.6,112.0,110.3,106.1,104.5,,
38,20oC,9.0,126.5,120.6,119.5,115.9,113.7,110.1,99.2,,
39,20oC,10.0,125.8,120.2,118.4,115.4,113.4,108.2,105.8,,


In [11]:
## Normalize the treatment column
df['Treatment'] = df['Treatment'].str.replace('oC','')
df.tail()

Unnamed: 0,Treatment,Rep,Day 0,Day 1,Day 2,Day 3,Day 4,Day 5,Day 6,Day 7,Day 8
35,20,6.0,119.1,110.8,109.8,105.7,104.0,99.2,79.9,,
36,20,7.0,117.7,111.1,109.8,106.6,104.8,100.6,87.3,,
37,20,8.0,124.1,116.7,115.6,112.0,110.3,106.1,104.5,,
38,20,9.0,126.5,120.6,119.5,115.9,113.7,110.1,99.2,,
39,20,10.0,125.8,120.2,118.4,115.4,113.4,108.2,105.8,,


In [12]:
# Normalize the keys to the format "Treatment_Day_Rep"
normalized_key_value_data = {
    f"{day.split(' ')[-1]}_{int(row['Treatment'])}_{int(row['Rep'])}": row[day]
    for _, row in df.iterrows()
    for day in df.columns[2:]  # Skip the first two columns (Treatment, Rep)
}
normalized_key_value_df = pd.DataFrame(
    normalized_key_value_data.items(),
    columns=['Label', 'Weight']
)


In [14]:
from natsort import natsorted


df = normalized_key_value_df
df = df.reset_index(drop=True)
df = df[natsorted(df.columns)]

In [15]:
df

Unnamed: 0,Label,Weight
0,0_5_1,158.9
1,1_5_1,154.5
2,2_5_1,149.7
3,3_5_1,148.4
4,4_5_1,147.5
...,...,...
355,4_20_10,113.4
356,5_20_10,108.2
357,6_20_10,105.8
358,7_20_10,


In [16]:
df.dropna(inplace=True)
df.to_csv('../materials/process_csv/weight_data.csv', index=False)

### Image Data

In [68]:
import os
import re
from PIL import Image
import cv2
import numpy as np
import pandas as pd
import os
import natsort

##### Function for Extract Color Stats

In [66]:
def extract_color(folder_path, output_csv):
    # Initialize a list to store statistics for all images
    all_data = []

    # Get a naturally sorted list of files in the folder
    file_list = natsort.natsorted(os.listdir(folder_path))

    # Loop through all files in the folder
    for filename in file_list:
        image_path = os.path.join(folder_path, filename)

        # Check if the file is an image
        if filename.lower().endswith((".jpg", ".jpeg", ".png", ".bmp")):
            # Load the image
            image = cv2.imread(image_path)

            if image is None:
                print(f"Warning: Unable to load {filename}. Skipping...")
                continue

            # Convert the image to different color spaces
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
            image_lab = cv2.cvtColor(image, cv2.COLOR_BGR2Lab)

            # Function to calculate mean and standard deviation for a given image
            def calc_mean_std(image):
                mean, stddev = cv2.meanStdDev(image)
                return mean.flatten(), stddev.flatten()

            # Calculate stats for each color space
            rgb_mean, rgb_std = calc_mean_std(image_rgb)
            hsv_mean, hsv_std = calc_mean_std(image_hsv)
            lab_mean, lab_std = calc_mean_std(image_lab)

            # Append the data for this image to the list
            data = {
                "Label": filename.split(".")[0],
                "R_Mean": rgb_mean[0], "R_Std": rgb_std[0],
                "G_Mean": rgb_mean[1], "G_Std": rgb_std[1],
                "B_Mean": rgb_mean[2], "B_Std": rgb_std[2],
                "H_Mean": hsv_mean[0], "H_Std": hsv_std[0],
                "S_Mean": hsv_mean[1], "S_Std": hsv_std[1],
                "V_Mean": hsv_mean[2], "V_Std": hsv_std[2],
                "L_Mean": lab_mean[0], "L_Std": lab_std[0],
                "A_Mean": lab_mean[1], "A_Std": lab_std[1],
                "B_Lab_Mean": lab_mean[2], "B_Lab_Std": lab_std[2],
            }
            
            all_data.append(data)

    # Create a DataFrame from the list of dictionaries
    if all_data:
        final_df = pd.DataFrame(all_data)
        # Save the DataFrame to a CSV file
        final_df.to_csv(output_csv, index=False)
        print(f"Color statistics for all images saved to {output_csv}")
    else:
        print("No valid images found in the folder.")


In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Function to calculate the area of leaves by detecting contours
def calculate_leaf_areas(image_path):
    # Read the image
    img = cv2.imread(image_path)
    
    # Convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    # Apply a binary threshold to create a binary image
    _, binary = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)
    
    # Find contours in the binary image
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Calculate area of each contour (leaf) and store them
    leaf_areas = [cv2.contourArea(cnt) for cnt in contours if cv2.contourArea(cnt) > 100]
    
    return leaf_areas, contours, img

# Process all images and calculate leaf areas
leaf_areas_list = []
for image_path in images:
    leaf_areas, contours, img = calculate_leaf_areas(image_path)
    leaf_areas_list.append(leaf_areas)

# Display the results for each image
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

for i, image_path in enumerate(images):
    leaf_areas, contours, img = calculate_leaf_areas(image_path)
    
    ax = axes[i // 2, i % 2]
    ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    ax.set_title(f"Image {i+1}")
    
    # Draw contours around detected leaves
    cv2.drawContours(img, contours, -1, (0, 255, 0), 2)
    
    ax.axis('off')

plt.tight_layout()
plt.show()

# Return the areas of leaves for each image
leaf_areas_list


#### Combined for Area Leaf

In [70]:
file_folder = "../output/color_based"
# Example usage
folder_path = "../output/color_based/raw"  # Replace with your folder path
output_csv = "../materials/raw_csv/color_for_area.csv"  # Replace with your desired output file name

extract_color(folder_path, output_csv)

Color statistics for all images saved to ../materials/raw_csv/color_for_area.csv


#### Combined For Weight

In [12]:
# Path to the folder containing images
file_folder = "../output/color_based"

# Regex pattern to match filenames ending in `_1` and `_2`
pattern = r"(\d+_\d+_\d+)_([1-2])\.png"

# Get a list of matching files
files = [f for f in os.listdir(file_folder) if re.match(pattern, f)]

# Group files by their prefix (e.g., "0_5_1", "0_5_2", etc.)
file_groups = {}
for file in files:
    match = re.match(pattern, file)
    if match:
        prefix = match.group(1)  # e.g., "0_5_1"
        variant = match.group(2)  # e.g., "1" or "2"
        if prefix not in file_groups:
            file_groups[prefix] = {}
        file_groups[prefix][variant] = os.path.join(file_folder, file)

# Combine `_1` and `_2` images for each group
output_files = []
for prefix, variants in file_groups.items():
    if "1" in variants and "2" in variants:
        # Open the two images
        img1 = Image.open(variants["1"])
        img2 = Image.open(variants["2"])
        
        # Combine images (vertically or horizontally)
        combined_width = max(img1.width, img2.width)
        combined_height = img1.height + img2.height
        
        combined_image = Image.new("RGBA", (combined_width, combined_height))
        combined_image.paste(img1, (0, 0))
        combined_image.paste(img2, (0, img1.height))
        
        # Save the combined image
        output_path = os.path.join(f"{file_folder}/combined", f"{prefix}.png")
        combined_image.save(output_path)
        output_files.append(output_path)

# Output combined file paths
output_files


[]

In [13]:
# Example usage
folder_path = "../output/color_based/combined"  # Replace with your folder path
output_csv = "../materials/color_stats_folder.csv"  # Replace with your desired output file name

extract_color(folder_path, output_csv)


Color statistics for all images saved to ../materials/color_stats_folder.csv


In [14]:
import cv2
import numpy as np
import pandas as pd
import os
import natsort

def extract_color(folder_path, output_csv):
    # Initialize a list to store statistics for all images
    all_data = []

    # Get a naturally sorted list of files in the folder
    file_list = natsort.natsorted(os.listdir(folder_path))

    # Define multiple color ranges in HSV
    color_ranges = {
        "Red": [
            (np.array([0, 120, 70]), np.array([10, 255, 255])),  # Lower red range
            (np.array([170, 120, 70]), np.array([180, 255, 255]))  # Upper red range
        ],
        "Green": [
            (np.array([35, 50, 50]), np.array([85, 255, 255]))
        ],
        "Blue": [
            (np.array([100, 150, 0]), np.array([140, 255, 255]))
        ]
    }

    # Loop through all files in the folder
    for filename in file_list:
        image_path = os.path.join(folder_path, filename)

        # Check if the file is an image
        if filename.lower().endswith((".jpg", ".jpeg", ".png", ".bmp")):
            # Load the image
            image = cv2.imread(image_path)

            if image is None:
                print(f"Warning: Unable to load {filename}. Skipping...")
                continue

            # Convert the image to different color spaces
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
            image_lab = cv2.cvtColor(image, cv2.COLOR_BGR2Lab)

            # Function to calculate mean and standard deviation for a given image
            def calc_mean_std(image):
                mean, stddev = cv2.meanStdDev(image)
                return mean.flatten(), stddev.flatten()

            # Calculate stats for each color space
            rgb_mean, rgb_std = calc_mean_std(image_rgb)
            hsv_mean, hsv_std = calc_mean_std(image_hsv)
            lab_mean, lab_std = calc_mean_std(image_lab)

            # Calculate total number of pixels
            total_pixels = image.shape[0] * image.shape[1]

            # Calculate area percentages for each color range
            color_areas = {}
            for color_name, ranges in color_ranges.items():
                # Create a mask for the color range
                mask = np.zeros(image_hsv.shape[:2], dtype=np.uint8)
                for lower, upper in ranges:
                    mask |= cv2.inRange(image_hsv, lower, upper)

                # Count the number of pixels in the color range
                color_area = cv2.countNonZero(mask)
                color_percentage = (color_area / total_pixels) * 100
                color_areas[f"{color_name}_Area"] = color_area
                color_areas[f"{color_name}_Percentage"] = color_percentage

            # Append the data for this image to the list
            data = {
                "Label": filename.split(".")[0],
                "R_Mean": rgb_mean[0], "R_Std": rgb_std[0],
                "G_Mean": rgb_mean[1], "G_Std": rgb_std[1],
                "B_Mean": rgb_mean[2], "B_Std": rgb_std[2],
                "H_Mean": hsv_mean[0], "H_Std": hsv_std[0],
                "S_Mean": hsv_mean[1], "S_Std": hsv_std[1],
                "V_Mean": hsv_mean[2], "V_Std": hsv_std[2],
                "L_Mean": lab_mean[0], "L_Std": lab_std[0],
                "A_Mean": lab_mean[1], "A_Std": lab_std[1],
                "B_Lab_Mean": lab_mean[2], "B_Lab_Std": lab_std[2],
            }

            # Add color area and percentage stats to the data
            data.update(color_areas)

            all_data.append(data)

    # Create a DataFrame from the list of dictionaries
    if all_data:
        final_df = pd.DataFrame(all_data)
        # Save the DataFrame to a CSV file
        final_df.to_csv(output_csv, index=False)
        print(f"Color statistics for all images saved to {output_csv}")
    else:
        print("No valid images found in the folder.")

# Example usage
folder_path = "../output/color_based/combined"  # Replace with your folder path
output_csv = "../materials/process_csv/weight_color_area.csv"  # Replace with your desired output file name

extract_color(folder_path, output_csv)


Color statistics for all images saved to ../materials/color_area.csv


### Area Leaf Data

In [49]:
import pandas as pd
df = pd.read_csv("../materials/raw_csv/area_leaf.csv")
df.describe

<bound method NDFrame.describe of     Trt Code    Day  Leave  Total area (cm2)  Yellowing area (cm2)  \
0    5C  7_1  Day 0      1           262.235                 0.000   
1   NaN  NaN    NaN      2           215.414                 0.000   
2   NaN  NaN    NaN      3           215.536                 0.000   
3   NaN  NaN  Day 1      1           262.235                 0.000   
4   NaN  NaN    NaN      2           215.414                 0.000   
..  ...  ...    ...    ...               ...                   ...   
79  NaN  NaN    NaN      2           258.952               258.952   
80  NaN  NaN    NaN      3           224.830               213.198   
81  NaN  NaN  Day 6      1           275.150               274.463   
82  NaN  NaN    NaN      2           258.952               258.952   
83  NaN  NaN    NaN      3           224.830               224.830   

    Green area (cm2)  Yellowing area (%)  Green area (%)        R  ...    a*0  \
0            262.235                0.00    

In [50]:
df.columns

Index(['Trt', 'Code', 'Day', 'Leave', 'Total area (cm2)',
       'Yellowing area (cm2)', 'Green area (cm2)', 'Yellowing area (%)',
       'Green area (%)', 'R', 'G', 'B', 'Var_R', 'Var_G', 'Var_B', 'X', 'Y',
       'Z', 'L*', 'a*', 'b*', 'Unnamed: 21', 'L*0', 'a*0', 'b*0', 'TCD',
       'Grayscale', 'Unnamed: 27', 'Day.1', 'Yellowing area (%).1',
       'Green area (%).1', 'TCD.1', 'Grayscale.1'],
      dtype='object')

In [51]:
df.dropna(subset=["Leave"], inplace=True)
df.describe

<bound method NDFrame.describe of     Trt Code    Day  Leave  Total area (cm2)  Yellowing area (cm2)  \
0    5C  7_1  Day 0      1           262.235                 0.000   
1   NaN  NaN    NaN      2           215.414                 0.000   
2   NaN  NaN    NaN      3           215.536                 0.000   
3   NaN  NaN  Day 1      1           262.235                 0.000   
4   NaN  NaN    NaN      2           215.414                 0.000   
..  ...  ...    ...    ...               ...                   ...   
79  NaN  NaN    NaN      2           258.952               258.952   
80  NaN  NaN    NaN      3           224.830               213.198   
81  NaN  NaN  Day 6      1           275.150               274.463   
82  NaN  NaN    NaN      2           258.952               258.952   
83  NaN  NaN    NaN      3           224.830               224.830   

    Green area (cm2)  Yellowing area (%)  Green area (%)        R  ...    a*0  \
0            262.235                0.00    

In [52]:
df.ffill()
df.head()

Unnamed: 0,Trt,Code,Day,Leave,Total area (cm2),Yellowing area (cm2),Green area (cm2),Yellowing area (%),Green area (%),R,...,a*0,b*0,TCD,Grayscale,Unnamed: 27,Day.1,Yellowing area (%).1,Green area (%).1,TCD.1,Grayscale.1
0,5C,7_1,Day 0,1,262.235,0.0,262.235,0.0,100.0,157.767,...,,,,154.472,,0.0,0.0,100.0,,147.49
1,,,,2,215.414,0.0,215.414,0.0,100.0,155.89,...,,,,151.63,,1.0,0.0,100.0,2.28,150.44
2,,,,3,215.536,0.0,215.536,0.0,100.0,142.077,...,,,,136.355,,2.0,0.0,100.0,2.76,152.46
3,,,Day 1,1,262.235,0.0,262.235,0.0,100.0,163.523,...,-15.77,22.15,3.212,160.477,,3.0,0.0,100.0,1.65,149.52
4,,,,2,215.414,0.0,215.414,0.0,100.0,156.695,...,-16.48,23.96,2.232,152.027,,4.0,0.0,100.0,3.03,152.41


In [53]:
df.columns

Index(['Trt', 'Code', 'Day', 'Leave', 'Total area (cm2)',
       'Yellowing area (cm2)', 'Green area (cm2)', 'Yellowing area (%)',
       'Green area (%)', 'R', 'G', 'B', 'Var_R', 'Var_G', 'Var_B', 'X', 'Y',
       'Z', 'L*', 'a*', 'b*', 'Unnamed: 21', 'L*0', 'a*0', 'b*0', 'TCD',
       'Grayscale', 'Unnamed: 27', 'Day.1', 'Yellowing area (%).1',
       'Green area (%).1', 'TCD.1', 'Grayscale.1'],
      dtype='object')

In [54]:
df.drop(['Unnamed: 27', 'Day.1', 'Yellowing area (%).1',
       'Green area (%).1', 'TCD.1', 'Grayscale.1'], axis=1, inplace=True)
df.head()

Unnamed: 0,Trt,Code,Day,Leave,Total area (cm2),Yellowing area (cm2),Green area (cm2),Yellowing area (%),Green area (%),R,...,Z,L*,a*,b*,Unnamed: 21,L*0,a*0,b*0,TCD,Grayscale
0,5C,7_1,Day 0,1,262.235,0.0,262.235,0.0,100.0,157.767,...,28.69,27.84,-15.77,22.15,,,,,,154.472
1,,,,2,215.414,0.0,215.414,0.0,100.0,155.89,...,26.71,26.75,-16.48,23.96,,,,,,151.63
2,,,,3,215.536,0.0,215.536,0.0,100.0,142.077,...,19.61,19.51,-16.56,24.7,,,,,,136.355
3,,,Day 1,1,262.235,0.0,262.235,0.0,100.0,163.523,...,31.94,30.88,-15.12,21.34,,27.84,-15.77,22.15,3.212,160.477
4,,,,2,215.414,0.0,215.414,0.0,100.0,156.695,...,26.42,27.46,-17.63,25.74,,26.75,-16.48,23.96,2.232,152.027


In [55]:
df.ffill(inplace=True)

In [56]:
df.columns

Index(['Trt', 'Code', 'Day', 'Leave', 'Total area (cm2)',
       'Yellowing area (cm2)', 'Green area (cm2)', 'Yellowing area (%)',
       'Green area (%)', 'R', 'G', 'B', 'Var_R', 'Var_G', 'Var_B', 'X', 'Y',
       'Z', 'L*', 'a*', 'b*', 'Unnamed: 21', 'L*0', 'a*0', 'b*0', 'TCD',
       'Grayscale'],
      dtype='object')

In [57]:
df.drop(['Yellowing area (cm2)', 'Green area (cm2)', 'Yellowing area (%)',
       'Green area (%)', 'R', 'G', 'B', 'Var_R', 'Var_G', 'Var_B', 'X', 'Y',
       'Z', 'L*', 'a*', 'b*', 'Unnamed: 21', 'L*0', 'a*0', 'b*0', 'TCD',
       'Grayscale'], axis=1, inplace=True)

In [58]:
df.head()

Unnamed: 0,Trt,Code,Day,Leave,Total area (cm2)
0,5C,7_1,Day 0,1,262.235
1,5C,7_1,Day 0,2,215.414
2,5C,7_1,Day 0,3,215.536
3,5C,7_1,Day 1,1,262.235
4,5C,7_1,Day 1,2,215.414


In [59]:
# Step 1: Rename column 'Trt' to 'Treatment'
df = df.rename(columns={'Trt': 'Treatment'})

# Step 2: Replace 'C' in the 'Treatment' column
df['Treatment'] = df['Treatment'].replace({'C': ''}, regex=True)
df['Day'] = df['Day'].replace({'Day ':''},regex=True)

In [71]:
df.head()

Unnamed: 0,Treatment,Code,Day,Leave,Total area (cm2)
0,5,7_1,0,1,262.235
1,5,7_1,0,2,215.414
2,5,7_1,0,3,215.536
3,5,7_1,1,1,262.235
4,5,7_1,1,2,215.414


In [72]:
df['Label'] = df['Day']+'_'+df['Treatment']+'_'+df['Code']
df['Label']

0      0_5_7_1
1      0_5_7_1
2      0_5_7_1
3      1_5_7_1
4      1_5_7_1
        ...   
79    5_20_4_1
80    5_20_4_1
81    6_20_4_1
82    6_20_4_1
83    6_20_4_1
Name: Label, Length: 84, dtype: object

In [73]:
df.head()

Unnamed: 0,Treatment,Code,Day,Leave,Total area (cm2),Label
0,5,7_1,0,1,262.235,0_5_7_1
1,5,7_1,0,2,215.414,0_5_7_1
2,5,7_1,0,3,215.536,0_5_7_1
3,5,7_1,1,1,262.235,1_5_7_1
4,5,7_1,1,2,215.414,1_5_7_1


In [76]:
df.drop(columns=['Treatment','Code','Day','Leave'], inplace=True)
df.to_csv('../materials/process_csv/area_data.csv')

In [77]:
df.head()

Unnamed: 0,Total area (cm2),Label
0,262.235,0_5_7_1
1,215.414,0_5_7_1
2,215.536,0_5_7_1
3,262.235,1_5_7_1
4,215.414,1_5_7_1


### Weight and Color Data

In [15]:
weight_data = pd.read_csv('../materials/weight_data_normalized.csv')
color_data = pd.read_csv('../materials/color_stats_folder.csv')

# Merge the two dataframes
merged_data = pd.merge(weight_data, color_data, on='Label')
merged_data

Unnamed: 0,Label,Weight,R_Mean,R_Std,G_Mean,G_Std,B_Mean,B_Std,H_Mean,H_Std,S_Mean,S_Std,V_Mean,V_Std,L_Mean,L_Std,A_Mean,A_Std,B_Lab_Mean,B_Lab_Std
0,0_5_1,158.9,202.031017,63.184791,206.361271,55.761869,195.552566,74.087194,48.603222,56.316379,26.942528,50.146708,207.003862,55.870933,208.442289,55.784846,124.388767,7.205789,133.458801,10.995181
1,1_5_1,154.5,196.388049,57.669474,200.363846,50.615080,189.855627,68.516092,63.280031,57.690568,26.164496,48.083566,201.372722,50.922278,203.303091,50.910815,124.570902,7.112593,133.317630,10.926267
2,2_5_1,149.7,196.586956,56.816229,200.405571,50.129378,190.606173,67.044048,63.870066,57.068227,24.570594,45.595466,201.448391,50.424279,203.433086,50.298129,124.747403,6.862492,132.948486,10.475270
3,3_5_1,148.4,206.840822,58.391917,210.816657,51.437818,201.517561,68.725207,52.674892,54.528216,22.998771,45.661590,211.541725,51.588476,212.927224,51.478033,124.804936,6.758229,132.644228,10.385137
4,4_5_1,147.5,207.421014,58.118789,211.122241,51.420555,202.135509,68.412750,53.727779,56.380803,22.434080,44.968883,211.884094,51.545167,213.275613,51.336971,124.952920,6.614468,132.517304,10.255598
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
332,2_20_10,118.4,204.384967,50.345183,208.882881,42.446480,194.612452,67.042644,61.371267,59.403374,28.421718,52.883715,209.776610,42.625017,211.348455,42.672113,123.963300,8.259418,135.028585,14.358021
333,3_20_10,115.4,210.505290,42.843300,214.566265,35.830962,196.297086,66.813680,59.078567,58.149874,31.265145,58.421417,215.425985,35.932890,216.751299,35.961753,123.749710,8.554217,136.850531,17.902888
334,4_20_10,113.4,222.423504,30.655050,223.428537,28.232242,202.549156,62.405362,54.105349,56.660399,30.381789,58.179694,224.667100,28.013769,225.538257,27.379531,124.799805,6.763650,138.151244,21.016208
335,5_20_10,108.2,226.995930,22.082929,225.147143,22.570170,203.399702,57.467020,58.525626,55.706253,30.525700,57.882589,228.158213,21.464399,227.775484,21.036250,125.884807,4.683147,138.759960,22.549213


In [16]:
print(weight_data.shape)
print(color_data.shape)
print(merged_data.shape)

(340, 2)
(337, 19)
(337, 20)


In [17]:
merged_data.to_csv('../materials/merged_data.csv', index=False)

### Weight and Area Data