In [27]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Toy plant survey: one numeric column and two string (categorical) columns.
data = {'Approximate Plant Age (weeks)': [1, 5, 10, 15, 20],
        'Indoor Or Outdoor?': ['Indoor', 'Outdoor', 'Indoor', 'Outdoor', 'Indoor'],
        'How Often Do You Water?': ['Daily', 'Weekly', 'Monthly', 'Never', 'Daily'],
        }
df = pd.DataFrame(data)
print(f"Plant data : \n{df}")

# Min-max scale the numeric column into [0, 1]. The minimum and the range are
# computed once instead of being repeated inside a single long expression.
age_column = 'Approximate Plant Age (weeks)'
age_min = df[age_column].min()
age_range = df[age_column].max() - age_min
if age_range == 0:
    # All values are identical, so (x - min) / (max - min) would be 0 / 0 = NaN;
    # map the constant column to 0.0 instead.
    df[age_column] = 0.0
else:
    df[age_column] = (df[age_column] - age_min) / age_range

# Object-dtype columns are the ones that get one-hot encoded.
categorical_columns = df.select_dtypes(include=['object']).columns.tolist()

# sparse_output=False yields a dense array that can be wrapped in a DataFrame.
encoder = OneHotEncoder(sparse_output=False)
one_hot_encoded = encoder.fit_transform(df[categorical_columns])

# Reuse df's index: pd.concat(axis=1) aligns rows by index label, so a fresh
# RangeIndex here would misalign rows (and introduce NaNs) whenever df carries
# a non-default index, e.g. after filtering or re-indexing.
one_hot_df = pd.DataFrame(
    one_hot_encoded,
    columns=encoder.get_feature_names_out(categorical_columns),
    index=df.index,
)

# Final frame: the scaled numeric column followed by the one-hot columns, with
# the original string columns removed.
df_encoded = pd.concat([df, one_hot_df], axis=1)
df_encoded = df_encoded.drop(categorical_columns, axis=1)
print(f"Encoded Plant data : \n{df_encoded}")

Plant data : 
   Approximate Plant Age (weeks) Indoor Or Outdoor? How Often Do You Water?
0                              1             Indoor                   Daily
1                              5            Outdoor                  Weekly
2                             10             Indoor                 Monthly
3                             15            Outdoor                   Never
4                             20             Indoor                   Daily
Encoded Plant data : 
   Approximate Plant Age (weeks)  Indoor Or Outdoor?_Indoor  \
0                       0.000000                        1.0   
1                       0.210526                        0.0   
2                       0.473684                        1.0   
3                       0.736842                        0.0   
4                       1.000000                        1.0   

   Indoor Or Outdoor?_Outdoor  How Often Do You Water?_Daily  \
0                         0.0                            1.0   
1

In [38]:
import numpy as np

# Convert the encoded frame to a NumPy array. DataFrame.to_numpy() is the
# accessor pandas recommends over .values; requesting float64 explicitly means
# a non-numeric column that slipped past encoding surfaces here instead of
# silently producing an object-dtype array.
feature_vector = df_encoded.to_numpy(dtype=np.float64)
print(f"Feature vector shape: {feature_vector.shape}")
print(f"Feature vector: \n {feature_vector}")

Feature vector shape: (5, 7)
Feature vector: 
 [[0.         1.         0.         1.         0.         0.
  0.        ]
 [0.21052632 0.         1.         0.         0.         0.
  1.        ]
 [0.47368421 1.         0.         0.         1.         0.
  0.        ]
 [0.73684211 0.         1.         0.         0.         1.
  0.        ]
 [1.         1.         0.         1.         0.         0.
  0.        ]]


In [37]:
import torch
# Build a float32 tensor from feature_vector.
metadata_tensor = torch.tensor(feature_vector, dtype=torch.float32)

# Report the shape first, then the full contents, one per line.
print(
    f"Metadata tensor shape: {metadata_tensor.shape}",
    f"Metadata tensor: \n {metadata_tensor}",
    sep="\n",
)

Metadata tensor shape: torch.Size([5, 7])
Metadata tensor: 
 tensor([[0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000],
        [0.2105, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 1.0000],
        [0.4737, 1.0000, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000],
        [0.7368, 0.0000, 1.0000, 0.0000, 0.0000, 1.0000, 0.0000],
        [1.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000]])
