In [79]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

# Set the backend to TkAgg for interactive plotting
plt.switch_backend('TkAgg')

In [80]:
# Read the data file
df = pd.read_csv("./labeled_dataset-rgb_combinations_labeled.csv", sep=',')

# Print the data for verification
print('\n\ndf.head(10):\n')
print(df.head(10))



df.head(10):

    current      R      G      B  rgb unique combination
0  0.124718  0.000  0.000  0.082                       0
1  0.336410  0.000  0.000  0.333                       1
2  0.084220  0.000  0.000  0.000                       2
3  0.465096  0.498  0.000  0.000                       3
4  0.447505  0.000  0.165  0.333                       4
5  0.440049  0.247  0.165  0.082                       5
6  0.553050  0.498  0.165  0.000                       6
7  0.340914  0.000  0.000  0.333                       1
8  0.126161  0.000  0.000  0.082                       0
9  0.465773  0.498  0.000  0.000                       3


In [93]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


def predict_rgb_class(new_data_point):
  """
  Predicts class labels for the red, green, and blue channels based on a new data point.

  Args:
      new_data_point (float): The value for which to predict class labels.

  Returns:
      tuple: A tuple containing the predicted class labels for red, green, and blue channels.
  """

  # Separate features and target
  X = df.copy()[['current']]
  y = pd.DataFrame(df, columns=['R', 'G', 'B', 'rgb unique combination'])

  # Check for missing values in features (X_train)
  if X.isna().any().any():
      # Handle missing values in X_train (e.g., imputation or removal)
      print("Missing values found in X_train. Please handle them appropriately.")
      # ... (your missing value handling code)

  # Check for missing values in target (y)
  if y.isna().any().any():
      # Handle missing values in y (e.g., imputation or removal)
      print("Missing values found in y. Please handle them appropriately.")
      # ... (your missing value handling code)

  # Convert "rgb unique combination" to categorical format (assuming pandas DataFrame)
  y = pd.get_dummies(y, columns=['rgb unique combination'])

  # Split data into training and testing sets
  y_encoded = pd.get_dummies(y)
  X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

  # Normalize data (can be beneficial for classification models)
  scaler = StandardScaler()
  X_train = scaler.fit_transform(X_train)
  X_test = scaler.transform(X_test)

  # Create individual multi-label classifiers for each channel
  clf_R = OneVsRestClassifier(RandomForestClassifier(n_estimators=100, random_state=42))
  clf_G = OneVsRestClassifier(RandomForestClassifier(n_estimators=100, random_state=42))
  clf_B = OneVsRestClassifier(RandomForestClassifier(n_estimators=100, random_state=42))

  print(y_train)

  # Assuming encoded columns have the same prefix (e.g., 'R_', 'G_', 'B_')
  red_encoded_column = y_train.filter(like='R_')
  green_encoded_column = y_train.filter(like='G_')
  blue_encoded_column = y_train.filter(like='B_')

  # Check for empty DataFrames (handle missing classes or naming inconsistencies)
  if red_encoded_column.empty:
      print("Red channel has no encoded classes. Handle this appropriately.")
  # ... (similar checks for green and blue channels)

  # Train each classifier with the corresponding encoded column (if not empty)
  if not red_encoded_column.empty:
      clf_R.fit(X_train, red_encoded_column)
  if not green_encoded_column.empty:
      clf_G.fit(X_train, green_encoded_column)
  if not blue_encoded_column.empty:
      clf_B.fit(X_train, blue_encoded_column)

  # Prepare the new data point for prediction (assuming it's a single value)
  new_data_point = scaler.transform([[new_data_point]])  # Reshape for prediction

  # Predict class labels for the new data point (use trained models if available)
  predicted_R = None
  predicted_G = None
  predicted_B = None
  if not red_encoded_column.empty:
      predicted_R = clf_R.predict(new_data_point)[0]
      print(predicted_R)
  if not green_encoded_column.empty:
      predicted_G = clf_G.predict(new_data_point)[0]
      print(predicted_G)
  if not blue_encoded_column.empty:
      predicted_B = clf_B.predict(new_data_point)[0]
      print(predicted_B)


predict_rgb_class(0)

          R      G      B  rgb unique combination_0  rgb unique combination_1  \
2705  0.498  0.000  0.000                     False                     False   
5313  0.498  0.165  0.082                     False                     False   
653   0.247  0.416  0.082                     False                     False   
429   0.247  0.000  0.333                     False                     False   
3914  0.000  0.000  0.082                      True                     False   
...     ...    ...    ...                       ...                       ...   
3772  0.000  0.000  0.000                     False                     False   
5191  0.247  0.416  0.082                     False                     False   
5226  0.247  0.165  0.082                     False                     False   
5390  0.247  0.000  0.082                     False                     False   
860   0.498  0.416  0.000                     False                     False   

      rgb unique combinatio

