You want to explore the relationship between the individual LED currents and the total current. Feature engineering could potentially help with tasks like:

- Predicting individual LED currents based on the total current.
- Identifying abnormal current patterns for fault detection.


In [169]:
import math
import pandas as pd
import numpy as np
from sklearn import preprocessing, svm, metrics, tree
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LinearRegression
from ydata_profiling import ProfileReport
import matplotlib.pyplot as plt
import seaborn as sns

# Set the backend to TkAgg for interactive plotting
plt.switch_backend('TkAgg')

In [170]:
# Read the data file
df = pd.read_csv("./labeled_dataset_2024.03.30-20h56m01s.csv", sep=',')

# Print the data for verification
print('\n\ndf.head(10):\n')
print(df.head(10))



df.head(10):

    current      R      G      B
0  0.036605  0.639  0.639  0.639
1  0.001844  0.396  0.000  0.396
2  0.012273  0.639  0.396  0.639
3  0.002571  0.639  0.000  0.396
4  0.012477  0.639  0.396  0.639
5  0.006664  0.396  0.396  0.639
6  0.012040  0.396  0.639  0.639
7  0.038608  0.639  0.639  0.639
8  0.001615  0.000  0.396  0.000
9  0.002370  0.639  0.000  0.396


In [171]:
# Generate a report for Pandas Dataframe analysis using the ydata profiling library
# profile = ProfileReport(df, title="Pandas Profiling Report")
# profile.to_notebook_iframe()
# profile.to_file("report.html")

In [172]:
correlation_matrix = df.corr()
print(correlation_matrix)

          current         R         G         B
current  1.000000  0.360415  0.471377  0.415010
R        0.360415  1.000000  0.000523  0.028787
G        0.471377  0.000523  1.000000  0.030066
B        0.415010  0.028787  0.030066  1.000000


In [173]:
df_current = df['current'] 
df_engineering = (df['R'] * 0.360415) + (df['G'] * 0.471377) + (df['B'] * 0.415010)
df_engineering = pd.DataFrame({'current': df_current, 'engineered_data': df_engineering})
correlation_matrix = df_engineering.corr()
print(correlation_matrix)

                  current  engineered_data
current          1.000000         0.709605
engineered_data  0.709605         1.000000


In [174]:
x = df['current'].to_numpy()  # Select the first column
y = df[['R','G','B']].to_numpy()  # Select all columns except the first

# Assuming x is the total current and y is a 2D array with individual LED currents (rows, columns)
led_names = ['Red', 'Green', 'Blue']  # Adjust names as needed

# Create a figure with subplots
fig, axes = plt.subplots(1, 3, figsize=(12, 4))  # Adjust figure size as desired

# Iterate through each LED and create a scatter plot
for i, led_name in enumerate(led_names):
  axes[i].scatter(x, y[:, i])  # i refers to the column index in y for each LED
  axes[i].set_title(f"{led_name} LED Current")
  axes[i].set_xlabel('Total Current')
  axes[i].set_ylabel(f'{led_name} Current')
  axes[i].grid(True)

# Adjust layout (optional)
fig.suptitle('Scatter Plots of Total Current vs. Individual LED Currents')
plt.tight_layout()

# Show the plots
plt.show()

In [175]:
scaler = preprocessing.StandardScaler()
data = scaler.fit_transform(df_engineering)
print('\n')
print('scaler.fit_transform(x) - head(10):\n')
print(data[:10])



scaler.fit_transform(x) - head(10):

[[ 3.26316654  1.84615336]
 [-0.54226827 -0.74446667]
 [ 0.59943881  1.24011082]
 [-0.46268048 -0.28108636]
 [ 0.62177156  1.24011082]
 [-0.01460233  0.77673051]
 [ 0.5739313   1.38277305]
 [ 3.48244349  1.84615336]
 [-0.56733787 -1.38150533]
 [-0.48468481 -0.28108636]]


In [176]:
# Put the data in variable X and the answers in variable y
x = data[:, 0]  # Select the first column
y = data[:, 1]
print("\n\nx - head(10):\n")
print(x[:10])
print("\ny - head(10):\n")
print(y[:10])



x - head(10):

[ 3.26316654 -0.54226827  0.59943881 -0.46268048  0.62177156 -0.01460233
  0.5739313   3.48244349 -0.56733787 -0.48468481]

y - head(10):

[ 1.84615336 -0.74446667  1.24011082 -0.28108636  1.24011082  0.77673051
  1.38277305  1.84615336 -1.38150533 -0.28108636]


In [177]:
x = data[:, 0]  # Select the first column
y = data[:, 1]  # Select all columns except the first

# Assuming x and y are your data arrays
correlation_coefficient = np.corrcoef(x, y)[0, 1]
print("Correlation Coefficient:", correlation_coefficient)

Correlation Coefficient: 0.709604874025887


In [199]:
x = data[:, 0]  # Select the first column
y = data[:, 1]  # Select all columns except the first
x = x.reshape(-1, 1)

# Split the dataset into training (80%) and test (20%)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# Define and train the ML model
clf = LinearRegression()
# clf = LogisticRegression()
# clf = svm.SVC(probability=True)
# clf = tree.DecisionTreeClassifier()
# clf = MLPClassifier(random_state=1,max_iter=500)
clf.fit(x_train, y_train)
# tree.plot_tree(clf) 


#implement the Cross-Validation (CV) technique
cv = KFold(n_splits=5)
print(f"\nNumber of folds: {cv.get_n_splits(x_train)}\n")
print(cv)


# # Create and train the model
# model = LogisticRegression()
# model.fit(x_train, y_train)

# Make predictions on the test set

# y_predicted = clf.predict(x_test)

# Define the exponential expression (replace with your actual function if needed)
def exponential_expression(x):
  return 0.0267*x**5 + -0.3416*x**4 + 1.4774*x**3 + -2.4925*x**2 + 1.5757*x + 0.9241

# Assuming x_test is a NumPy array
y_predicted = exponential_expression(x_test)  # Element-wise multiplication

# Print the result (optional)
print(y_predicted)

# # Evaluate the model's performance (e.g., using Mean Squared Error (MSE))
# from sklearn.metrics import mean_squared_error
# mse = mean_squared_error(y_test, y_predicted)
# print("Mean Squared Error:", mse)


Number of folds: 5

KFold(n_splits=5, random_state=None, shuffle=False)
[[ 8.04905320e-01]
 [ 7.94009991e-01]
 [ 1.00892686e+00]
 [-1.10028711e+00]
 [ 7.55954412e-01]
 [ 1.11442603e+00]
 [ 8.56886220e-02]
 [ 1.21673738e+00]
 [-5.58898861e-01]
 [-8.79805781e-01]
 [-1.11104891e+00]
 [-9.53927220e-01]
 [-7.17346939e-01]
 [ 1.18778162e+00]
 [ 1.80341304e+00]
 [-4.69705540e-01]
 [-1.18737409e+00]
 [-6.11234009e-01]
 [-9.51363146e-01]
 [ 1.13594957e+00]
 [-4.09209199e-01]
 [ 1.19276187e+00]
 [-5.56115120e-01]
 [ 1.25639694e-01]
 [-4.87505312e-01]
 [-8.78554466e-01]
 [-1.14354640e+00]
 [-9.88751762e-01]
 [ 1.20524592e+00]
 [-1.20880892e+00]
 [-8.62335133e-01]
 [-1.17087179e+00]
 [ 1.96465075e+00]
 [ 1.11412967e+00]
 [ 2.27123766e-01]
 [-5.71183541e-01]
 [-8.79805781e-01]
 [-4.82637935e-01]
 [ 1.97392397e-03]
 [-9.83567894e-01]
 [ 3.00495684e-02]
 [-8.39406252e-01]
 [-9.53286001e-01]
 [-1.11441921e+00]
 [-1.11644304e+00]
 [-7.22674153e-01]
 [-1.29092956e+00]
 [-1.15172065e+00]
 [ 1.13659856e+

In [200]:
# Plot the actual data (x vs y)
plt.scatter(x_test, y_test, label='Actual Data', color='blue')

# Plot the predicted values (x vs y_predicted)
plt.scatter(x_test, y_predicted, label='Predicted Data', color='red')

# Add labels and title
plt.xlabel('x')
plt.ylabel('y')
plt.title('Actual vs. Predicted Values')

# Add legend
plt.legend()

# Show the plot
plt.grid(True)  # Add grid for better visualization (optional)
plt.show()

In [185]:
from lmfit import Parameters, minimize

# Assuming data is a 2D array with x in the first column and y in the second
x = data[:, 0]  # Select the first column
y = data[:, 1]  # Select the second column

# Invert y values (assuming they represent positive values)
# y_inv = 1 / y
y_inv = y

params = Parameters()
params.add('a', value=1.0)  # Initial guess for parameter a
params.add('b', value=0.1)  # Initial guess for parameter b
params.add('c', value=0.0)  # Initial guess for parameter c (baseline)

# Define the objective function (difference between inverted y and model output)
def objective_function(params, x, y_inv):
  a = params['a']
  b = params['b']
  c = params['c']
  model_output = a * np.exp(-b * x) + c

  # Calculate squared errors for each data point
  squared_errors = (y_inv - model_output) ** 2

  # Return the total sum of squared errors
  return np.sum(squared_errors), 0, 0

# Fit the model to your data
result = minimize(objective_function, params, args=(x, y_inv))

# Access fitted parameters
a_fitted = result.params['a'].value
b_fitted = result.params['b'].value
c_fitted = result.params['c'].value

# Print the fitted function (consider reversing the transformation if applied)
print(f"Fitted function: y = {a_fitted} * exp(-{b_fitted} * x) + {c_fitted}")

# Optional: Calculate residuals (difference between inverted y and fitted model)
residuals = y_inv - (a_fitted * np.exp(-b_fitted * x) + c_fitted)
print(f"Residuals (sum of squared errors): {np.sum(residuals**2)}")


Fitted function: y = -0.31984337736363 * exp(-3.617298908434978 * x) + 1.292861663911042
Residuals (sum of squared errors): 111.90411344034275


In [198]:
from scipy.optimize import curve_fit

# Assuming data is a 2D array with x in the first column and y in the second
x = data[:, 0]  # Select the first column
y = data[:, 1]  # Select the second column

# Define cubic polynomial function
def poly_func(x, a, b, c, d, e, f):
  return a * x**5 + b * x**4 + c * x**3 + d * x**2 + e * x + f

# Perform curve fitting
popt, pcov = curve_fit(poly_func, x, y)

# Access fitted coefficients
a_fitted, b_fitted, c_fitted, d_fitted, e_fitted, f_fitted= popt

# Construct the fitted polynomial equation
fitted_equation = f"y = {a_fitted:.4f}x^5 + {b_fitted:.4f}x^4 + {c_fitted:.4f}x^3 + {d_fitted:.4f}x^2 + {e_fitted:.4f}x + {f_fitted:.4f}"

# Print the fitted equation
print(fitted_equation)

# (Optional) Visualization using matplotlib
import matplotlib.pyplot as plt

plt.plot(x, y, 'o', label='Data')
plt.plot(x, poly_func(x, *popt), label='Fitted Curve')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Fitted Cubic Polynomial')
plt.legend()
plt.grid(True)
plt.show()


y = 0.0267x^5 + -0.3416x^4 + 1.4774x^3 + -2.4925x^2 + 1.5757x + 0.9241
