In [1]:
import numpy as np
import matplotlib.pyplot as plt
import gymnasium as gym
import joblib
import os
import pandas as pd
from gymnasium import spaces
from xgboost import XGBClassifier
import tensorflow as tf

In [2]:
import tensorflow as tf

unified_model_path = "G:\\Intern_Project_2025\\models\\Unified_classifier.keras"
unified_model = tf.keras.models.load_model(unified_model_path)

In [23]:
csv_path = "C:\\Users\\clombardi\\RL\\data.csv"
df = pd.read_csv(csv_path)
df['ImagePath'] = df['ImagePath'].apply(os.path.basename)

In [44]:
filtered_df = df[df['CNN_Predicition'] == 1]
ideal_tensions = dict(filtered_df.groupby('FiberType')['CleaveTension'].mean().astype(np.float32))

In [7]:
from tensorflow.keras.models import Model

feature_output = unified_model.get_layer("global_avg").output
feature_extractor = Model(inputs=unified_model.input, outputs=feature_output)

In [3]:
 def mask_background(img: tf.Tensor) -> tf.Tensor:
        """Mask background to prevent model from focusing on sharp gradient
        near edges.

        Args:
            img: Image tensor of shape (H, W, C)

        Returns:
            tf.Tensor: Image with circular mask applied
        """
        h = tf.shape(img)[0]
        w = tf.shape(img)[1]
        y_range = tf.range(h)
        x_range = tf.range(w)
        yy, xx = tf.meshgrid(y_range, x_range, indexing="ij")
        center_x = tf.cast(w, tf.float32) / 2.0
        center_y = tf.cast(h, tf.float32) / 2.0
        radius = tf.minimum(center_x, center_y)
        dist_from_center = tf.sqrt(
            (tf.cast(xx, tf.float32) - center_x) ** 2
            + (tf.cast(yy, tf.float32) - center_y) ** 2
        )

        mask = tf.cast(dist_from_center <= radius, tf.float32)
        mask = tf.expand_dims(mask, axis=-1)
        return img * mask

In [4]:
def load_process_images(filename: str, set_mask: bool) -> "tf.Tensor":
    """Load and preprocess image from file path.

    Args:
        filename: Image filename or path

    Returns:
        tf.Tensor: Preprocessed image tensor
    """

    if tf is None:
        raise ImportError("TensorFlow is required for image processing")

    def load_image(file):
        """Load an image and process using same preprocessing as backbone.

        Args:
            file: path to image
            preprocess_input: processing from backbone model

        Returns:
            loaded and resized image
        """
        full_path = os.path.join(img_folder, file)

        try:
            img_raw = tf.io.read_file(full_path)
        except FileNotFoundError:
            print(f"Image file not found: {full_path}")
            return None
        except Exception as e:
            print(f"Error loading image {full_path}: {e}")
            return None

        try:
            img = tf.image.decode_png(img_raw, channels=1)
            img = tf.image.resize(img, [224, 224])
            img = tf.image.grayscale_to_rgb(img)
            if set_mask == True:
                img = mask_background(img)
            return img
        except Exception as e:
            print(f"Error processing image {full_path}: {e}")
            return None

    img = load_image(filename)
    img.set_shape([224, 224, 3])
    return img

In [33]:
import os

img_folder = "C:\\Thorlabs\\combined_images"
features = []

pred_features = df[
                [
                    "CleaveAngle",
                    "CleaveTension",
                    "ScribeDiameter",
                    "Misting",
                    "Hackle",
                ]
            ].values

image_paths = df['ImagePath']

for img_path, feature_vector in zip(image_paths, pred_features):
    image = load_process_images(img_path, set_mask=False)
    image = np.expand_dims(image, axis=0)
    feature_vector = np.expand_dims(feature_vector, axis=0)
    feature_vector = np.zeros_like(feature_vector)
    feat = feature_extractor.predict([image, feature_vector])[0]
    features.append(feat)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45

In [65]:
features = np.array(features)
features.shape

(206, 1280)

In [86]:
class CleaveEnv(gym.Env):

    metadata = {'render_modes': ['human']}

    def __init__(self, csv_path, cnn_path, img_folder):
        
        super().__init__()

        self.cnn_model = tf.keras.models.load_model(cnn_path)
        self.img_folder = img_folder
        self.df = pd.read_csv(csv_path)

        filtered_df = self.df[self.df['CNN_Predicition'] == 1]
        self.ideal_tensions = dict(filtered_df.groupby('FiberType')['CleaveTension'].mean().astype(np.float32))
        
        len_fibers = len(self.df['FiberType'].unique())
        
        self.df = pd.get_dummies(self.df, columns=['FiberType'], dtype=np.int32)
        
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)
        self.max_tension_change = 10.0

        
        fiber_types = self.df.iloc[:, -len_fibers:]
        other_inputs = self.df['Diameter']
        
        combined_df = pd.concat([other_inputs, fiber_types], axis=1)
        
        self.context_df = combined_df
        observations_total = 1 + len(self.context_df.columns)

        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(observations_total,), dtype=np.float32)

        self.max_steps = 15
        self.current_step = 0
        self.current_context=None
        self.current_tension = 0
        self.render_mode = None

    def load_process_images(self, filename: str) -> "tf.Tensor":
        """Load and preprocess image from file path.
    
        Args:
            filename: Image filename or path
    
        Returns:
            tf.Tensor: Preprocessed image tensor
        """
    
        if tf is None:
            raise ImportError("TensorFlow is required for image processing")
    
        def load_image(file):
            """Load an image and process using same preprocessing as backbone.
    
            Args:
                file: path to image
                preprocess_input: processing from backbone model
    
            Returns:
                loaded and resized image
            """
            full_path = os.path.join(self.img_folder, file)
    
            try:
                img_raw = tf.io.read_file(full_path)
            except FileNotFoundError:
                print(f"Image file not found: {full_path}")
                return None
            except Exception as e:
                print(f"Error loading image {full_path}: {e}")
                return None
    
            try:
                img = tf.image.decode_png(img_raw, channels=1)
                img = tf.image.resize(img, [224, 224])
                img = tf.image.grayscale_to_rgb(img)
                return img
            except Exception as e:
                print(f"Error processing image {full_path}: {e}")
                return None
    
        img = load_image(filename)
        img.set_shape([224, 224, 3])
        return img

    def reset(self, seed=None, options=None):

        super().reset(seed=seed)

        self.current_context = self.context_df.sample(n=1, random_state=self.np_random)
        self.current_ideal_tension = self.ideal_tensions[self._get_current_fiber_type()]
        self.current_tension = self.np_random.uniform(low=self.current_ideal_tension*(0.8), high=self.current_ideal_tension*(1.2))
        self.current_step = 0

        observation = self._create_observation()

        if self.render_mode == "human":
            print("\n---------------EPISODE RESET----------------------")
            print(f"New Scenario: Fiber = {self._get_current_fiber_type()} Start Tension = {self.current_tension:.0f}")

        return observation, {}

    def step(self, action):
        delta_tension = float(action[0] * self.max_tension_change)
        self.current_tension = self.current_tension + delta_tension
        self.current_tension = np.clip(self.current_tension, 50, 2000)
        self.current_ideal_tension = self.ideal_tensions[self._get_current_fiber_type()]
        
        self.current_step = self.current_step + 1

        model_inputs = self.current_context.copy()
        model_inputs['CleaveTension'] = self.current_tension

        row_index = self.current_context.index[0]
        image_filename = self.df.iloc[row_index]['ImagePath']
        image_tensor = self.load_process_images(image_filename)
        
        image_tensor = tf.expand_dims(image_tensor, axis=0)
        
        dummy_features = np.zeros((1, 5)) 
        cnn_raw = self.cnn_model.predict([image_tensor, dummy_features], verbose=0)
        cnn_pred = cnn_raw[0][0]

        terminated = False
        if cnn_pred >= 0.63:
            reward = 100.0
            terminated = True
        else:
            reward = 50.0 * cnn_pred - 3.0 * (1 - cnn_pred)

        SAFE_DELTA_THRESHOLD = 5.0

        if abs(delta_tension) <= SAFE_DELTA_THRESHOLD:
            reward += 1.5
        else:
            reward -= 0.25 * (abs(delta_tension) - SAFE_DELTA_THRESHOLD)

        tension_error = abs(self.current_tension - self.current_ideal_tension)
        reward += max(0, 1 - (tension_error / self.current_ideal_tension)) * 20.0

        action_cost = 0.1 * abs(delta_tension)
        reward = reward - action_cost

        truncated = self.current_step >= self.max_steps
        if truncated and not terminated:
            reward = reward - 25.0

        if self.render_mode == "human":
            self.render(action, cnn_pred, reward)
        observation = self._create_observation()
        return observation, float(reward), terminated, truncated, {}

    def _get_current_fiber_type(self):
        for col_name in self.current_context.columns:
            if 'FiberType_' in col_name and self.current_context[col_name].iloc[0] == 1.0:
                return col_name.replace('FiberType_', '')
        return "Unknown"

    def _create_observation(self):
        return np.concatenate([
            [self.current_tension],
            self.current_context.values[0]
        ]).astype(np.float32)

    def render(self, action, cnn_pred, reward):
        action_str = f"{(action[0] *10.0):+.2f}"
        cnn_str = "GOOD" if cnn_pred > 0.63 else "BAD"
        print(f"Step {self.current_step:2d} Tension: {self.current_tension:6.1f} (Action: {action_str:6s}) -> CNN: {cnn_str:4s}| Reward: {reward:6.1f}")
    

In [87]:
csv_path = "C:\\Users\\clombardi\\RL\\data.csv"
cnn_path= "G:\\Intern_Project_2025\\models\\Unified_classifier.keras"
img_folder="C:\\Thorlabs\\combined_images"
env = CleaveEnv(csv_path=csv_path, cnn_path=cnn_path, img_folder=img_folder)

In [88]:
from stable_baselines3 import SAC
from stable_baselines3.common.env_checker import check_env
import os

check_env(env)

In [89]:
agent = SAC(
    "MlpPolicy",            
    env,
    device="cuda",
    verbose=0,
    buffer_size=1000000,
    ent_coef='auto',
    learning_rate=3e-4,
    batch_size=1028,
    tau=0.1
)

In [90]:
agent.learn(total_timesteps=5000, progress_bar=True)

Output()

<stable_baselines3.sac.sac.SAC at 0x2358c3e8430>

In [96]:
agent_save_path = "C:\\Users\\clombardi\\RL\\agent7"
#agent.save(agent_save_path)

In [97]:
trained_agent = SAC.load(agent_save_path)
eval_env = CleaveEnv(csv_path=csv_path, cnn_path=cnn_path, img_folder=img_folder)
eval_env.render_mode = "human"

In [101]:
for episode in range(5):
        obs, info = eval_env.reset()
        done = False
        episode_reward = 0
        while not done:
            action, _ = trained_agent.predict(obs, deterministic=True)
            
            obs, reward, terminated, truncated, info = eval_env.step(action)
            
            episode_reward += reward
            done = terminated or truncated

        print(f"Episode {episode + 1} finished with a total reward of: {episode_reward:.2f}")

eval_env.close()


---------------EPISODE RESET----------------------
New Scenario: Fiber = 400LA Start Tension = 1308
Step  1 Tension: 1300.6 (Action: -7.04 ) -> CNN: BAD | Reward:   17.8
Step  2 Tension: 1293.9 (Action: -6.70 ) -> CNN: BAD | Reward:   18.1
Step  3 Tension: 1287.6 (Action: -6.33 ) -> CNN: BAD | Reward:   18.3
Step  4 Tension: 1281.7 (Action: -5.90 ) -> CNN: BAD | Reward:   18.6
Step  5 Tension: 1276.3 (Action: -5.46 ) -> CNN: BAD | Reward:   18.9
Step  6 Tension: 1271.2 (Action: -5.03 ) -> CNN: BAD | Reward:   19.2
Step  7 Tension: 1266.6 (Action: -4.61 ) -> CNN: BAD | Reward:   20.8
Step  8 Tension: 1262.4 (Action: -4.21 ) -> CNN: BAD | Reward:   20.9
Step  9 Tension: 1258.6 (Action: -3.83 ) -> CNN: BAD | Reward:   21.1
Step 10 Tension: 1255.1 (Action: -3.46 ) -> CNN: BAD | Reward:   21.2
Step 11 Tension: 1252.2 (Action: -2.93 ) -> CNN: BAD | Reward:   21.3
Step 12 Tension: 1249.7 (Action: -2.45 ) -> CNN: BAD | Reward:   21.4
Step 13 Tension: 1247.7 (Action: -2.03 ) -> CNN: BAD | Rewa