<a href="https://colab.research.google.com/github/darshita27-cmd/Traffic-Management-system-using-Deep-Q/blob/main/DQN_traffic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

DQN-traffic management

- the system observes traffic density, waiting time, and emergency vehicles
- it takes actions like adjusting signal timing or prioritizing a lane
- it receives a reward based on the flow improvement

In [None]:
!pip install tensorflow gym[all] sumo_rl # tensorflow for deep Q network(DQN), gym[all] for for reinforcement learning environment, traci for interface for controlling sumo, sumo_rl is the library for integrating sumo with RL

# Uninstall the standalone traci package (if installed)
!pip uninstall -y traci

# Install SUMO
!apt update
!apt install -y sumo sumo-tools sumo-doc  # sumo is the main traffic simulator, sumo-tools for handling the traffic data

# Verify SUMO installation
!sumo --version

# Create the sumo_files directory
!mkdir -p /content/sumo_files
%cd /content/sumo_files

# Download the raw files
!wget https://raw.githubusercontent.com/LucasAlegre/sumo-rl/main/sumo_rl/nets/4x4-Lucas/4x4.net.xml
!wget https://raw.githubusercontent.com/LucasAlegre/sumo-rl/main/sumo_rl/nets/4x4-Lucas/4x4c1.rou.xml
!wget https://raw.githubusercontent.com/LucasAlegre/sumo-rl/main/sumo_rl/nets/4x4-Lucas/4x4c1c2.rou.xml
!wget https://raw.githubusercontent.com/LucasAlegre/sumo-rl/main/sumo_rl/nets/4x4-Lucas/4x4c1c2c1c2.rou.xml
!wget https://raw.githubusercontent.com/LucasAlegre/sumo-rl/main/sumo_rl/nets/4x4-Lucas/4x4c2.rou.xml
!wget https://raw.githubusercontent.com/LucasAlegre/sumo-rl/main/sumo_rl/nets/4x4-Lucas/4x4c2c1.rou.xml
!wget https://raw.githubusercontent.com/LucasAlegre/sumo-rl/main/sumo_rl/nets/4x4-Lucas/4x4.sumocfg


# List files in the directory
%cd /content/sumo_files
!ls

# Set SUMO_HOME environment variable
import os
os.environ['SUMO_HOME'] = '/usr/share/sumo'

# Add SUMO tools to Python path
import sys
if 'SUMO_HOME' in os.environ:
    tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
    sys.path.append(tools)
else:
    sys.exit("Please declare the environment variable 'SUMO_HOME'")

# Load SUMO in Python
import traci

# Kill any existing SUMO processes
!pkill -f sumo

# Close any existing traci connection (if any)
try:
    traci.close()
except:
  pass  # Ignore if no connection exists

# Start SUMO simulation
sumo_cmd = ["sumo", "-c", "/content/sumo_files/4x4.sumocfg"]

# Start traci with retries and a specific port
traci.start(sumo_cmd)
print("SUMO and traci are successfully installed and running!")



Collecting sumo_rl
  Using cached sumo_rl-1.4.5-py3-none-any.whl.metadata (12 kB)
Collecting mujoco==2.2.0 (from gym[all])
  Using cached mujoco-2.2.0.tar.gz (138 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pygame==2.1.0 (from gym[all])
  Using cached pygame-2.1.0.tar.gz (5.8 MB)
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Preparing metadata (setup.py) ... [?25l[?25herror
[1;31merror[0m: [1mmetadata-generation-failed[0m

[31m×[0m Encountered error while generating package metadata.
[31m╰─>[0m See above for output.

[1;35mnote[0m: This is an issue with the package mentioned above, not pip.
[1;36mhint[0m: See above for details.
Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran4

In [None]:
# Print SUMO_HOME from a subshell to confirm the value assigned through os.environ is visible to shell commands.
!echo $SUMO_HOME

/usr/share/sumo


In [None]:
#custom RL environment
import gym # for RL environment. it sandardizes the interface for environments
import numpy as np # for array and mathematical functions
from gym import spaces # spaces defines the types of actions and observations an agent can take or receive in environment


reward calculations
- negative for higher congestion
+ positive for smooth traffic flow
- higher penalty if emergency vehicles are delayed

In [None]:
class VehicleParameters:
    """Bookkeeping for every vehicle type that can be identified.

    Each type is an instance attribute holding a dict with the keys
    'count' (running tally), 'priority' and 'speed_factor'. ``types`` lists
    the attribute names and fixes the order used by the array accessors.
    """

    def __init__(self):
        self.bicycle = dict(count=0, priority=0.2, speed_factor=0.7)
        self.bike = dict(count=0, priority=0.3, speed_factor=1.0)
        self.bus = dict(count=0, priority=0.8, speed_factor=0.9)
        self.car = dict(count=0, priority=0.5, speed_factor=1.0)
        self.cng = dict(count=0, priority=0.4, speed_factor=0.8)
        self.e_rickshaw = dict(count=0, priority=0.3, speed_factor=0.6)
        self.horse_cart = dict(count=0, priority=0.1, speed_factor=0.5)
        self.tractor = dict(count=0, priority=0.2, speed_factor=0.7)
        self.truck = dict(count=0, priority=0.7, speed_factor=0.8)
        self.wheelbarrow = dict(count=0, priority=0.1, speed_factor=0.4)
        self.types = [
            'bicycle', 'bike', 'bus', 'car', 'cng',
            'e_rickshaw', 'horse_cart', 'tractor', 'truck', 'wheelbarrow',
        ]

    def reset_counts(self):
        """Set the count of every vehicle type back to zero."""
        for name in self.types:
            getattr(self, name)['count'] = 0

    def get_counts_array(self):
        """Return the current counts as a NumPy array ordered like ``types``."""
        counts = [getattr(self, name)['count'] for name in self.types]
        return np.array(counts)

    def get_priority_array(self):
        """Return the priorities as a NumPy array ordered like ``types``."""
        priorities = [getattr(self, name)['priority'] for name in self.types]
        return np.array(priorities)

class TrafficEnv(gym.Env):
  """Gym environment that advances the running SUMO simulation (via traci) one step per action.

  Observation (float32 vector): ``[traffic_density, wait_time, emergency_vehicle]``
  and, when ``include_vehicle_counts`` is true, the per-type vehicle counts
  appended in ``VehicleParameters.types`` order.
  Actions: 0 keep the signal timing, 1 increase green time, 2 decrease green time.

  NOTE(review): density, waiting time and the emergency flag are random
  placeholders, not values read from SUMO, and the chosen action is not applied
  to any traffic light -- it only changes the reward. ``reset`` does not restart
  the simulation either, so once SUMO runs out of vehicles every later episode
  ends after its first step.
  """

  BASE_FEATURES = 3  # traffic density, waiting time, emergency-vehicle flag
  # Edges whose vehicle counts are rewarded as throughput.
  # NOTE(review): these look like junction-internal edge ids -- confirm against the network file.
  EDGE_IDS = (":0_0", ":1_0", ":2_0", ":3_0", ":4_0")

  def __init__(self, include_vehicle_counts=False):
    """Create the environment.

    Args:
      include_vehicle_counts: append the 10 per-type vehicle counts to the
        observation. Defaults to False so the observation stays the
        3-feature vector that ``step`` has always returned.
    """
    super(TrafficEnv, self).__init__()
    self.vehicle_params = VehicleParameters()
    self.include_vehicle_counts = include_vehicle_counts

    n_features = self.BASE_FEATURES
    if include_vehicle_counts:
      n_features += len(self.vehicle_params.types)

    # Continuous observations in [0, 100]; shape must be a tuple, hence (n_features,)
    self.observation_space = spaces.Box(low=0, high=100, shape=(n_features,), dtype=np.float32)

    # 3 discrete actions: 0 keep same, 1 increase, 2 decrease green time
    self.action_space = spaces.Discrete(3)

  def _observation(self, traffic_density, wait_time, emergency_vehicle):
    """Assemble the observation vector in the layout declared by observation_space."""
    features = [traffic_density, wait_time, emergency_vehicle]
    if self.include_vehicle_counts:
      features.extend(self.vehicle_params.get_counts_array())
    return np.array(features, dtype=np.float32)

  def update_vehicle_counts(self):
    """Recount the vehicles currently in the simulation, per vehicle type.

    A vehicle is attributed to the first type whose name prefixes its id.
    NOTE(review): assumes vehicle ids start with the type name -- verify
    against the route files, otherwise every count stays 0.
    """
    self.vehicle_params.reset_counts()
    for veh_id in traci.vehicle.getIDList():
      for veh_type in self.vehicle_params.types:
        if veh_id.startswith(veh_type):
          getattr(self.vehicle_params, veh_type)['count'] += 1
          break

  def step(self, action):
    """Advance SUMO by one step and score ``action``.

    Returns:
      (next_state, reward, done, info) where ``done`` becomes True once SUMO
      expects no more vehicles and ``info`` is an empty dict.
    """
    traci.simulationStep()
    self.update_vehicle_counts()

    # Placeholder traffic parameters (not yet read from SUMO)
    traffic_density = np.random.randint(0, 100)  # cars waiting at the intersection
    wait_time = np.random.randint(0, 50)  # avg wait time in seconds
    emergency_vehicle = np.random.choice([0, 1], p=[0.9, 0.1])  # 10% chance an emergency vehicle is present

    # Throughput over ALL monitored edges (previously only the last edge counted)
    vehicles_passed = sum(
        traci.edge.getLastStepVehicleNumber(edge_id) for edge_id in self.EDGE_IDS
    )

    reward = 0
    reward -= traffic_density * 0.1  # more traffic, lower reward
    reward -= wait_time * 1  # more waiting, lower reward
    reward += vehicles_passed * 2  # reward for smooth traffic flow
    if emergency_vehicle:
      reward -= 50  # higher penalty for blocking emergency vehicles
    else:
      reward += 20  # reward for not blocking emergency vehicles

    # Action-dependent shaping
    if action == 1:  # increase green light duration
      reward += 30 if traffic_density > 50 else -10
    elif action == 2:  # decrease green light duration
      reward += 20 if traffic_density < 30 else -15

    next_state = self._observation(traffic_density, wait_time, emergency_vehicle)
    done = traci.simulation.getMinExpectedNumber() <= 0  # continue until the simulation runs out of vehicles

    return next_state, reward, done, {}

  def reset(self):
    """Return an initial observation with the same layout and dtype as ``step``.

    The three base features are random values in [0, 100); vehicle counts
    (when enabled) are the real counts from the running simulation.
    """
    self.update_vehicle_counts()
    base = np.random.rand(self.BASE_FEATURES) * 100
    return self._observation(*base)


implementing the deep q network

In [None]:
import tensorflow as tf # for deep learning, neural networks
from tensorflow.keras.models import Sequential # sequential helps to build the network layer one by one where each layer has exactly one input tensor and one output tensor
from tensorflow.keras.layers import Dense # dense layer create the hidden layers and the output layers.
from tensorflow.keras.optimizers import Adam # adam (adaptive moment estimator) it adjusts the learning parameter at the forst and the secon moment of the gradients

def build_model(state_size, action_size):
  """Build and compile the Q-network.

  Two fully connected hidden layers of 24 ReLU units map a state of
  ``state_size`` features to ``action_size`` linear outputs (one Q-value per
  action). Compiled with mean-squared-error loss and Adam (learning rate 0.0005).
  """
  network = Sequential()
  network.add(Dense(24, input_dim=state_size, activation='relu'))  # first hidden layer
  network.add(Dense(24, activation='relu'))  # second hidden layer
  network.add(Dense(action_size, activation='linear'))  # Q-value output layer
  network.compile(loss='mse', optimizer=Adam(learning_rate=0.0005))
  return network

In [None]:
# training the DQN agent
import random
import numpy as np
from collections import deque

class DQNAgent:
  """Epsilon-greedy DQN agent with a replay buffer and a target network.

  ``model`` is trained on minibatches sampled from ``memory``; ``target_model``
  supplies the bootstrap values and is re-synchronised with ``model`` every
  ``target_update_every`` calls to ``replay``.
  """

  def __init__(self, state_size, action_size):
    self.state_size = state_size
    self.action_size = action_size
    self.memory = deque(maxlen=2000)  # replay buffer; oldest transitions are dropped first
    self.gamma = 0.99  # discount factor: weight of future rewards relative to immediate ones
    self.epsilon = 0.5  # exploration rate: probability of taking a random action
    self.epsilon_min = 0.01  # floor for the exploration rate
    self.epsilon_decay = 0.99  # multiplicative decay applied by update_epsilon
    self.target_update_every = 10  # replay() calls between target-network syncs
    self._replay_calls = 0
    self.model = build_model(state_size, action_size)
    self.target_model = build_model(state_size, action_size)
    self.update_target_model()

  def update_target_model(self):
    """Copy the online network's weights into the target network."""
    self.target_model.set_weights(self.model.get_weights())

  def act(self, state):
    """Pick an action for ``state`` (shape (1, state_size)) epsilon-greedily."""
    if np.random.rand() <= self.epsilon:
      return random.randrange(self.action_size)  # explore
    # verbose=0: a progress bar per predicted step would flood the notebook output
    q_values = self.model.predict(state, verbose=0)
    return int(np.argmax(q_values[0]))  # exploit

  def replay(self, batch_size=32):
    """Train the online network on one random minibatch from memory.

    Does nothing when fewer than ``batch_size`` transitions are stored. The
    whole minibatch goes through two predictions and one fit call instead of
    three Keras calls per sample.
    """
    if len(self.memory) < batch_size:
      return

    minibatch = random.sample(list(self.memory), batch_size)
    states = np.vstack([np.reshape(entry[0], (1, -1)) for entry in minibatch])
    actions = np.array([entry[1] for entry in minibatch], dtype=int)
    rewards = np.array([entry[2] for entry in minibatch], dtype=np.float32)
    next_states = np.vstack([np.reshape(entry[3], (1, -1)) for entry in minibatch])
    dones = np.array([entry[4] for entry in minibatch], dtype=bool)

    # Bellman target: r + gamma * max_a' Q_target(s', a'), without the
    # bootstrap term for terminal transitions.
    next_q = self.target_model.predict(next_states, verbose=0)
    bootstrap = np.where(dones, 0.0, self.gamma * next_q.max(axis=1))

    # Start from the current predictions so only the taken action's Q-value
    # contributes to the loss.
    targets = self.model.predict(states, verbose=0)
    targets[np.arange(len(minibatch)), actions] = rewards + bootstrap
    self.model.fit(states, targets, epochs=1, verbose=0)

    self._replay_calls += 1
    if self._replay_calls % self.target_update_every == 0:
      self.update_target_model()

  def remember(self, state, action, reward, next_state, done=False):
    """Store one transition; ``done`` marks ``next_state`` as terminal."""
    self.memory.append((state, action, reward, next_state, done))

  def update_epsilon(self):
    """Decay the exploration rate, never letting it drop below ``epsilon_min``."""
    self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

In [None]:
# training loop
import matplotlib.pyplot as plt # for visualisation

# per-episode metrics, plotted after training
episode_rewards=[]
episode_wait_times=[]
episode_traffic_densities=[]

env=TrafficEnv()
# Take the sizes from the environment instead of hard-coding 3, so the agent
# always matches what the environment actually emits.
state_size=env.observation_space.shape[0]
agent=DQNAgent(state_size=state_size,action_size=env.action_space.n)
episodes=500
batch_size=32
max_steps=200 # upper bound on steps per episode

for episode in range(episodes):
  # Keras expects a 2D input: (batch of 1, state_size features)
  state=env.reset().reshape(1,state_size)
  total_reward=0 # reward accumulated in this episode
  total_wait_time=0
  total_traffic_density=0
  steps=0 # steps actually taken; used for the averages below

  for step_index in range(max_steps):
    action=agent.act(state) # epsilon-greedy action for the current state
    next_state,reward,done,_=env.step(action) # 4th value is the unused info dict
    next_state=next_state.reshape(1,state_size)
    steps+=1

    total_reward+=reward
    total_wait_time+=next_state[0][1] # waiting time is the second feature
    total_traffic_density+=next_state[0][0] # traffic density is the first feature
    agent.remember(state,action,reward,next_state) # store the transition for replay
    state=next_state

    if done:
      break

  # Average over the number of steps taken. Dividing by the last loop index
  # was off by one and divided by zero when an episode ended on its first step.
  avg_wait_time=total_wait_time/steps
  avg_traffic_density=total_traffic_density/steps

  episode_rewards.append(total_reward)
  episode_wait_times.append(avg_wait_time) # average wait time
  episode_traffic_densities.append(avg_traffic_density) # average traffic density

  if len(agent.memory)>batch_size:
    agent.replay(batch_size)
  agent.update_epsilon()
  print(f"episode {episode+1}, total reward: {total_reward}, avg wait time:{avg_wait_time}, avg traffic density: {avg_traffic_density}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/s

In [None]:
# Plot the training metrics: one figure per metric, in the order
# total reward, average waiting time, average traffic density.
for series, legend_label, y_label, title in (
    (episode_rewards, "Total Reward", "Total Reward", "Total Reward per Episode"),
    (episode_wait_times, "Average Waiting Time", "Waiting Time (s)", "Average Waiting Time per Episode"),
    (episode_traffic_densities, "Average Traffic Density", "Traffic Density", "Average Traffic Density per Episode"),
):
    plt.figure(figsize=(5, 4))
    plt.plot(series, label=legend_label)
    plt.xlabel("Episode")
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()
    plt.show()

In [None]:
# testing the trained model: act greedily (no exploration) for up to 200 steps
state=env.reset().reshape(1,-1) # (1, n_features), whatever size the environment emits
total_reward=0
try:
  for step_index in range(200):
    # verbose=0 keeps Keras from printing a progress bar for every step
    action=np.argmax(agent.model.predict(state,verbose=0)[0])
    next_state,reward,done,_=env.step(action)
    next_state=next_state.reshape(1,-1)
    state=next_state
    total_reward+=reward
    if done: # simulation has no vehicles left; stop stepping SUMO
      break
  print(f"total reward after testing: {total_reward}")
finally:
  # always release the SUMO connection, even if a step above raised
  traci.close()


CNN feature extraction: convert vehicle images into numerical feature vectors

In [None]:
# to convert images to numerical data for kmean and others clustering
import torch # provides environment for traininga and building neural networks
from torchvision import models,transforms # provides tools needed for computer vision tasks. includes pretrained datasets and for image transformation. model has many pretrained models and transforms help to normalize,cropping,resizing images
from PIL import Image # for the manipulation of image format
# Load torchvision's ResNet-18, a convolutional network built from residual (skip) connections,
# with pretrained weights (pretrained=True) so it can be used as a ready-made feature extractor.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour of `weights=` -- confirm against the installed version.
model=models.resnet18(pretrained=True)
# Rebuild the network from all of its child modules except the last one ([:-1]) and stack them
# with torch.nn.Sequential (the * unpacks the list into separate arguments).
# Presumably the dropped module is the final fully connected classification layer, so the
# model now outputs pooled features instead of class scores -- TODO confirm.
model=torch.nn.Sequential(*(list(model.children())[:-1]))
# Switch to evaluation (inference) mode: layers such as batch normalisation use their stored
# statistics instead of per-batch ones, and no units are randomly dropped.
model.eval()
# Preprocessing pipeline, applied in order by transforms.Compose:
#   Resize(256)     - with a single int, scales the image so its shorter side is 256 px (aspect ratio kept)
#   CenterCrop(224) - keeps the central 224x224 region
#   ToTensor()      - converts the PIL image to a float tensor with values scaled from [0, 255] to [0.0, 1.0]
#   Normalize(...)  - standardises each RGB channel with the given per-channel mean and std
preprocess=transforms.Compose([transforms.Resize(256),transforms.CenterCrop(224),transforms.ToTensor(),transforms.Normalize(mean=[0.485,0.456,0.406],std=[0.229,0.224,0.225000]),])
def extract_features(image_path):
  """Return the feature vector the truncated ResNet produces for one image.

  Args:
    image_path: path of an image file readable by PIL.

  Returns:
    NumPy array holding the model output with all size-1 dimensions removed.
  """
  # Context manager closes the file handle deterministically; convert('RGB')
  # yields a fully loaded 3-channel copy that stays valid after the close.
  with Image.open(image_path) as img:
    rgb=img.convert('RGB')
  # preprocess -> tensor of shape [channels, height, width]; unsqueeze(0) adds the batch dimension (batch of one image)
  img_t=preprocess(rgb).unsqueeze(0)
  with torch.no_grad(): # inference only: skip gradient tracking to save time and memory
    features=model(img_t) # forward pass yields the feature representation, not class probabilities
  return features.squeeze().numpy() # drop the size-1 dimensions
# Smoke-test the extractor on a single sample image (hard-coded path under /content) and print the resulting feature vector.
features=extract_features('/content/bicycle-4-_jpg.rf.1d34601ac1ff212628e4e1d846ffff3a.jpg')
print(features)



[1.21937704e+00 1.24675608e+00 5.52422464e-01 6.28771663e-01
 8.51180017e-01 9.62879121e-01 8.69618773e-01 1.17276287e+00
 2.76014626e-01 2.00063634e+00 3.10463727e-01 6.59313977e-01
 2.36789823e+00 7.02093720e-01 5.97046971e-01 6.63211226e-01
 5.61763234e-02 3.93819541e-01 4.65655893e-01 6.14709914e-01
 9.24551189e-01 2.21085548e-01 9.82744277e-01 3.08293793e-02
 1.13175642e+00 1.85688838e-01 5.25756776e-01 1.30346274e+00
 2.18493271e+00 9.39885259e-01 7.04993829e-02 2.36524248e+00
 2.92332321e-01 9.92375910e-01 1.13783114e-01 3.37565631e-01
 1.00939119e+00 5.33221185e-01 2.05926681e+00 3.49868655e-01
 2.52572596e-01 1.26261795e+00 5.16133070e-01 8.15236986e-01
 7.33229280e-01 7.64921367e-01 1.07275510e+00 6.39972985e-01
 1.29452991e+00 7.57596612e-01 6.84816251e-03 5.65636456e-01
 1.51914030e-01 9.15681541e-01 7.22380102e-01 1.20441630e-01
 8.35248232e-01 1.88511517e-03 2.03837776e+00 9.08465683e-02
 2.47288436e-01 1.57824588e+00 1.89894068e+00 1.19947600e+00
 7.05487728e-01 3.951288

In [None]:
import numpy as np
import glob
# Glob pattern for the image dataset -- replace the placeholder with the real folder.
image_folder = "/path/to/your/images/*.jpg"
# Sorted so the row order of all_features is reproducible between runs
image_paths = sorted(glob.glob(image_folder))

# Fail here with a clear message instead of handing an empty array to the
# clustering cells, where the error would be much harder to trace back.
if not image_paths:
    raise FileNotFoundError(
        f"No images matched {image_folder!r}; point image_folder at your image directory."
    )

# One feature vector per image, stacked row-wise: shape (n_images, n_features)
all_features = np.stack([extract_features(path) for path in image_paths])
print(f"Loaded {len(all_features)} images.")

k means

In [None]:
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

# Standardise every feature to zero mean / unit variance before clustering
scaler = StandardScaler()
scaled_features = scaler.fit_transform(all_features)

kmeans = KMeans(n_clusters=10, random_state=42) # 10 clusters will be formed
clusters = kmeans.fit_predict(scaled_features)
print(clusters)  # Array of cluster IDs (0 to 9)

# PCA projects the features onto their first 2 principal components; this is
# used for plotting only, the clustering itself ran on the full feature set.
pca = PCA(n_components=2)
features_2d = pca.fit_transform(scaled_features)
plt.scatter(features_2d[:, 0], features_2d[:, 1], c=clusters, cmap='viridis')
plt.title("Vehicle clusters (PCA Reduced)")
plt.xlabel("Principal component 1")
plt.ylabel("Principal component 2")
plt.colorbar(label="cluster ID")
plt.show()

# Silhouette sweep over k. The score needs 2 <= k <= n_samples - 1, so the
# upper bound is capped for small datasets.
k_values = range(2, min(15, len(scaled_features)))
scores = []
for k in k_values:
    # Separate name: `kmeans` must keep pointing at the 10-cluster model that produced `clusters`
    sweep_model = KMeans(n_clusters=k, random_state=42)
    preds = sweep_model.fit_predict(scaled_features)
    scores.append(silhouette_score(scaled_features, preds))

plt.plot(k_values, scores)
plt.xlabel("Number of Clusters (k)")
plt.ylabel("Silhouette Score")
plt.show()


hierarchical clustering

In [None]:
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, fcluster, linkage
from sklearn.metrics import silhouette_score

# Agglomerative clustering with Ward linkage (each merge minimises the increase in within-cluster variance).
# NOTE(review): this runs on the raw all_features, not on standardised features -- confirm that is intended.
Z = linkage(all_features, method='ward')

# Truncated dendrogram: 'lastp' with p=20 shows only the last 20 merged clusters
plt.figure(figsize=(12, 6))
dendrogram(
    Z,
    truncate_mode='lastp',
    p=20,
    show_leaf_counts=True,
)
plt.title("Hierarchical Clustering Dendrogram (Truncated)")
plt.xlabel("Vehicle Samples")
plt.ylabel("Distance (Ward)")
plt.show()

# Cut the tree into at most 10 flat clusters and score the partition
clusters = fcluster(Z, t=10, criterion='maxclust')
sil_score = silhouette_score(all_features, clusters)
print(f"Silhouette Score: {sil_score:.4f}")

DBSCAN

In [None]:
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_samples, silhouette_score
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler

# Standardise the features so the euclidean eps radius treats every dimension alike
scaler = StandardScaler()
scaled_features = scaler.fit_transform(all_features)

# eps: maximum distance for two points to count as neighbours.
# min_samples: neighbourhood size needed for a point to seed a cluster; it is
# reused for the k-distance graph below so the two cannot drift apart.
min_samples = 5
dbscan = DBSCAN(eps=3.0, min_samples=min_samples, metric='euclidean')
clusters = dbscan.fit_predict(scaled_features)
n_clusters = len(set(clusters)) - (1 if -1 in clusters else 0)  # Ignore noise (-1)
n_noise = list(clusters).count(-1)
print(f"Estimated clusters: {n_clusters}, Noise points: {n_noise}")

# k-distance graph (k = min_samples): the "elbow" of the sorted curve is a
# common starting point for choosing eps.
nn = NearestNeighbors(n_neighbors=min_samples).fit(scaled_features)
distances, _ = nn.kneighbors(scaled_features)
distances = np.sort(distances[:, -1], axis=0)  # sorted distance to each point's k-th neighbour

plt.plot(distances)
plt.xlabel("Points sorted by distance")
plt.ylabel(f"{min_samples}th nearest neighbor distance")
plt.title("k-Distance Graph for DBSCAN")
plt.grid()
plt.show()

# 2D PCA projection, for visualisation only
pca = PCA(n_components=2)
features_2d = pca.fit_transform(scaled_features)
plt.scatter(features_2d[:, 0], features_2d[:, 1], c=clusters, cmap='viridis', alpha=0.5)
plt.title("DBSCAN Clusters (PCA-Reduced)")
plt.colorbar(label="Cluster ID")
plt.show()

# Exclude noise points (cluster = -1) before scoring
mask = clusters != -1
filtered_features = scaled_features[mask]
filtered_clusters = clusters[mask]
cluster_labels = np.unique(filtered_clusters)

if len(cluster_labels) > 1:  # At least 2 clusters needed
    silhouette = silhouette_score(filtered_features, filtered_clusters)
    print(f"Silhouette Score (excluding noise): {silhouette:.4f}")

    # Per-sample silhouette values, drawn as one sorted band per cluster
    sample_silhouette = silhouette_samples(filtered_features, filtered_clusters)
    plt.figure(figsize=(10, 6))
    y_lower = 10
    for i in cluster_labels:
        cluster_silhouette = sample_silhouette[filtered_clusters == i]
        cluster_silhouette.sort()
        y_upper = y_lower + len(cluster_silhouette)

        color = cm.viridis(float(i) / len(cluster_labels))
        plt.fill_betweenx(np.arange(y_lower, y_upper),
                          0, cluster_silhouette,
                          facecolor=color, edgecolor=color, alpha=0.7)  # alpha sets the band's opacity
        plt.text(-0.05, y_lower + 0.5 * len(cluster_silhouette), str(i))
        y_lower = y_upper + 10  # 10-row gap between bands

    # Red dashed line: mean silhouette over all non-noise samples
    plt.axvline(x=np.mean(sample_silhouette), color="red", linestyle="--")
    plt.title("Silhouette Plot for DBSCAN Clusters (Noise Excluded)")
    plt.xlabel("Silhouette Coefficient")
    plt.ylabel("Cluster ID")
    plt.show()
else:
    print("Cannot compute Silhouette Score: Only 1 cluster found (after noise removal).")
    print("Not enough clusters for silhouette analysis.")