# In [1]:
import numpy as np

# Relative importance of every network attribute (the weights add up to 1).
attribute_weights = {
    'bandwidth': 0.2,
    'price': 0.2,
    'cell_radius': 0.1,
    'security': 0.15,
    'power_consumption': 0.15,
    'traffic': 0.2,
}
# Attribute names in the fixed order used to lay out state vectors; taken
# from the weight table so the two can never disagree.
attributes = list(attribute_weights)

# A user is described by the application in use plus three stated priorities
# (money, quality, battery life, mobility).
class User:
    """Plain record of what a user runs and what they care about.

    Args:
        application: Application category. ``NetworkSelectionMDP`` checks
            for 'conversational', 'streaming' and 'interactive'.
        preference1: First priority, e.g. 'money' or 'quality'.
        preference2: Second priority, e.g. 'battery' or 'mobility'.
        preference3: Third priority, e.g. 'money' or 'quality'. It is stored
            here but not read anywhere else in the visible code.
    """

    def __init__(self, application, preference1, preference2, preference3):
        (
            self.application,
            self.preference1,
            self.preference2,
            self.preference3,
        ) = (application, preference1, preference2, preference3)

# A candidate network and the attribute values it is scored on.
class Network:
    """Plain record holding a network's name and its six scored attributes.

    Every constructor argument is stored unchanged under an instance
    attribute of the same name; no validation or unit conversion happens.
    """

    def __init__(self, name, bandwidth, price, cell_radius, security, power_consumption, traffic):
        # Identification
        self.name = name
        # Attribute values, stored exactly as supplied
        (
            self.bandwidth,
            self.price,
            self.cell_radius,
            self.security,
            self.power_consumption,
            self.traffic,
        ) = (bandwidth, price, cell_radius, security, power_consumption, traffic)

# Define a basic Markov decision process (MDP) framework for network selection
class NetworkSelectionMDP:
    """Tabular Q-learning agent that chooses a network for a user.

    There is one state per network (the tuple of that network's attribute
    values, ordered like the module-level ``attributes`` list) and one action
    per entry of ``actions``. Taking action ``i`` moves the agent to state
    ``i``, so ``actions`` is expected to line up index-for-index with
    ``networks``.

    Args:
        user: Object exposing ``application``, ``preference1`` and
            ``preference2``; these adjust the utility of a network.
        networks: Sequence of objects exposing every name in ``attributes``.
        actions: Sequence of selectable actions; only its length is used.
    """

    def __init__(self, user, networks, actions):
        self.user = user
        self.networks = networks
        self.actions = actions
        # States are stored as tuples so they cannot be modified by accident.
        self.state_space = [tuple(state) for state in self._generate_state_space()]
        self.action_space = list(range(len(actions)))
        # Q-values start at zero: rows are states, columns are actions.
        self.q_table = np.zeros((len(self.state_space), len(self.action_space)))

    def _generate_state_space(self):
        """Return one attribute-value list per network, in ``networks`` order."""
        return [[getattr(network, attr) for attr in attributes] for network in self.networks]

    def _calculate_utility(self, state_idx):
        """Return the utility (reward) of the network behind ``state_idx``.

        The base value is the weighted sum of the raw attribute values using
        ``attribute_weights``. A single user-specific bonus or penalty is
        then applied depending on the user's application and preferences.

        NOTE(review): attribute values are used as-is (no normalisation), so
        large-valued attributes dominate the sum. Cost-like attributes such as
        price and power consumption also enter the base sum with a positive
        weight; with the current price weight of 0.2 the 'money' penalty below
        only offsets that contribution. Confirm this is the intended model.
        """
        state = self.state_space[state_idx]
        utility = sum(value * attribute_weights[attr] for value, attr in zip(state, attributes))

        application = self.user.application
        if application == 'conversational':
            if self.user.preference1 == 'money':
                # Decrease utility for higher prices
                utility -= state[attributes.index('price')] * 0.2
        elif application == 'streaming':
            if self.user.preference1 == 'quality':
                # Increase utility for higher bandwidth
                utility += state[attributes.index('bandwidth')] * 0.2
        elif application == 'interactive':
            if self.user.preference2 == 'battery':
                # Decrease utility for higher power consumption
                utility -= state[attributes.index('power_consumption')] * 0.3
            elif self.user.preference2 == 'mobility':
                # Increase utility for higher cell radius
                utility += state[attributes.index('cell_radius')] * 0.3

        return utility

    def _transition(self, state_idx, action_idx):
        """Return the state reached by taking ``action_idx``.

        Selecting a network moves the agent straight to that network's
        state, so the next state index equals the action index and the
        current ``state_idx`` does not influence the result.

        Raises:
            IndexError: If ``action_idx`` has no corresponding state, which
                happens when there are more actions than networks.
        """
        next_state_idx = action_idx
        if not 0 <= next_state_idx < len(self.state_space):
            raise IndexError(
                f"action {action_idx} has no matching network state "
                f"({len(self.state_space)} states available)"
            )
        return next_state_idx

    def train(self, num_episodes=100, learning_rate=0.1, discount_factor=0.9, epsilon=0.1, seed=None):
        """Run epsilon-greedy Q-learning and update ``q_table`` in place.

        Every episode starts in a random state and lasts as many steps as
        there are networks.

        Args:
            num_episodes: Number of episodes to run.
            learning_rate: Step size applied to each Q-value correction.
            discount_factor: Weight of the best next-state Q-value.
            epsilon: Probability of taking a random action instead of the
                currently best one.
            seed: Optional seed. When given, a private
                ``np.random.RandomState(seed)`` drives all random choices so
                the run is reproducible; when ``None`` the global
                ``np.random`` state is used.
        """
        # The np.random module and RandomState expose the same randint /
        # uniform / choice functions, so either can serve as the source.
        rng = np.random if seed is None else np.random.RandomState(seed)
        steps_per_episode = len(self.networks)

        for _episode in range(num_episodes):
            state_idx = rng.randint(0, len(self.state_space))  # Random initial state
            for _step in range(steps_per_episode):
                if rng.uniform(0, 1) < epsilon:
                    action_idx = rng.choice(self.action_space)  # Explore
                else:
                    action_idx = np.argmax(self.q_table[state_idx])  # Exploit

                next_state_idx = self._transition(state_idx, action_idx)
                reward = self._calculate_utility(next_state_idx)

                # Q-learning update: move the estimate toward the
                # bootstrapped target by a fraction `learning_rate`.
                td_target = reward + discount_factor * np.max(self.q_table[next_state_idx])
                self.q_table[state_idx, action_idx] += learning_rate * (
                    td_target - self.q_table[state_idx, action_idx])

                state_idx = next_state_idx

    def select_network(self):
        """Return the network matching the best-valued action in state 0.

        The choice is read from row 0 of ``q_table``. On an untrained
        (all-zero) table ``np.argmax`` yields index 0, i.e. the first network.
        """
        best_action_idx = np.argmax(self.q_table[0])
        return self.networks[best_action_idx]

# Example usage:
if __name__ == "__main__":
    # A conversational-application user whose first priority is money
    user = User(
        application='conversational',
        preference1='money',
        preference2='battery',
        preference3='quality',
    )

    # Simulated candidate networks, one row per network:
    # (name, bandwidth, price, cell_radius, security, power_consumption, traffic)
    networks = [
        Network(
            label,
            bandwidth=bw,
            price=cost,
            cell_radius=radius,
            security=sec,
            power_consumption=power,
            traffic=load,
        )
        for label, bw, cost, radius, sec, power, load in (
            ("WWAN", 2, 50, 2000, 3, 1/100, 50),
            ("WMAN", 10, 20, 2000, 3, 1/100, 70),
            ("WLAN", 54, 5, 75, 1, 1/50, 90),
            ("WPAN", 1, 1, 10, 2, 1/1000, 90),
        )
    ]

    # Each action means "select that network", so the action list is the
    # network list itself.
    actions = networks

    # Build the agent and let it learn Q-values over 1000 episodes
    mdp = NetworkSelectionMDP(user, networks, actions)
    mdp.train(num_episodes=1000)

    # Report the network the learned policy prefers
    best_network = mdp.select_network()
    print(f"Selected network: {best_network.name}")


# Output: Selected network: WMAN
