<a href="https://colab.research.google.com/github/amara929/amara929/blob/main/Image_Generation_with_GANs_using_MNIST_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Objective
Model user navigation as a Markov Decision Process (MDP) in which each page carries a reward (e.g., a high reward for reaching "Checkout"), then find the optimal policy (the best next page from any given page) that maximizes user conversion.



Step-by-Step MDP + Value Iteration

In [None]:
# Step 1: Define the states (web pages) and rewards
states = [
    'Home',
    'About',
    'Products',
    'Cart',
    'Contact',
    'Checkout',
]

# Reward attached to each page, listed in the same order as `states`.
R = dict(
    Home=0,
    About=0,
    Products=1,
    Cart=2,
    Contact=-1,
    Checkout=10,  # High reward for conversion
)

# Transition probabilities between pages.
# If the matrix from the previous project is available, import it instead, e.g.
# when it lives in a file called 'previous_project_data.py':
# from previous_project_data import transition_matrix
#
# Otherwise it has to be written out here. What follows is only a placeholder:
# just the 'Home' entry is filled in (its probabilities sum to 1), and the
# remaining states still need entries defined the same way.
transition_matrix = {
    'Home': dict(Home=0.2, About=0.3, Products=0.5, Cart=0, Contact=0, Checkout=0),
}

# Now you can print it
print("Transition Matrix:\n", transition_matrix)

Transition Matrix:
 {'Home': {'Home': 0.2, 'About': 0.3, 'Products': 0.5, 'Cart': 0, 'Contact': 0, 'Checkout': 0}}


In [None]:
# Step 2: Define MDP parameters
theta = 1e-3  # Convergence threshold: iteration stops once the largest utility change is below it
gamma = 0.9   # Discount factor applied to the utilities of the next states

In [None]:
# Step 3: Initialize utilities of all states to 0
# One entry per page in `states`, every utility starting at the integer 0.
U = dict.fromkeys(states, 0)

In [None]:
!pip install pandas

import pandas as pd # Import the pandas library

# ... (your existing code) ...

# Convert transition_matrix to a Pandas DataFrame.
# Rows are the current page and columns are the next page, because the later
# steps read the matrix as matrix.loc[state, next_state].
# pd.DataFrame(dict_of_dicts) would put the outer keys (the current pages) on the
# columns instead, i.e. the transpose, so build it row-oriented explicitly.
if isinstance(transition_matrix, dict):  # already a DataFrame when this cell is re-run
    transition_matrix = pd.DataFrame.from_dict(transition_matrix, orient='index')

# Make the matrix square over every page in `states`. Pages whose row has not
# been defined yet get all-zero outgoing probabilities.
transition_matrix = transition_matrix.reindex(index=states, columns=states).fillna(0)

# Now you can print it
print("Transition Matrix:\n", transition_matrix)

Transition Matrix:
           Home
Home       0.2
About      0.3
Products   0.5
Cart       0.0
Contact    0.0
Checkout   0.0


In [None]:
# Step 4: Value Iteration
def value_iteration(matrix, rewards, gamma, theta, max_iterations=10_000):
    """Iteratively compute a utility for every state in ``matrix.index``.

    Each sweep recomputes, for every state ``s``::

        U[s] = rewards[s] + gamma * sum(matrix.loc[s, s2] * U[s2] for s2 in matrix.columns)

    using the utilities from the previous sweep. There is no maximisation over
    actions: the result is the expected discounted reward under the transition
    probabilities stored in ``matrix``.

    Args:
        matrix: pandas DataFrame read as ``matrix.loc[state, next_state]``.
            Every column label must also be a row label, otherwise the
            utility lookup raises ``KeyError``.
        rewards: Mapping from each state in ``matrix.index`` to its reward.
        gamma: Discount factor applied to the next states' utilities.
        theta: Convergence threshold; iteration stops once the largest
            utility change within one sweep is below it.
        max_iterations: Upper bound on the number of sweeps. Without it the
            loop never ends when the updates do not shrink (for example with
            ``gamma >= 1``).

    Returns:
        dict mapping each state in ``matrix.index`` to its utility after the
        last sweep.
    """
    U = {s: 0 for s in matrix.index}
    iteration = 0
    delta = float('inf')
    converged = False
    while iteration < max_iterations:
        delta = 0
        new_U = {}
        for state in matrix.index:
            # Probability-weighted utility of the next states, from the previous sweep.
            expected_utility = sum(
                matrix.loc[state, next_state] * U[next_state]
                for next_state in matrix.columns
            )
            new_U[state] = rewards[state] + gamma * expected_utility
            delta = max(delta, abs(new_U[state] - U[state]))
        U = new_U
        iteration += 1
        if delta < theta:
            converged = True
            break
    if converged:
        print(f"Converged after {iteration} iterations.")
    else:
        # Do not claim convergence when the sweep cap was what stopped the loop.
        print(f"Stopped after {iteration} iterations without converging (last change {delta:.4g}).")
    return U

# Run the iteration on the transition matrix and report each page's utility
# rounded to two decimals.
U = value_iteration(transition_matrix, R, gamma, theta)
print("\nUtilities (Value Function):")
for page in U:
    print(f"{page}: {U[page]:.2f}")

Converged after 2 iterations.

Utilities (Value Function):
Home: 0.00
About: 0.00
Products: 1.00
Cart: 2.00
Contact: -1.00
Checkout: 10.00


In [None]:
# Step 5: Extract optimal policy from the utilities
def extract_policy(matrix, utilities):
    """Pick, for every state, the reachable next state with the best weighted utility.

    A candidate next state is scored as
    ``matrix.loc[state, next_state] * utilities[next_state]``. Only transitions
    with a strictly positive probability are candidates: a zero-probability
    transition would score 0 and could otherwise beat every reachable state
    whose utility is negative, recommending a page that cannot be reached.
    Ties keep the first candidate in column order.

    Args:
        matrix: pandas DataFrame read as ``matrix.loc[state, next_state]``.
        utilities: Mapping from each state in ``matrix.columns`` to its utility.

    Returns:
        dict mapping each state in ``matrix.index`` to the chosen next state,
        or to ``None`` when the state has no positive-probability transition.
    """
    policy = {}
    for state in matrix.index:
        best_action = None
        best_value = float('-inf')
        for next_state in matrix.columns:
            prob = matrix.loc[state, next_state]
            if not prob > 0:
                # Unreachable from `state` (also skips NaN entries).
                continue
            value = prob * utilities[next_state]
            if value > best_value:
                best_value = value
                best_action = next_state
        policy[state] = best_action
    return policy

# Derive the recommended next page for every page from the computed utilities.
policy = extract_policy(matrix=transition_matrix, utilities=U)