In [3]:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split

# Read the CSV file
data = pd.read_csv('finaldata.csv')

# Extract the relevant columns as states
states = data[['academic_load', 'current_grade_average', 'time_of_semester', 'mental_state', 'sleep_time', 'break_time']].values.tolist()

# Define the actions
actions = ['Increase Study Time', 'Decrease Study Time', 'Increase Break Time', 'Decrease Break Time',
           'Increase Sleep Time', 'Decrease Sleep Time', 'Increase Social Time', 'Decrease Social Time']

def calculate_transition_probabilities(data):
    # Calculate transition probabilities based on the dataset
    transitions = {}

    for state in data.values:
        state_tuple = tuple(state)
        transitions[state_tuple] = {}

        current_state_data = data[
            (data['academic_load'] == state[0]) &
            (data['current_grade_average'] == state[1]) &
            (data['time_of_semester'] == state[2]) &
            (data['mental_state'] == state[3])
        ]

        action_counts = current_state_data['actions'].value_counts()
        total_count = action_counts.sum()

        for action in actions:
            if action in action_counts:
                transition_prob = action_counts[action] / total_count
            else:
                transition_prob = 0.0
            transitions[state_tuple][action] = transition_prob

    # Convert transition probabilities to a 3D array
    num_states = len(states)
    num_actions = len(actions)
    transition_probabilities = np.zeros((num_actions, num_states, num_states))

    for state_idx, state in enumerate(states):
        state_tuple = tuple(state)
        for action_idx, action in enumerate(actions):
            transition_probabilities[action_idx, state_idx, :] = [
                transitions.get(state_tuple, {}).get(action, 0.0)
                for _ in range(num_states)
            ]

    return transition_probabilities


def calculate_rewards(training_data):
    num_states = len(states)
    num_actions = len(actions)
    rewards = np.random.rand(num_actions, num_states)
    
    return rewards

class MDP:
    def __init__(self, transition_probabilities, rewards, discount_factor=0.9):
        self.transition_probabilities = transition_probabilities
        self.rewards = rewards
        self.discount_factor = discount_factor
    
    def solve(self):
        num_states = len(states)
        num_actions = len(actions)
        
        # Perform value iteration
        values = np.zeros(num_states)
        policy = np.zeros(num_states, dtype=int)
        
        while True:
            new_values = np.zeros(num_states)
            
            for state in range(num_states):
                q_values = np.zeros(num_actions)
                
                for action in range(num_actions):
                    q_value = 0.0
                    
                    for next_state in range(num_states):
                        q_value += self.transition_probabilities[action, state, next_state] * \
                                   (self.rewards[action, state] + self.discount_factor * values[next_state])
                    
                    q_values[action] = q_value
                
                new_values[state] = np.max(q_values)
                policy[state] = np.argmax(q_values)
            
            if np.max(np.abs(new_values - values)) < 1e-8:
                break
            
            values = new_values
        
        self.values = values
        self.policy = policy

# Split the data into training and testing sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Define the transition probabilities using the training data
transition_probabilities = calculate_transition_probabilities(train_data)

# Define the reward function using the training data
rewards = calculate_rewards(train_data)

# Create the MDP model
mdp_model = MDP(transition_probabilities, rewards)

# Solve the MDP
mdp_model.solve()

# Retrieve the optimal policy
optimal_policy = mdp_model.policy


# Evaluate the model's performance on the testing set
start_time = time.time()
correct_predictions = 0
total_predictions = 0

for index, test_row in test_data.iterrows():
    state = test_row[['academic_load', 'current_grade_average', 'time_of_semester', 'mental_state', 'sleep_time', 'break_time']].values
    
    # Get the expected action from the test data
    expected_action = test_row['actions']
    
    # Get the optimal action predicted by the model
    optimal_action = actions[optimal_policy[index]]
    
    # Get the index of the optimal action
    optimal_action_index = actions.index(optimal_action)
    
    # Get the reward for the optimal action and state
    reward = rewards[optimal_action_index, index]
    
    # Compare the predicted action with the expected action
    if optimal_action == expected_action:
        correct_predictions += 1
    
    total_predictions += 1
    
    print(f"State: {state}, Optimal Action: {optimal_action}, Expected Action: {expected_action}, Reward: {reward}")
    
execution_time = time.time() - start_time

# Calculate global accuracy
global_accuracy = correct_predictions / total_predictions

print(f"\nGlobal Accuracy: {global_accuracy}")
print(f"Execution Time: {execution_time} seconds")


State: [17.649250271320458 0.0 1 'Good' 5.0 1.0], Optimal Action: Increase Study Time, Expected Action: Increase Break Time, Reward: 0.27538983206195156
State: [18.698557423825992 0.0 5 'Poor' 5.0 1.0], Optimal Action: Increase Study Time, Expected Action: Increase Break Time, Reward: 0.12249961394125375
State: [20.742724859206668 0.0 1 'Excellent' 5.0 1.0], Optimal Action: Increase Study Time, Expected Action: Increase Break Time, Reward: 0.8854140103427115
State: [13.35899785137698 0.0 5 'Excellent' 5.0 1.0], Optimal Action: Increase Study Time, Expected Action: Increase Study Time, Reward: 0.7890007633698047
State: [15.98621692010918 0.0 3 'Good' 5.0 1.0], Optimal Action: Increase Study Time, Expected Action: Increase Study Time, Reward: 0.539945336126654
State: [13.8392496845942 0.0 5 'Good' 5.0 1.0], Optimal Action: Increase Study Time, Expected Action: Increase Study Time, Reward: 0.03412785051947398
State: [14.508230444044424 0.0 5 'Good' 5.0 1.0], Optimal Action: Increase Study