# BT4014 Project (Group 6)
## MMA Bandit Problem
| Student Name | Matriculation Number |
| --- | --- |
| Mah Hoy Ping, Kenneth | A0249775J |
| Yeo Wei Han | A0234774X |
| Xia Yilin | A0239469L |

# Imports

In [None]:
import pandas as pd
import datetime

# Reading in the dataset

In [None]:
# Load the per-fighter fight records, keeping the basic stats outlined in the
# dataset description (URL columns are left out).
masterMLpublic = pd.read_csv(
    "/kaggle/input/mma-differentials-and-elo/masterMLpublic.csv",
    usecols=[
        "date", "result", "fighter", "opponent", "division",
        "stance", "dob", "method", "total_comp_time", "round",
        "time", "referee", "time_format", "reach", "height",
        "age", "knockdowns", "sub_attempts", "reversals", "control",
        "takedowns_landed", "takedowns_attempts",
        "sig_strikes_landed", "sig_strikes_attempts",
        "total_strikes_landed", "total_strikes_attempts",
        "head_strikes_landed", "head_strikes_attempts",
        "body_strikes_landed", "body_strikes_attempts",
        "leg_strikes_landed", "leg_strikes_attempts",
        "distance_strikes_landed", "distance_strikes_attempts",
        "clinch_strikes_landed", "clinch_strikes_attempts",
        "ground_strikes_landed", "ground_strikes_attempts",
        "KO_losses", "days_since_last_comp", "lose_streak",
        "win_streak", "win_loss_ratio", "stamina",
        "num_fights", "trueskill", "elo",
    ],
)
# Parse the date columns and keep only the calendar date. Note that `.dt.date`
# stores plain `datetime.date` objects, so the columns stay object-typed.
masterMLpublic["date"] = pd.to_datetime(masterMLpublic["date"]).dt.date
masterMLpublic["dob"] = pd.to_datetime(masterMLpublic["dob"]).dt.date
# Drop draws (nobody wins) and DQs (not standard wins). The explicit copy makes
# the result an independent frame rather than a slice of the raw data.
masterMLpublic = masterMLpublic[~masterMLpublic["method"].isin(["DRAW", "DQ"])].copy()
masterMLpublic

In [None]:
# Number of fight records per weight division.
masterMLpublic['division'].value_counts()

In [None]:
# Lightweight records only. The explicit copy gives an independent frame, so
# the cleaning steps further down do not write into a slice of masterMLpublic.
lightweight = masterMLpublic[masterMLpublic["division"] == "Lightweight"].copy()
lightweight

In [None]:
# Welterweight records only. The explicit copy gives an independent frame, so
# the cleaning steps further down do not write into a slice of masterMLpublic.
welterweight = masterMLpublic[masterMLpublic["division"] == "Welterweight"].copy()
welterweight

In [None]:
# Count the missing values in every column of the welterweight subset.
na_counts = welterweight.isna().sum()
na_counts

# Data Cleaning

In [None]:
# Fill missing reach / age / height with the welterweight mean.
#
# Work on an explicit copy and assign the filled columns back.
# `frame[col].fillna(..., inplace=True)` is chained assignment: it triggers
# SettingWithCopy/FutureWarning and does not modify the frame at all once
# pandas Copy-on-Write is enabled.
welterweight = welterweight.copy()
is_welterweight = welterweight['division'] == 'Welterweight'

average_reach_welterweight = welterweight.loc[is_welterweight, 'reach'].mean()
welterweight['reach'] = welterweight['reach'].fillna(average_reach_welterweight)

average_age_welterweight = welterweight.loc[is_welterweight, 'age'].mean()
welterweight['age'] = welterweight['age'].fillna(average_age_welterweight)

average_height_welterweight = welterweight.loc[is_welterweight, 'height'].mean()
welterweight['height'] = welterweight['height'].fillna(average_height_welterweight)

# Rows whose days_since_last_comp equals this exact value get the fighter's
# age written in its place.
# NOTE(review): the constant looks like an imputed placeholder (dataset mean?)
# and age is presumably not measured in days -- confirm this is intended.
specific_value = 216.1080438291005
welterweight.loc[welterweight['days_since_last_comp'] == specific_value, 'days_since_last_comp'] = welterweight['age']

In [None]:
# Fill missing reach / age / height with the lightweight mean.
#
# Start from an explicit copy (the original only aliased `lightweight`) and
# assign the filled columns back. `frame[col].fillna(..., inplace=True)` is
# chained assignment: it triggers SettingWithCopy/FutureWarning and does not
# modify the frame at all once pandas Copy-on-Write is enabled.
cleaned_lightweight = lightweight.copy()
is_lightweight = cleaned_lightweight['division'] == 'Lightweight'

average_reach_lightweight = cleaned_lightweight.loc[is_lightweight, 'reach'].mean()
cleaned_lightweight['reach'] = cleaned_lightweight['reach'].fillna(average_reach_lightweight)

average_age_lightweight = cleaned_lightweight.loc[is_lightweight, 'age'].mean()
cleaned_lightweight['age'] = cleaned_lightweight['age'].fillna(average_age_lightweight)

average_height_lightweight = cleaned_lightweight.loc[is_lightweight, 'height'].mean()
cleaned_lightweight['height'] = cleaned_lightweight['height'].fillna(average_height_lightweight)

# Rows whose days_since_last_comp equals this exact value get the fighter's
# age written in its place.
# NOTE(review): the constant looks like an imputed placeholder (dataset mean?)
# and age is presumably not measured in days -- confirm this is intended.
specific_value = 216.1080438291005
cleaned_lightweight.loc[cleaned_lightweight['days_since_last_comp'] == specific_value, 'days_since_last_comp'] = cleaned_lightweight['age']

In [None]:
# From here on `lightweight` refers to the cleaned lightweight frame.
lightweight = cleaned_lightweight

In [None]:
# Display the lightweight frame after cleaning.
lightweight

# LinUCB 

### copy and edit/optimise after this markdown

In [None]:
# Inspect the column dtypes before building the feature vectors.
lightweight.dtypes

The new code: a single Contextual Thompson Sampling run with the default hyperparameters.

In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats

class ContextualThompsonSampling:
    """Linear contextual Thompson Sampling with one linear model per arm.

    Every arm keeps a matrix ``A`` (initialised to the identity) and a vector
    ``b`` (initialised to zero). An arm is scored by sampling a parameter
    vector from ``N(A^-1 b, v^2 A^-1)`` and taking its dot product with the
    context, where ``v = R * sqrt(24 / epsilon * n_features * log(1 / delta))``.

    Args:
        n_arms: Number of arms.
        n_features: Length of every context vector.
        delta: Float with ``0 < delta < 1``.
        R: Positive float scaling the spread of the sampled parameters.
        epsilon: Float with ``0 < epsilon <= 1``.
        random_state: Seed of the instance's own random generator, so two
            instances built with the same seed make the same draws.

    Raises:
        ValueError: If ``delta``, ``R`` or ``epsilon`` is not a float or lies
            outside its allowed range.
    """

    def __init__(self, n_arms, n_features, delta=0.5,
                 R=0.01, epsilon=0.5, random_state=456):
        self.n_arms = n_arms
        self.n_features = n_features
        self.random_state = random_state
        # Private generator: sampling no longer depends on (or disturbs) the
        # global NumPy random state.
        self._rng = np.random.default_rng(random_state)

        # 0 < delta < 1 (delta == 0 would make log(1 / delta) blow up)
        if not isinstance(delta, float):
            raise ValueError("delta should be float")
        if delta <= 0 or delta >= 1:
            raise ValueError("delta should be in (0, 1)")
        self.delta = delta

        # R > 0
        if not isinstance(R, float):
            raise ValueError("R should be float")
        if R <= 0:
            raise ValueError("R should be positive")
        self.R = R

        # 0 < epsilon <= 1 (epsilon == 0 would divide by zero in select_arm;
        # epsilon == 1 stays accepted as before)
        if not isinstance(epsilon, float):
            raise ValueError("epsilon should be float")
        if epsilon <= 0 or epsilon > 1:
            raise ValueError("epsilon should be in (0, 1]")
        self.epsilon = epsilon

        self.A = [np.identity(n_features) for _ in range(n_arms)]
        self.b = [np.zeros(n_features) for _ in range(n_arms)]

    def select_arm(self, context):
        """Return the index of the arm with the highest sampled score.

        Args:
            context: Sequence of ``n_features`` numbers.

        Returns:
            The selected arm as a plain ``int``.
        """
        context = np.asarray(context, dtype=float).ravel()
        # The sampling scale does not depend on the arm, so compute it once.
        v = self.R * np.sqrt(24 / self.epsilon * self.n_features * np.log(1 / self.delta))

        scores = np.empty(self.n_arms)
        for arm in range(self.n_arms):
            A_inv = np.linalg.inv(self.A[arm])
            mu_hat = A_inv @ self.b[arm]
            covariance = v ** 2 * A_inv
            # Numerical inversion can leave tiny asymmetries; symmetrise so the
            # sampler receives a valid covariance matrix.
            covariance = 0.5 * (covariance + covariance.T)
            mu_tilde = self._rng.multivariate_normal(mu_hat, covariance)
            scores[arm] = context @ mu_tilde

        return int(np.argmax(scores))

    def update(self, arm, context, reward):
        """Fold one observed (context, reward) pair into the given arm's model.

        Args:
            arm: Index of the arm that was played.
            context: Sequence of ``n_features`` numbers used for that play.
            reward: Observed numeric reward.
        """
        context = np.asarray(context, dtype=float).ravel()
        self.A[arm] += np.outer(context, context)
        self.b[arm] += reward * context
        
# Single Contextual Thompson Sampling run over the lightweight fights.
# Uses `lightweight` (the cleaned DataFrame from the earlier cells) and the
# `ContextualThompsonSampling` class defined above in this cell.
import matplotlib.pyplot as plt

chosen_features = [
    "total_comp_time", "reach", "height", "age",
    "knockdowns", "sub_attempts", "reversals", "control",
    "takedowns_landed", "takedowns_attempts",
    "sig_strikes_landed", "sig_strikes_attempts",
    "total_strikes_landed", "total_strikes_attempts",
    "head_strikes_landed", "head_strikes_attempts",
    "body_strikes_landed", "body_strikes_attempts",
    "leg_strikes_landed", "leg_strikes_attempts",
    "distance_strikes_landed", "distance_strikes_attempts",
    "clinch_strikes_landed", "clinch_strikes_attempts",
    "ground_strikes_landed", "ground_strikes_attempts",
    "KO_losses", "days_since_last_comp", "lose_streak",
    "win_streak", "win_loss_ratio",
    "stamina", "num_fights", "trueskill", "elo"
]

# NOTE(review): rows 2k and 2k + 1 are treated as the two fighters of one
# fight -- confirm the frame really is ordered that way after filtering.
# 'result' is compared against 1 on the first row of each pair.
num_fights = len(lightweight) // 2
num_features = len(chosen_features) * 2  # Features from both fighters

# Accuracy is reported per chunk of `chunk_size` consecutive fights.
chunk_size = 100
# 1-based (first step, last step) of every chunk. Deriving the chunks from a
# range avoids an empty trailing chunk (and a division by zero) when
# num_fights is an exact multiple of chunk_size.
chunk_bounds = [(start + 1, min(start + chunk_size, num_fights))
                for start in range(0, num_fights, chunk_size)]
chunk_rewards = [0] * len(chunk_bounds)
chunk_correct_predictions = [0] * len(chunk_bounds)

# Coerce the feature columns to numbers once (non-numeric entries become 0)
# instead of converting two rows on every loop iteration.
feature_matrix = (
    lightweight[chosen_features]
    .apply(pd.to_numeric, errors='coerce')
    .fillna(0)
    .to_numpy(dtype=float)
)
paired_rows = 2 * num_fights
# One context per fight: first fighter's features followed by the second's.
fight_contexts = np.hstack([feature_matrix[0:paired_rows:2],
                            feature_matrix[1:paired_rows:2]])
# Arm 0 = first listed fighter won, arm 1 = second listed fighter won.
actual_winners = np.where(
    lightweight['result'].to_numpy()[0:paired_rows:2] == 1, 0, 1)

cts = ContextualThompsonSampling(2, num_features)

total_reward = 0
correct_predictions = 0

for fight_index in range(num_fights):
    context = fight_contexts[fight_index]
    chosen_arm = cts.select_arm(context)
    actual_winner = actual_winners[fight_index]

    # Reward is 1 if the chosen arm matches the actual winner, else 0
    reward = 1 if chosen_arm == actual_winner else 0
    cts.update(chosen_arm, context, reward)

    # A correct prediction and a reward of 1 are the same event here.
    current_chunk = fight_index // chunk_size
    chunk_rewards[current_chunk] += reward
    chunk_correct_predictions[current_chunk] += reward
    total_reward += reward
    correct_predictions += reward

# Per-chunk accuracy; the last chunk may hold fewer than chunk_size fights.
accuracies = [correct / (last - first + 1)
              for correct, (first, last) in zip(chunk_correct_predictions, chunk_bounds)]
for (first, last), chunk_accuracy in zip(chunk_bounds, accuracies):
    print(f"Accuracy for steps {first} - {last}: {chunk_accuracy}")

# Full chunks are plotted at their first step, a shorter final chunk at the
# last step.
x_values = [first if last - first + 1 == chunk_size else num_fights
            for first, last in chunk_bounds]

plt.plot(x_values, accuracies, marker='o', linestyle='-')
plt.title('Accuracy Over Chunks')
plt.xlabel('Steps')
plt.ylabel('Accuracy')
plt.grid(True)
plt.show()

# Overall accuracy (0.0 when there are no fights to score).
accuracy = correct_predictions / num_fights if num_fights else 0.0
print(f"Total reward: {total_reward}")
print(f"Accuracy: {accuracy}")

# LinUCB: iterate over `alpha_values` to find the best alpha

In [None]:
import numpy as np
import pandas as pd

class ModifiedWinLossLinUCB:
    """LinUCB-style scorer that picks between "fighter 1 wins" and "fighter 2 wins".

    The instance holds ``num_arms`` independent (A, b) pairs; each A starts as
    the identity matrix and each b as a zero column vector.

    NOTE(review): ``recommend`` only ever reads slots 0 and 1, whereas
    ``update`` writes to whichever slot index it is given -- callers should
    confirm they pass the index they intend.
    """

    def __init__(self, num_arms, num_features, alpha=0.1):
        self.alpha = alpha
        self.num_features = num_features
        self.num_arms = num_arms  # number of (A, b) slots to allocate
        self.A = []
        self.b = []
        for _ in range(self.num_arms):
            self.A.append(np.identity(num_features))
            self.b.append(np.zeros((num_features, 1)))

    def recommend(self, context):
        """Return 0 or 1, whichever slot has the larger upper confidence bound.

        Ties keep the lower index; ``None`` is returned if no bound compares
        greater than negative infinity.
        """
        column = context.reshape((self.num_features, 1))
        best_arm, best_bound = None, float('-inf')

        # Two possible outcomes: fighter 1 wins (0) or fighter 2 wins (1)
        for candidate in (0, 1):
            inverse = np.linalg.inv(self.A[candidate])
            weights = inverse.dot(self.b[candidate])
            spread = column.T.dot(inverse.dot(column)).item()
            bound = weights.T.dot(column) + self.alpha * np.sqrt(spread)
            if bound > best_bound:
                best_arm, best_bound = candidate, bound

        return best_arm

    def update(self, fight_index, context, reward):
        """Add the context's outer product to A and reward * context to b for slot ``fight_index``."""
        column = context.reshape((self.num_features, 1))
        self.A[fight_index] += column.dot(column.T)
        self.b[fight_index] += reward * column
        
# Sweep the LinUCB exploration weight `alpha` over the lightweight fights.
# Uses `lightweight` (the cleaned DataFrame from the earlier cells) and the
# `ModifiedWinLossLinUCB` class defined above in this cell.
import matplotlib.pyplot as plt

chosen_features = [
    "total_comp_time", "reach", "height", "age",
    "knockdowns", "sub_attempts", "reversals", "control",
    "takedowns_landed", "takedowns_attempts",
    "sig_strikes_landed", "sig_strikes_attempts",
    "total_strikes_landed", "total_strikes_attempts",
    "head_strikes_landed", "head_strikes_attempts",
    "body_strikes_landed", "body_strikes_attempts",
    "leg_strikes_landed", "leg_strikes_attempts",
    "distance_strikes_landed", "distance_strikes_attempts",
    "clinch_strikes_landed", "clinch_strikes_attempts",
    "ground_strikes_landed", "ground_strikes_attempts",
    "KO_losses", "days_since_last_comp", "lose_streak",
    "win_streak", "win_loss_ratio",
    "stamina", "num_fights", "trueskill", "elo"
]

# NOTE(review): rows 2k and 2k + 1 are treated as the two fighters of one
# fight -- confirm the frame really is ordered that way after filtering.
num_fights = len(lightweight) // 2
num_features = len(chosen_features) * 2  # Features from both fighters

# Accuracy is reported per chunk of `chunk_size` consecutive fights.
chunk_size = 100
# 1-based (first step, last step) of every chunk. Deriving the chunks from a
# range avoids an empty trailing chunk (and a division by zero) when
# num_fights is an exact multiple of chunk_size.
chunk_bounds = [(start + 1, min(start + chunk_size, num_fights))
                for start in range(0, num_fights, chunk_size)]
# Full chunks are plotted at their first step, a shorter final chunk at the
# last step.
x_values = [first if last - first + 1 == chunk_size else num_fights
            for first, last in chunk_bounds]

# Coerce the feature columns to numbers once (non-numeric entries become 0)
# instead of converting two rows on every loop iteration.
feature_matrix = (
    lightweight[chosen_features]
    .apply(pd.to_numeric, errors='coerce')
    .fillna(0)
    .to_numpy(dtype=float)
)
paired_rows = 2 * num_fights
# One context per fight: first fighter's features followed by the second's.
fight_contexts = np.hstack([feature_matrix[0:paired_rows:2],
                            feature_matrix[1:paired_rows:2]])
# Arm 0 = first listed fighter won, arm 1 = second listed fighter won.
actual_winners = np.where(
    lightweight['result'].to_numpy()[0:paired_rows:2] == 1, 0, 1)

alpha_values = [0.01, 0.1, 0.5, 1.0, 2.0]  # Add more alpha values as needed

# Overall accuracy for each alpha value, in the order of alpha_values
total_accuracies = []

for alpha in alpha_values:
    print(f"\nRunning with alpha = {alpha}")

    # Fresh bandit per alpha. Only two arms exist (fighter 1 wins / fighter 2
    # wins), so only two (A, b) slots are needed.
    bandit = ModifiedWinLossLinUCB(2, num_features, alpha)

    total_reward = 0
    correct_predictions = 0
    chunk_rewards = [0] * len(chunk_bounds)
    chunk_correct_predictions = [0] * len(chunk_bounds)

    for fight_index in range(num_fights):
        context = fight_contexts[fight_index]
        chosen_arm = bandit.recommend(context)  # 0 or 1
        actual_winner = actual_winners[fight_index]

        reward = 1 if chosen_arm == actual_winner else 0
        # Credit the arm that was actually played. Passing fight_index here
        # (as before) wrote to a slot that recommend() never reads, so the
        # bandit could not learn from its feedback.
        bandit.update(chosen_arm, context, reward)

        # A correct prediction and a reward of 1 are the same event here.
        current_chunk = fight_index // chunk_size
        chunk_rewards[current_chunk] += reward
        chunk_correct_predictions[current_chunk] += reward
        total_reward += reward
        correct_predictions += reward

    # Overall accuracy (0.0 when there are no fights to score).
    accuracy = correct_predictions / num_fights if num_fights else 0.0
    total_accuracies.append(accuracy)

    # Per-chunk accuracy; the last chunk may hold fewer than chunk_size fights.
    accuracies = [correct / (last - first + 1)
                  for correct, (first, last) in zip(chunk_correct_predictions, chunk_bounds)]
    for (first, last), chunk_accuracy in zip(chunk_bounds, accuracies):
        print(f"Accuracy for steps {first} - {last}: {chunk_accuracy}")

    plt.plot(x_values, accuracies, marker='o', linestyle='-')
    plt.title('Accuracy Over Chunks')
    plt.xlabel('Steps')
    plt.ylabel('Accuracy')
    plt.grid(True)
    plt.show()

    print(f"Total reward: {total_reward}")
    print(f"Accuracy: {accuracy}")

# Plot overall accuracy against alpha (the x-axis previously referenced the
# unrelated name `epsilon_values`).
plt.figure(figsize=(10, 6))
plt.plot(alpha_values, total_accuracies, marker='o', linestyle='-')
plt.title('Total Accuracy for Different Alpha Values')
plt.xlabel('Alpha')
plt.ylabel('Total Accuracy')
plt.grid(True)
plt.show()


# Contextual thompson sampling

## Iterate over `delta_values` to find the best delta

In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats

class ContextualThompsonSampling:
    """Linear contextual Thompson Sampling with one linear model per arm.

    Every arm keeps a matrix ``A`` (initialised to the identity) and a vector
    ``b`` (initialised to zero). An arm is scored by sampling a parameter
    vector from ``N(A^-1 b, v^2 A^-1)`` and taking its dot product with the
    context, where ``v = R * sqrt(24 / epsilon * n_features * log(1 / delta))``.

    Args:
        n_arms: Number of arms.
        n_features: Length of every context vector.
        delta: Float with ``0 < delta < 1``.
        R: Positive float scaling the spread of the sampled parameters.
        epsilon: Float with ``0 < epsilon <= 1``.
        random_state: Seed of the instance's own random generator, so two
            instances built with the same seed make the same draws.

    Raises:
        ValueError: If ``delta``, ``R`` or ``epsilon`` is not a float or lies
            outside its allowed range.
    """

    def __init__(self, n_arms, n_features, delta=0.5,
                 R=0.01, epsilon=0.5, random_state=456):
        self.n_arms = n_arms
        self.n_features = n_features
        self.random_state = random_state
        # Private generator: sampling no longer depends on (or disturbs) the
        # global NumPy random state.
        self._rng = np.random.default_rng(random_state)

        # 0 < delta < 1 (delta == 0 would make log(1 / delta) blow up)
        if not isinstance(delta, float):
            raise ValueError("delta should be float")
        if delta <= 0 or delta >= 1:
            raise ValueError("delta should be in (0, 1)")
        self.delta = delta

        # R > 0
        if not isinstance(R, float):
            raise ValueError("R should be float")
        if R <= 0:
            raise ValueError("R should be positive")
        self.R = R

        # 0 < epsilon <= 1 (epsilon == 0 would divide by zero in select_arm;
        # epsilon == 1 stays accepted as before)
        if not isinstance(epsilon, float):
            raise ValueError("epsilon should be float")
        if epsilon <= 0 or epsilon > 1:
            raise ValueError("epsilon should be in (0, 1]")
        self.epsilon = epsilon

        self.A = [np.identity(n_features) for _ in range(n_arms)]
        self.b = [np.zeros(n_features) for _ in range(n_arms)]

    def select_arm(self, context):
        """Return the index of the arm with the highest sampled score.

        Args:
            context: Sequence of ``n_features`` numbers.

        Returns:
            The selected arm as a plain ``int``.
        """
        context = np.asarray(context, dtype=float).ravel()
        # The sampling scale does not depend on the arm, so compute it once.
        v = self.R * np.sqrt(24 / self.epsilon * self.n_features * np.log(1 / self.delta))

        scores = np.empty(self.n_arms)
        for arm in range(self.n_arms):
            A_inv = np.linalg.inv(self.A[arm])
            mu_hat = A_inv @ self.b[arm]
            covariance = v ** 2 * A_inv
            # Numerical inversion can leave tiny asymmetries; symmetrise so the
            # sampler receives a valid covariance matrix.
            covariance = 0.5 * (covariance + covariance.T)
            mu_tilde = self._rng.multivariate_normal(mu_hat, covariance)
            scores[arm] = context @ mu_tilde

        return int(np.argmax(scores))

    def update(self, arm, context, reward):
        """Fold one observed (context, reward) pair into the given arm's model.

        Args:
            arm: Index of the arm that was played.
            context: Sequence of ``n_features`` numbers used for that play.
            reward: Observed numeric reward.
        """
        context = np.asarray(context, dtype=float).ravel()
        self.A[arm] += np.outer(context, context)
        self.b[arm] += reward * context
        
# Sweep the Thompson Sampling `delta` parameter over the lightweight fights.
# Uses `lightweight` (the cleaned DataFrame from the earlier cells) and the
# `ContextualThompsonSampling` class defined above in this cell.
# matplotlib is imported here because this cell's import section does not
# bring `plt` into scope.
import matplotlib.pyplot as plt

chosen_features = [
    "total_comp_time", "reach", "height", "age",
    "knockdowns", "sub_attempts", "reversals", "control",
    "takedowns_landed", "takedowns_attempts",
    "sig_strikes_landed", "sig_strikes_attempts",
    "total_strikes_landed", "total_strikes_attempts",
    "head_strikes_landed", "head_strikes_attempts",
    "body_strikes_landed", "body_strikes_attempts",
    "leg_strikes_landed", "leg_strikes_attempts",
    "distance_strikes_landed", "distance_strikes_attempts",
    "clinch_strikes_landed", "clinch_strikes_attempts",
    "ground_strikes_landed", "ground_strikes_attempts",
    "KO_losses", "days_since_last_comp", "lose_streak",
    "win_streak", "win_loss_ratio",
    "stamina", "num_fights", "trueskill", "elo"
]
# Delta values to try
delta_values = [0.1, 0.2, 0.4, 0.6, 0.8, 0.999]

# NOTE(review): rows 2k and 2k + 1 are treated as the two fighters of one
# fight -- confirm the frame really is ordered that way after filtering.
num_fights = len(lightweight) // 2
num_features = len(chosen_features) * 2  # Features from both fighters

# Accuracy is reported per chunk of `chunk_size` consecutive fights.
chunk_size = 100
# 1-based (first step, last step) of every chunk. Deriving the chunks from a
# range avoids an empty trailing chunk (and a division by zero) when
# num_fights is an exact multiple of chunk_size.
chunk_bounds = [(start + 1, min(start + chunk_size, num_fights))
                for start in range(0, num_fights, chunk_size)]
# Full chunks are plotted at their first step, a shorter final chunk at the
# last step.
x_values = [first if last - first + 1 == chunk_size else num_fights
            for first, last in chunk_bounds]

# Coerce the feature columns to numbers once (non-numeric entries become 0)
# instead of converting two rows on every loop iteration.
feature_matrix = (
    lightweight[chosen_features]
    .apply(pd.to_numeric, errors='coerce')
    .fillna(0)
    .to_numpy(dtype=float)
)
paired_rows = 2 * num_fights
# One context per fight: first fighter's features followed by the second's.
fight_contexts = np.hstack([feature_matrix[0:paired_rows:2],
                            feature_matrix[1:paired_rows:2]])
# Arm 0 = first listed fighter won, arm 1 = second listed fighter won.
actual_winners = np.where(
    lightweight['result'].to_numpy()[0:paired_rows:2] == 1, 0, 1)

# Overall accuracy for each delta value, in the order of delta_values
total_accuracies = []

for delta_value in delta_values:
    # Fresh model for every delta so the runs do not share what they learned.
    cts = ContextualThompsonSampling(2, num_features, delta=delta_value)

    chunk_rewards = [0] * len(chunk_bounds)
    chunk_correct_predictions = [0] * len(chunk_bounds)
    total_reward = 0
    correct_predictions = 0

    for fight_index in range(num_fights):
        context = fight_contexts[fight_index]
        chosen_arm = cts.select_arm(context)
        actual_winner = actual_winners[fight_index]

        # Reward is 1 if the chosen arm matches the actual winner, else 0
        reward = 1 if chosen_arm == actual_winner else 0
        cts.update(chosen_arm, context, reward)

        # A correct prediction and a reward of 1 are the same event here.
        current_chunk = fight_index // chunk_size
        chunk_rewards[current_chunk] += reward
        chunk_correct_predictions[current_chunk] += reward
        total_reward += reward
        correct_predictions += reward

    # Overall accuracy (0.0 when there are no fights to score).
    accuracy = correct_predictions / num_fights if num_fights else 0.0
    total_accuracies.append(accuracy)

    # Per-chunk accuracy; the last chunk may hold fewer than chunk_size fights.
    accuracies = [correct / (last - first + 1)
                  for correct, (first, last) in zip(chunk_correct_predictions, chunk_bounds)]
    for (first, last), chunk_accuracy in zip(chunk_bounds, accuracies):
        print(f"Accuracy for steps {first} - {last}: {chunk_accuracy}")

    print(f"Total reward for delta={delta_value}: {total_reward}")
    print(f"Accuracy for delta={delta_value}: {accuracy}")

    plt.plot(x_values, accuracies, marker='o', linestyle='-', label=f'delta={delta_value}')
    plt.title('Accuracy Over Chunks')
    plt.xlabel('Steps')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True)
    plt.show()

# Plot overall accuracy against delta
plt.figure(figsize=(10, 6))
plt.plot(delta_values, total_accuracies, marker='o', linestyle='-')
plt.title('Total Accuracy for Different Delta Values')
plt.xlabel('Delta')
plt.ylabel('Total Accuracy')
plt.grid(True)
plt.show()

# Iterate over `epsilon_values` to find the best epsilon

In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt

class ContextualThompsonSampling:
    """Linear contextual Thompson Sampling with one linear model per arm.

    Every arm keeps a matrix ``A`` (initialised to the identity) and a vector
    ``b`` (initialised to zero). An arm is scored by sampling a parameter
    vector from ``N(A^-1 b, v^2 A^-1)`` and taking its dot product with the
    context, where ``v = R * sqrt(24 / epsilon * n_features * log(1 / delta))``.

    Args:
        n_arms: Number of arms.
        n_features: Length of every context vector.
        delta: Float with ``0 < delta < 1``.
        R: Positive float scaling the spread of the sampled parameters.
        epsilon: Float with ``0 < epsilon <= 1``.
        random_state: Seed of the instance's own random generator, so two
            instances built with the same seed make the same draws.

    Raises:
        ValueError: If ``delta``, ``R`` or ``epsilon`` is not a float or lies
            outside its allowed range.
    """

    def __init__(self, n_arms, n_features, delta=0.5,
                 R=0.01, epsilon=0.5, random_state=456):
        self.n_arms = n_arms
        self.n_features = n_features
        self.random_state = random_state
        # Private generator: sampling no longer depends on (or disturbs) the
        # global NumPy random state.
        self._rng = np.random.default_rng(random_state)

        # 0 < delta < 1 (delta == 0 would make log(1 / delta) blow up)
        if not isinstance(delta, float):
            raise ValueError("delta should be float")
        if delta <= 0 or delta >= 1:
            raise ValueError("delta should be in (0, 1)")
        self.delta = delta

        # R > 0
        if not isinstance(R, float):
            raise ValueError("R should be float")
        if R <= 0:
            raise ValueError("R should be positive")
        self.R = R

        # 0 < epsilon <= 1 (epsilon == 0 would divide by zero in select_arm;
        # epsilon == 1 stays accepted as before)
        if not isinstance(epsilon, float):
            raise ValueError("epsilon should be float")
        if epsilon <= 0 or epsilon > 1:
            raise ValueError("epsilon should be in (0, 1]")
        self.epsilon = epsilon

        self.A = [np.identity(n_features) for _ in range(n_arms)]
        self.b = [np.zeros(n_features) for _ in range(n_arms)]

    def select_arm(self, context):
        """Return the index of the arm with the highest sampled score.

        Args:
            context: Sequence of ``n_features`` numbers.

        Returns:
            The selected arm as a plain ``int``.
        """
        context = np.asarray(context, dtype=float).ravel()
        # The sampling scale does not depend on the arm, so compute it once.
        v = self.R * np.sqrt(24 / self.epsilon * self.n_features * np.log(1 / self.delta))

        scores = np.empty(self.n_arms)
        for arm in range(self.n_arms):
            A_inv = np.linalg.inv(self.A[arm])
            mu_hat = A_inv @ self.b[arm]
            covariance = v ** 2 * A_inv
            # Numerical inversion can leave tiny asymmetries; symmetrise so the
            # sampler receives a valid covariance matrix.
            covariance = 0.5 * (covariance + covariance.T)
            mu_tilde = self._rng.multivariate_normal(mu_hat, covariance)
            scores[arm] = context @ mu_tilde

        return int(np.argmax(scores))

    def update(self, arm, context, reward):
        """Fold one observed (context, reward) pair into the given arm's model.

        Args:
            arm: Index of the arm that was played.
            context: Sequence of ``n_features`` numbers used for that play.
            reward: Observed numeric reward.
        """
        context = np.asarray(context, dtype=float).ravel()
        self.A[arm] += np.outer(context, context)
        self.b[arm] += reward * context
        
# Sweep the Thompson Sampling `epsilon` parameter over the lightweight fights.
# Uses `lightweight` (the cleaned DataFrame from the earlier cells) and the
# `ContextualThompsonSampling` class defined above in this cell.
chosen_features = [
    "total_comp_time", "reach", "height", "age",
    "knockdowns", "sub_attempts", "reversals", "control",
    "takedowns_landed", "takedowns_attempts",
    "sig_strikes_landed", "sig_strikes_attempts",
    "total_strikes_landed", "total_strikes_attempts",
    "head_strikes_landed", "head_strikes_attempts",
    "body_strikes_landed", "body_strikes_attempts",
    "leg_strikes_landed", "leg_strikes_attempts",
    "distance_strikes_landed", "distance_strikes_attempts",
    "clinch_strikes_landed", "clinch_strikes_attempts",
    "ground_strikes_landed", "ground_strikes_attempts",
    "KO_losses", "days_since_last_comp", "lose_streak",
    "win_streak", "win_loss_ratio",
    "stamina", "num_fights", "trueskill", "elo"
]

# Epsilon values to try
epsilon_values = [0.1, 0.2, 0.3, 0.4, 0.5]

# NOTE(review): rows 2k and 2k + 1 are treated as the two fighters of one
# fight -- confirm the frame really is ordered that way after filtering.
num_fights = len(lightweight) // 2
num_features = len(chosen_features) * 2  # Features from both fighters

# Accuracy is reported per chunk of `chunk_size` consecutive fights.
chunk_size = 100
# 1-based (first step, last step) of every chunk. Deriving the chunks from a
# range avoids an empty trailing chunk (and a division by zero) when
# num_fights is an exact multiple of chunk_size.
chunk_bounds = [(start + 1, min(start + chunk_size, num_fights))
                for start in range(0, num_fights, chunk_size)]
# Full chunks are plotted at their first step, a shorter final chunk at the
# last step.
x_values = [first if last - first + 1 == chunk_size else num_fights
            for first, last in chunk_bounds]

# Coerce the feature columns to numbers once (non-numeric entries become 0)
# instead of converting two rows on every loop iteration.
feature_matrix = (
    lightweight[chosen_features]
    .apply(pd.to_numeric, errors='coerce')
    .fillna(0)
    .to_numpy(dtype=float)
)
paired_rows = 2 * num_fights
# One context per fight: first fighter's features followed by the second's.
fight_contexts = np.hstack([feature_matrix[0:paired_rows:2],
                            feature_matrix[1:paired_rows:2]])
# Arm 0 = first listed fighter won, arm 1 = second listed fighter won.
actual_winners = np.where(
    lightweight['result'].to_numpy()[0:paired_rows:2] == 1, 0, 1)

# Overall accuracy for each epsilon value, in the order of epsilon_values
total_accuracies = []

for epsilon in epsilon_values:
    # Fresh model for every epsilon so the runs do not share what they learned.
    cts = ContextualThompsonSampling(2, num_features, epsilon=epsilon)

    chunk_rewards = [0] * len(chunk_bounds)
    chunk_correct_predictions = [0] * len(chunk_bounds)
    total_reward = 0
    correct_predictions = 0

    for fight_index in range(num_fights):
        context = fight_contexts[fight_index]
        chosen_arm = cts.select_arm(context)
        actual_winner = actual_winners[fight_index]

        # Reward is 1 if the chosen arm matches the actual winner, else 0
        reward = 1 if chosen_arm == actual_winner else 0
        cts.update(chosen_arm, context, reward)

        # A correct prediction and a reward of 1 are the same event here.
        current_chunk = fight_index // chunk_size
        chunk_rewards[current_chunk] += reward
        chunk_correct_predictions[current_chunk] += reward
        total_reward += reward
        correct_predictions += reward

    # Overall accuracy (0.0 when there are no fights to score).
    accuracy = correct_predictions / num_fights if num_fights else 0.0
    total_accuracies.append(accuracy)

    # Compute this run's per-chunk accuracies BEFORE printing them; printing
    # first reported the values left over from the previous run.
    accuracies = [correct / (last - first + 1)
                  for correct, (first, last) in zip(chunk_correct_predictions, chunk_bounds)]
    for (first, last), chunk_accuracy in zip(chunk_bounds, accuracies):
        print(f"Epsilon={epsilon}: Accuracy for steps {first} - {last}: {chunk_accuracy}")

    print(f"Total reward for epsilon={epsilon}: {total_reward}")
    print(f"Accuracy for epsilon={epsilon}: {accuracy}")

    plt.plot(x_values, accuracies, marker='o', linestyle='-', label=f'epsilon={epsilon}')
    plt.title('Accuracy Over Chunks')
    plt.xlabel('Steps')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True)
    plt.show()

# Plot overall accuracy against epsilon
plt.figure(figsize=(10, 6))
plt.plot(epsilon_values, total_accuracies, marker='o', linestyle='-')
plt.title('Total Accuracy for Different Epsilon Values')
plt.xlabel('Epsilon')
plt.ylabel('Total Accuracy')
plt.grid(True)
plt.show()

# Iterate over candidate `R` values to find the best R

In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt

class ContextualThompsonSampling:
    """Linear contextual Thompson Sampling with one Gaussian model per arm.

    Every arm keeps a matrix ``A`` (initialised to the identity) and a vector
    ``b`` (initialised to zeros).  ``select_arm`` draws one parameter vector
    per arm from ``N(A^-1 b, v^2 A^-1)`` and returns the arm whose draw gives
    the context the highest score, where
    ``v = R * sqrt(24 / epsilon * n_features * ln(1 / delta))``.

    Args:
        n_arms: Number of arms to choose between.
        n_features: Length of each context vector.
        delta: Float strictly between 0 and 1.
        R: Strictly positive float; scales the sampling standard deviation.
        epsilon: Float greater than 0 and at most 1.
        random_state: Seed for this instance's private random generator, so
            two instances built with the same seed make the same draws.

    Raises:
        ValueError: If ``delta``, ``R`` or ``epsilon`` is not a float or lies
            outside its allowed range.
    """

    def __init__(self, n_arms, n_features, delta=0.5,
                 R=0.01, epsilon=0.5, random_state=456):
        self.n_arms = n_arms
        self.n_features = n_features
        self.random_state = random_state
        # Private generator: makes random_state actually control the sampling
        # instead of silently relying on NumPy's global random state.
        self._rng = np.random.default_rng(random_state)

        # 0 < delta < 1 (delta == 0 would divide by zero in log(1 / delta))
        if not isinstance(delta, float):
            raise ValueError("delta should be float")
        if not 0 < delta < 1:
            raise ValueError("delta should be in (0, 1)")
        self.delta = delta

        # R > 0
        if not isinstance(R, float):
            raise ValueError("R should be float")
        if not R > 0:
            raise ValueError("R should be positive")
        self.R = R

        # 0 < epsilon <= 1 (epsilon == 0 would divide by zero in 24 / epsilon)
        if not isinstance(epsilon, float):
            raise ValueError("epsilon should be float")
        if not 0 < epsilon <= 1:
            raise ValueError("epsilon should be in (0, 1]")
        self.epsilon = epsilon

        self.A = [np.identity(n_features) for _ in range(n_arms)]
        self.b = [np.zeros(n_features) for _ in range(n_arms)]

    def select_arm(self, context):
        """Sample a parameter vector for every arm and return the best arm.

        Args:
            context: 1-D array-like of length ``n_features``.

        Returns:
            Index of the arm with the highest sampled score (``np.argmax``
            result, so ties go to the lowest index).
        """
        # v does not depend on the arm, so compute it once per call
        v = self.R * np.sqrt(24 / self.epsilon * self.n_features * np.log(1 / self.delta))

        # Small diagonal term that keeps the covariance positive-semidefinite
        # despite round-off in the matrix inverse
        regularization = 1e-6 * np.identity(self.n_features)

        scores = np.zeros(self.n_arms)
        for arm in range(self.n_arms):
            A_inv = np.linalg.inv(self.A[arm])
            mu_hat = A_inv @ self.b[arm]

            cov_matrix = v**2 * A_inv + regularization
            # Force exact symmetry; the numerical inverse can be slightly off
            cov_matrix = 0.5 * (cov_matrix + cov_matrix.T)

            # Keep the draw 1-D so the dot product below is a true scalar;
            # storing a size-1 array into scores[arm] is deprecated in NumPy.
            mu_tilde = self._rng.multivariate_normal(mu_hat, cov_matrix)
            scores[arm] = context @ mu_tilde

        selected_arm = np.argmax(scores)
        return selected_arm

    def update(self, arm, context, reward):
        """Fold one observed (context, reward) pair into ``arm``'s statistics.

        Args:
            arm: Index of the arm that was played.
            context: Context vector that was passed to ``select_arm``.
            reward: Numeric reward observed for playing ``arm``.
        """
        self.A[arm] += np.outer(context, context)
        self.b[arm] += reward * context
        
        
# Columns read from each fighter's row to build the bandit context,
# concatenated group by group (order matters: it fixes the feature order).
chosen_features = (
    ["total_comp_time", "reach", "height", "age"]
    + ["knockdowns", "sub_attempts", "reversals", "control"]
    # "<stat>_landed" / "<stat>_attempts" pairs
    + ["takedowns_landed", "takedowns_attempts"]
    + ["sig_strikes_landed", "sig_strikes_attempts"]
    + ["total_strikes_landed", "total_strikes_attempts"]
    + ["head_strikes_landed", "head_strikes_attempts"]
    + ["body_strikes_landed", "body_strikes_attempts"]
    + ["leg_strikes_landed", "leg_strikes_attempts"]
    + ["distance_strikes_landed", "distance_strikes_attempts"]
    + ["clinch_strikes_landed", "clinch_strikes_attempts"]
    + ["ground_strikes_landed", "ground_strikes_attempts"]
    # record / rating columns
    + ["KO_losses", "days_since_last_comp", "lose_streak"]
    + ["win_streak", "win_loss_ratio"]
    + ["stamina", "num_fights", "trueskill", "elo"]
)

# Candidate values of 'R' to compare; one full pass over the fights is run per value
R_values = [0.01, 0.1, 0.5, 1.0]

# Each fight occupies two consecutive rows of `lightweight` (one per fighter)
num_fights = len(lightweight) // 2
num_features = len(chosen_features) * 2  # Features from both fighters

# Accuracy is reported per chunk of `chunk_size` consecutive fights
chunk_size = 100
# Ceiling division, so no empty trailing chunk exists when num_fights is an
# exact multiple of chunk_size (an empty chunk would divide by zero below)
num_chunks = -(-num_fights // chunk_size)
chunk_rewards = [0] * num_chunks  # Total reward collected in each chunk
chunk_correct_predictions = [0] * num_chunks  # Correct predictions in each chunk

# Overall accuracy for each R value, in the same order as R_values
total_accuracies = []

for R_value in R_values:
    # Fresh model and fresh counters for every R value
    cts = ContextualThompsonSampling(2, num_features, R=R_value)

    total_reward = 0
    correct_predictions = 0
    chunk_rewards = [0] * num_chunks
    chunk_correct_predictions = [0] * num_chunks

    for fight_index in range(num_fights):
        # Get the records for both fighters in the current fight
        fighter1_record = lightweight.iloc[fight_index * 2]
        fighter2_record = lightweight.iloc[fight_index * 2 + 1]

        # Convert features from both fighters to numeric types; non-numeric entries become 0
        fighter1_features = pd.to_numeric(fighter1_record[chosen_features], errors='coerce').fillna(0)
        fighter2_features = pd.to_numeric(fighter2_record[chosen_features], errors='coerce').fillna(0)

        context = np.concatenate([fighter1_features, fighter2_features])

        # Use Contextual Thompson Sampling to recommend an arm (fighter)
        chosen_arm = cts.select_arm(context)

        # Determine the actual winner (arm 0 represents fighter 1 and arm 1 represents fighter 2)
        actual_winner = 0 if fighter1_record['result'] == 1 else 1

        # Reward is 1 if the chosen arm matches the actual winner, else 0
        reward = 1 if chosen_arm == actual_winner else 0

        # Update the model (only the arm that was played learns from the outcome)
        cts.update(chosen_arm, context, reward)

        # The reward is exactly the "prediction was correct" indicator, so it
        # feeds both the reward and the correct-prediction counters
        current_chunk = fight_index // chunk_size
        chunk_rewards[current_chunk] += reward
        chunk_correct_predictions[current_chunk] += reward
        total_reward += reward
        correct_predictions += reward

    # Overall accuracy for this R value (NaN when there are no fights at all)
    accuracy = correct_predictions / num_fights if num_fights else float("nan")
    total_accuracies.append(accuracy)

    # Per-chunk accuracy for THIS R value. It is computed before the report
    # below so the printed numbers can never come from an earlier run.
    chunk_lengths = [min(chunk_size, num_fights - i * chunk_size) for i in range(num_chunks)]
    accuracies = [correct / length
                  for correct, length in zip(chunk_correct_predictions, chunk_lengths)]

    # Print accuracy for each chunk (the last chunk may be shorter than chunk_size)
    for i, chunk_accuracy in enumerate(accuracies):
        first_step = i * chunk_size + 1
        last_step = min((i + 1) * chunk_size, num_fights)
        print(f"R_value={R_value}: Accuracy for steps {first_step} - {last_step}: {chunk_accuracy}")

    print(f"Total reward for 'r'={R_value}: {total_reward}")
    print(f"Accuracy for 'r'={R_value}: {accuracy}")

    # x position of each chunk: its first step, or num_fights for a partial last chunk
    x_values = [i * chunk_size + 1 if (i + 1) * chunk_size <= num_fights
                else num_fights
                for i in range(num_chunks)]
    plt.plot(x_values, accuracies, marker='o', linestyle='-', label=f'R={R_value}')
    # Show the plot (one figure per R value)
    plt.title('Accuracy Over Chunks')
    plt.xlabel('Steps')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True)
    plt.show()

# Plot total accuracies for each R value
plt.figure(figsize=(10, 6))
plt.plot(R_values, total_accuracies, marker='o', linestyle='-')
plt.title("Total Accuracy for Different 'R' Values")
plt.xlabel('R value')
plt.ylabel('Total Accuracy')
plt.grid(True)
plt.show()

# Optimised model 
### The best-performing values of delta, epsilon and R are 0.9, 0.40 and 0.10 respectively

In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats

class ContextualThompsonSampling:
    """Linear contextual Thompson Sampling with one Gaussian model per arm.

    Every arm keeps a matrix ``A`` (initialised to the identity) and a vector
    ``b`` (initialised to zeros).  ``select_arm`` draws one parameter vector
    per arm from ``N(A^-1 b, v^2 A^-1)`` and returns the arm whose draw gives
    the context the highest score, where
    ``v = R * sqrt(24 / epsilon * n_features * ln(1 / delta))``.

    Args:
        n_arms: Number of arms to choose between.
        n_features: Length of each context vector.
        delta: Float strictly between 0 and 1.
        R: Strictly positive float; scales the sampling standard deviation.
        epsilon: Float greater than 0 and at most 1.
        random_state: Seed for this instance's private random generator, so
            two instances built with the same seed make the same draws.

    Raises:
        ValueError: If ``delta``, ``R`` or ``epsilon`` is not a float or lies
            outside its allowed range.
    """

    def __init__(self, n_arms, n_features, delta=0.9,
                 R=0.1, epsilon=0.4, random_state=456):
        self.n_arms = n_arms
        self.n_features = n_features
        self.random_state = random_state
        # Private generator: makes random_state actually control the sampling
        # instead of silently relying on NumPy's global random state.
        self._rng = np.random.default_rng(random_state)

        # 0 < delta < 1 (delta == 0 would divide by zero in log(1 / delta))
        if not isinstance(delta, float):
            raise ValueError("delta should be float")
        if not 0 < delta < 1:
            raise ValueError("delta should be in (0, 1)")
        self.delta = delta

        # R > 0
        if not isinstance(R, float):
            raise ValueError("R should be float")
        if not R > 0:
            raise ValueError("R should be positive")
        self.R = R

        # 0 < epsilon <= 1 (epsilon == 0 would divide by zero in 24 / epsilon)
        if not isinstance(epsilon, float):
            raise ValueError("epsilon should be float")
        if not 0 < epsilon <= 1:
            raise ValueError("epsilon should be in (0, 1]")
        self.epsilon = epsilon

        self.A = [np.identity(n_features) for _ in range(n_arms)]
        self.b = [np.zeros(n_features) for _ in range(n_arms)]

    def select_arm(self, context):
        """Sample a parameter vector for every arm and return the best arm.

        Args:
            context: 1-D array-like of length ``n_features``.

        Returns:
            Index of the arm with the highest sampled score (``np.argmax``
            result, so ties go to the lowest index).
        """
        # v does not depend on the arm, so compute it once per call
        v = self.R * np.sqrt(24 / self.epsilon * self.n_features * np.log(1 / self.delta))

        # Small diagonal term that keeps the covariance positive-semidefinite
        # despite round-off in the matrix inverse
        regularization = 1e-6 * np.identity(self.n_features)

        scores = np.zeros(self.n_arms)
        for arm in range(self.n_arms):
            A_inv = np.linalg.inv(self.A[arm])
            mu_hat = A_inv @ self.b[arm]

            cov_matrix = v**2 * A_inv + regularization
            # Force exact symmetry; the numerical inverse can be slightly off,
            # which makes the sampler warn about an invalid covariance
            cov_matrix = 0.5 * (cov_matrix + cov_matrix.T)

            # Keep the draw 1-D so the dot product below is a true scalar;
            # storing a size-1 array into scores[arm] is deprecated in NumPy.
            mu_tilde = self._rng.multivariate_normal(mu_hat, cov_matrix)
            scores[arm] = context @ mu_tilde

        selected_arm = np.argmax(scores)
        return selected_arm

    def update(self, arm, context, reward):
        """Fold one observed (context, reward) pair into ``arm``'s statistics.

        Args:
            arm: Index of the arm that was played.
            context: Context vector that was passed to ``select_arm``.
            reward: Numeric reward observed for playing ``arm``.
        """
        self.A[arm] += np.outer(context, context)
        self.b[arm] += reward * context
        
# `lightweight` is assumed to be a pandas DataFrame already loaded by an
# earlier cell; the columns listed here are read from it for each fighter.
# The list is concatenated group by group (order fixes the feature order).
chosen_features = (
    ["total_comp_time", "reach", "height", "age"]
    + ["knockdowns", "sub_attempts", "reversals", "control"]
    # "<stat>_landed" / "<stat>_attempts" pairs
    + ["takedowns_landed", "takedowns_attempts"]
    + ["sig_strikes_landed", "sig_strikes_attempts"]
    + ["total_strikes_landed", "total_strikes_attempts"]
    + ["head_strikes_landed", "head_strikes_attempts"]
    + ["body_strikes_landed", "body_strikes_attempts"]
    + ["leg_strikes_landed", "leg_strikes_attempts"]
    + ["distance_strikes_landed", "distance_strikes_attempts"]
    + ["clinch_strikes_landed", "clinch_strikes_attempts"]
    + ["ground_strikes_landed", "ground_strikes_attempts"]
    # record / rating columns
    + ["KO_losses", "days_since_last_comp", "lose_streak"]
    + ["win_streak", "win_loss_ratio"]
    + ["stamina", "num_fights", "trueskill", "elo"]
)

# The 'result' column must exist and mark the winner (1 for the winner, 0 for the loser)

# Each fight occupies two consecutive rows of `lightweight` (one per fighter)
num_fights = len(lightweight) // 2
num_features = len(chosen_features) * 2  # Features from both fighters

# Accuracy is reported per chunk of `chunk_size` consecutive fights
chunk_size = 100
# Ceiling division, so no empty trailing chunk exists when num_fights is an
# exact multiple of chunk_size (an empty chunk would divide by zero below)
num_chunks = -(-num_fights // chunk_size)
chunk_rewards = [0] * num_chunks  # Total reward collected in each chunk
chunk_correct_predictions = [0] * num_chunks  # Correct predictions in each chunk

# Create an instance of the ContextualThompsonSampling class (default hyper-parameters)
cts = ContextualThompsonSampling(2, num_features)

total_reward = 0
correct_predictions = 0

for fight_index in range(num_fights):
    # Get the records for both fighters in the current fight
    fighter1_record = lightweight.iloc[fight_index * 2]
    fighter2_record = lightweight.iloc[fight_index * 2 + 1]

    # Convert features from both fighters to numeric types; non-numeric entries become 0
    fighter1_features = pd.to_numeric(fighter1_record[chosen_features], errors='coerce').fillna(0)
    fighter2_features = pd.to_numeric(fighter2_record[chosen_features], errors='coerce').fillna(0)

    context = np.concatenate([fighter1_features, fighter2_features])

    # Use Contextual Thompson Sampling to recommend an arm (fighter)
    chosen_arm = cts.select_arm(context)

    # Determine the actual winner (arm 0 represents fighter 1 and arm 1 represents fighter 2)
    actual_winner = 0 if fighter1_record['result'] == 1 else 1

    # Reward is 1 if the chosen arm matches the actual winner, else 0
    reward = 1 if chosen_arm == actual_winner else 0

    # Update the model (only the arm that was played learns from the outcome)
    cts.update(chosen_arm, context, reward)

    # The reward is exactly the "prediction was correct" indicator, so it
    # feeds both the reward and the correct-prediction counters
    current_chunk = fight_index // chunk_size
    chunk_rewards[current_chunk] += reward
    chunk_correct_predictions[current_chunk] += reward
    total_reward += reward
    correct_predictions += reward

# Print accuracy for each chunk (the last chunk may be shorter than chunk_size)
for i, chunk_correct in enumerate(chunk_correct_predictions):
    first_step = i * chunk_size + 1
    last_step = min((i + 1) * chunk_size, num_fights)
    chunk_length = last_step - first_step + 1
    print(f"Accuracy for steps {first_step} - {last_step}: {chunk_correct / chunk_length}")

# Overall accuracy (NaN when there are no fights at all)
accuracy = correct_predictions / num_fights if num_fights else float("nan")
print(f"Total reward: {total_reward}")
print(f"Accuracy: {accuracy}")