In [2]:
import pandas as pd

# Loading the CSV files into DataFrames
file_paths = {
    "2_card_open_actual": "data/2_card_open_actual.csv",
    "3_card_covered_actual": "data/3_card_covered_actual.csv",
    "3_card_covered_predictions": "data/3_card_covered_predictions.csv",
    "3_card_open_actual": "data/3_card_open_actual.csv",
    "actual_1_card": "data/actual_1_card.csv",
    "actual_2_card_covered": "data/actual_2_card_covered.csv",
    "predictions_2_card_covered": "data/predictions_2_card_covered.csv",
    "predictions_2_card_open": "data/predictions_2_card_open.csv",
    "predictions_one_card": "data/predictions_one_card.csv",
    "predictions_open_3_card": "data/predictions_open_3_card.csv"
}

dataframes = {name: pd.read_csv(path) for name, path in file_paths.items()}


In [3]:

# initial accuracy analysis for the 1 card condition.


actual_1_card = dataframes["actual_1_card"]

# Predictions one card values
predictions_one_card = dataframes["predictions_one_card"]
def extract_card_value(pred_string):
    return pred_string.split(',')[0]

# Extract actual card values
actual_1_card["Dealer_Actual"] = actual_1_card["Dealer"]
actual_1_card["Player_Actual"] = actual_1_card["Player"]

predictions_one_card["Dealer_Predicted"] = predictions_one_card["Dealer"].apply(extract_card_value)
predictions_one_card["Player_Predicted"] = predictions_one_card["Player"].apply(extract_card_value)

one_card_comparison = pd.DataFrame({
    "Dealer_Actual": actual_1_card["Dealer_Actual"],
    "Dealer_Predicted": predictions_one_card["Dealer_Predicted"],
    "Player_Actual": actual_1_card["Player_Actual"],
    "Player_Predicted": predictions_one_card["Player_Predicted"]
})

dealer_accuracy_1 = (one_card_comparison["Dealer_Actual"] == one_card_comparison["Dealer_Predicted"]).mean()
player_accuracy_1 = (one_card_comparison["Player_Actual"] == one_card_comparison["Player_Predicted"]).mean()
total_accuracy_1 = (dealer_accuracy_1 + player_accuracy_1) / 2

one_card_accuracy = {
    "Dealer_Accuracy": dealer_accuracy_1,
    "Player_Accuracy": player_accuracy_1,
    "Total_Accuracy": total_accuracy_1
}

one_card_comparison, one_card_accuracy


(    Dealer_Actual Dealer_Predicted Player_Actual Player_Predicted
 0              9C               9C           10S              10S
 1              6H               6H            2C       (Score: 0)
 2              5S               5S            JC               JC
 3              7D               7D            7C               7C
 4              QH               QH            5H               5H
 ..            ...              ...           ...              ...
 99             QH               QH            JD               JD
 100            8D               8D            AD               AD
 101           10H              10H            9C               9C
 102            6H               6H            5H               5H
 103            2C               2C            4H               4H
 
 [104 rows x 4 columns],
 {'Dealer_Accuracy': 0.9326923076923077,
  'Player_Accuracy': 0.9230769230769231,
  'Total_Accuracy': 0.9278846153846154})

In [4]:
import pandas as pd

file_paths = {
    '3_card_covered_predictions': 'data/3_card_covered_predictions.csv',
    'one_card_actual': 'data/actual_1_card.csv',
    'one_card_predictions': 'data/predictions_one_card.csv',
    '2_card_open_actual': 'data/2_card_open_actual.csv',
    '2_card_open_predictions': 'data/predictions_2_card_open.csv',
    '2_card_covered_actual': 'data/actual_2_card_covered.csv',
    '2_card_covered_predictions': 'data/predictions_2_card_covered.csv',
    '3_card_open_actual': 'data/3_card_open_actual.csv',
    '3_card_open_predictions': 'data/predictions_open_3_card.csv',
    '3_card_covered_actual': 'data/3_card_covered_actual.csv',
    '3_card_covered_predictions': 'data/3_card_covered_predictions.csv',
    '1_card_low_light_predictions': 'data/one_card_low_light_predictions.csv',
    '1_card_low_light_actual': 'data/one_card_low_light_actual.csv',
    '1_card_high_light_predictions': 'data/one_card_high_light_predictions.csv',
    '1_card_high_light_actual': 'data/one_card_high_light_actual.csv'
}

# Load the data
data = {key: pd.read_csv(path) for key, path in file_paths.items()}


def extract_card_symbols(card_string):
    return [card_string.split(',')[i].strip() for i in range(0, len(card_string.split(',')), 2)]


def replace_score_zero(card_list):
    return [] if card_list == ['(Score: 0)'] else card_list

# Process predictions data
for key in ['3_card_covered_predictions', 'one_card_predictions', '2_card_open_predictions', '2_card_covered_predictions', '3_card_open_predictions', '1_card_high_light_predictions', '1_card_low_light_predictions']:
    data[key]['Dealer_Cards'] = data[key]['Dealer'].apply(extract_card_symbols)
    data[key]['Player_Cards'] = data[key]['Player'].apply(extract_card_symbols)
    data[key]['Player_Cards'] = data[key]['Player_Cards'].apply(replace_score_zero)
    data[key] = data[key][['Dealer_Cards', 'Player_Cards']]

# Process actual data
for key in ['one_card_actual', '2_card_open_actual', '2_card_covered_actual', '3_card_open_actual', '3_card_covered_actual']:
    data[key]['Dealer_Cards'] = data[key]['Dealer'].apply(extract_card_symbols)
    data[key]['Player_Cards'] = data[key]['Player'].apply(extract_card_symbols)
    data[key] = data[key][['Dealer_Cards', 'Player_Cards']]

# Merge predictions and actuals
def merge_actual_predicted(actual, predicted):
    merged = pd.DataFrame({
        'Actual': actual['Dealer_Cards'] + actual['Player_Cards'],
        'Predicted': predicted['Dealer_Cards'] + predicted['Player_Cards']
    })
    return merged



merged_data = {
    'one_card': merge_actual_predicted(data['one_card_actual'], data['one_card_predictions']),
    '2_card_open': merge_actual_predicted(data['2_card_open_actual'], data['2_card_open_predictions']),
    '2_card_covered': merge_actual_predicted(data['2_card_covered_actual'], data['2_card_covered_predictions']),
    '3_card_open': merge_actual_predicted(data['3_card_open_actual'], data['3_card_open_predictions']),
    '3_card_covered': merge_actual_predicted(data['3_card_covered_actual'], data['3_card_covered_predictions'])
}

one_card_predicted_df = data['one_card_predictions']
two_card_predicted_open_df = data['2_card_open_predictions']
two_card_predicted_closed_df = data['2_card_covered_predictions']
three_card_predicted_open_df = data['3_card_open_predictions']
three_card_predicted_closed_df = data['3_card_covered_predictions']
one_card_low_light_predicted_df = data['1_card_low_light_predictions']
one_card_high_light_predicted_df = data['1_card_high_light_predictions']
print(one_card_low_light_predicted_df)

     Dealer_Cards Player_Cards
0           [10D]         [8D]
1            [4H]           []
2            [9D]         [9S]
3            [2H]         [4S]
4           [10C]         [7D]
..            ...          ...
99           [AS]         [QC]
100         [10H]         [JS]
101          [KH]           []
102          [AD]           []
103  [(Score: 0)]         [2H]

[104 rows x 2 columns]


In [5]:

file_paths = {
    '3_card_covered_predictions': 'data/3_card_covered_predictions.csv',
    'one_card_actual': 'data/actual_1_card.csv',
    'one_card_predictions': 'data/predictions_one_card.csv',
    '2_card_open_actual': 'data/2_card_open_actual.csv',
    '2_card_open_predictions': 'data/predictions_2_card_open.csv',
    '2_card_covered_actual': 'data/actual_2_card_covered.csv',
    '2_card_covered_predictions': 'data/predictions_2_card_covered.csv',
    '3_card_open_actual': 'data/3_card_open_actual.csv',
    '3_card_open_predictions': 'data/predictions_open_3_card.csv',
    '3_card_covered_actual': 'data/3_card_covered_actual.csv',
    '3_card_covered_predictions': 'data/3_card_covered_predictions.csv',
    '1_card_low_light_predictions': 'data/one_card_low_light_predictions.csv',
    '1_card_low_light_actual': 'data/one_card_low_light_actual.csv',
    '1_card_high_light_predictions': 'data/one_card_high_light_predictions.csv',
    '1_card_high_light_actual': 'data/one_card_high_light_actual.csv'
}

# Load the data
data = {key: pd.read_csv(path) for key, path in file_paths.items()}


def extract_card_symbols(card_string):
    return [card.strip() for card in card_string.split(',')]


def replace_score_zero(card_list):
    return [] if card_list == ['(Score: 0)'] else card_list

# Ensure correct number of card symbols
def check_card_count(card_list, expected_count):
    if len(card_list) != expected_count:
        raise ValueError(f"Expected {expected_count} cards, found {len(card_list)} cards.")
    return card_list

# Process predictions data
for key in ['3_card_covered_predictions', 'one_card_predictions', '2_card_open_predictions', '2_card_covered_predictions', '3_card_open_predictions']:
    data[key]['Dealer_Cards'] = data[key]['Dealer'].apply(extract_card_symbols)
    data[key]['Player_Cards'] = data[key]['Player'].apply(extract_card_symbols)
    data[key]['Player_Cards'] = data[key]['Player_Cards'].apply(replace_score_zero)
    data[key] = data[key][['Dealer_Cards', 'Player_Cards']]

# Process actual data
actual_card_counts = {
    'one_card_actual': 1,
    '2_card_open_actual': 2,
    '2_card_covered_actual': 2,
    '3_card_open_actual': 3,
    '3_card_covered_actual': 3,
    '1_card_high_light_actual': 1,
    '1_card_low_light_actual': 1

}

for key in actual_card_counts:
    data[key]['Dealer_Cards'] = data[key]['Dealer'].apply(extract_card_symbols).apply(lambda x: check_card_count(x, actual_card_counts[key]))
    data[key]['Player_Cards'] = data[key]['Player'].apply(extract_card_symbols).apply(lambda x: check_card_count(x, actual_card_counts[key]))
    data[key] = data[key][['Dealer_Cards', 'Player_Cards']]

# Merge predictions and actuals
def merge_actual_predicted(actual, predicted):
    merged = pd.DataFrame({
        'Actual': actual['Dealer_Cards'] + actual['Player_Cards'],
        'Predicted': predicted['Dealer_Cards'] + predicted['Player_Cards']
    })
    return merged

merged_data = {
    'one_card': merge_actual_predicted(data['one_card_actual'], one_card_predicted_df),
    '2_card_open': merge_actual_predicted(data['2_card_open_actual'], two_card_predicted_open_df),
    '2_card_covered': merge_actual_predicted(data['2_card_covered_actual'], two_card_predicted_closed_df),
    '3_card_open': merge_actual_predicted(data['3_card_open_actual'], three_card_predicted_open_df),
    '3_card_covered': merge_actual_predicted(data['3_card_covered_actual'], three_card_predicted_closed_df),
    '1_card_low_light': merge_actual_predicted(data['1_card_low_light_actual'], one_card_low_light_predicted_df),
    '1_card_high_light': merge_actual_predicted(data['1_card_high_light_actual'], one_card_high_light_predicted_df),
}

# Display the merged data
for key, df in merged_data.items():
    print(f"--- {key} ---")
    print(df)
    print()

--- one_card ---
        Actual  Predicted
0    [9C, 10S]  [9C, 10S]
1     [6H, 2C]       [6H]
2     [5S, JC]   [5S, JC]
3     [7D, 7C]   [7D, 7C]
4     [QH, 5H]   [QH, 5H]
..         ...        ...
99    [QH, JD]   [QH, JD]
100   [8D, AD]   [8D, AD]
101  [10H, 9C]  [10H, 9C]
102   [6H, 5H]   [6H, 5H]
103   [2C, 4H]   [2C, 4H]

[104 rows x 2 columns]

--- 2_card_open ---
                Actual          Predicted
0     [7D, 7C, QH, 3H]       [7D, 3H, QH]
1     [JC, KH, KC, 6H]       [KH, JC, 6H]
2     [9C, 5D, 9H, 3S]   [5D, 9C, 3S, 9H]
3     [JD, JS, 6S, AS]   [JD, JS, 6S, AS]
4     [JH, 2D, 3C, 4C]           [2D, JH]
5    [QD, 10H, 8H, 8D]  [10H, QD, 8D, 8H]
6    [8C, 9D, 4D, 10D]  [9D, 8C, 10D, 4D]
7     [6C, 4S, 8S, QS]       [6C, 4S, QS]
8     [2H, 2C, AD, 6D]       [2H, AD, 6D]
9     [3D, 7H, 7S, KD]   [7H, 3D, 7S, KD]
10    [5S, AC, QC, 4H]   [AC, 5S, QC, 4H]
11    [AH, 9S, 5H, 2S]   [AH, 9S, 2S, 5H]
12  [KS, 5C, 10C, 10S]     [KS, 10C, 10S]
13   [4H, QC, AC, 10D]  [4H, QC, 10D, 

In [6]:
# Function to calculate accuracy and misclassified cards

from collections import Counter

def calculate_accuracy_and_misclassifications(merged_df, total_cards):
    total_correct_cards = 0
    perfect_sets = 0
    misclassified_cards = Counter()
    
    for index, row in merged_df.iterrows():
        actual_set = set(row['Actual'])
        predicted_set = set(row['Predicted'])
        
        correct_count = len(actual_set & predicted_set)
        total_correct_cards += correct_count
        
        misclassified_set = actual_set - predicted_set
        misclassified_cards.update(misclassified_set)
        
        if actual_set == predicted_set:
            perfect_sets += 1
    
    average_correct_cards_percentage = (total_correct_cards / (len(merged_df) * total_cards)) * 100
    perfect_set_accuracy = (perfect_sets / len(merged_df)) * 100
    
    return average_correct_cards_percentage, perfect_set_accuracy, misclassified_cards

# Total cards per condition
total_cards_per_condition = {
    'one_card': 2,
    '2_card_open': 4,
    '2_card_covered': 4,
    '3_card_open': 6,
    '3_card_covered': 6,
    '1_card_high_light': 2,
    '1_card_low_light': 2
}

# Calculate and display accuracy and misclassifications for each dataset
for key, df in merged_data.items():
    total_cards = total_cards_per_condition[key]
    avg_correct_cards_percentage, perfect_set_acc_percentage, misclassified_cards = calculate_accuracy_and_misclassifications(df, total_cards)
    
    print(f"--- {key} ---")
    print(f"Average Correct Cards: {avg_correct_cards_percentage}%")
    print(f"Perfect Set Accuracy: {perfect_set_acc_percentage}%")
    print("Most Misclassified Cards:")
    for card, count in misclassified_cards.most_common(5):
        print(f"{card}: {count}")
    print()

--- one_card ---
Average Correct Cards: 92.78846153846155%
Perfect Set Accuracy: 85.57692307692307%
Most Misclassified Cards:
5C: 4
8C: 3
3C: 2
AC: 2
2C: 1

--- 2_card_open ---
Average Correct Cards: 88.46153846153845%
Perfect Set Accuracy: 61.53846153846154%
Most Misclassified Cards:
5C: 4
7C: 3
KC: 3
4C: 3
2C: 2

--- 2_card_covered ---
Average Correct Cards: 90.38461538461539%
Perfect Set Accuracy: 63.46153846153846%
Most Misclassified Cards:
3C: 4
2C: 2
JC: 2
5C: 2
7C: 1

--- 3_card_open ---
Average Correct Cards: 70.37037037037037%
Perfect Set Accuracy: 5.555555555555555%
Most Misclassified Cards:
8C: 5
2C: 4
5C: 4
3C: 4
AC: 4

--- 3_card_covered ---
Average Correct Cards: 77.77777777777779%
Perfect Set Accuracy: 25.0%
Most Misclassified Cards:
3C: 3
AC: 3
2C: 3
AS: 3
9C: 2

--- 1_card_low_light ---
Average Correct Cards: 78.36538461538461%
Perfect Set Accuracy: 58.65384615384615%
Most Misclassified Cards:
8C: 4
2C: 4
3S: 4
JC: 3
KC: 3

--- 1_card_high_light ---
Average Correct Car

In [7]:
import re

def extract_confidence(data, column):
    """
    Extracts confidence values from the specified column in the dataset.
    
    Args:
    - data (pd.DataFrame): The dataframe containing prediction data.
    - column (str): The column from which to extract confidence values.
    
    Returns:
    - List of tuples containing (suit, confidence).
    """
    suit_confidence = []
    for row in data[column]:
        cards = re.findall(r'(\d*[JQKA]?[CDHS]),\s*([\d.]+)', row)
        for card, confidence in cards:
            suit = card[-1]  # last character represents the suit
            suit_confidence.append((suit, float(confidence)))
    return suit_confidence

# Extract confidence values for each prediction dataset
suit_confidences = []
prediction_columns = {
    "predictions_one_card": ["Dealer", "Player"],
    "predictions_2_card_open": ["Dealer", "Player"],
    "predictions_2_card_covered": ["Dealer", "Player"],
    "predictions_open_3_card": ["Dealer", "Player"],
    "3_card_covered_predictions": ["Dealer", "Player"]
}

for file, columns in prediction_columns.items():
    for column in columns:
        suit_confidences.extend(extract_confidence(dataframes[file], column))

# Calculate average confidence per suit
suit_confidence_df = pd.DataFrame(suit_confidences, columns=["Suit", "Confidence"])
average_confidence_per_suit = suit_confidence_df.groupby("Suit")["Confidence"].mean().to_dict()

average_confidence_per_suit


{'C': 0.6580698163750001,
 'D': 0.7774713856133829,
 'H': 0.7691194954195805,
 'S': 0.7323304054545454}

In [10]:
# determine if the difference in confidence levels is statistically significant.

from scipy.stats import f_oneway

# Perform ANOVA test
suits = suit_confidence_df["Suit"].unique()
confidence_values_by_suit = [suit_confidence_df[suit_confidence_df["Suit"] == suit]["Confidence"].values for suit in suits]

anova_result = f_oneway(*confidence_values_by_suit)
average_confidence_per_suit = suit_confidence_df.groupby("Suit")["Confidence"].mean().to_dict()
#anova results
print("Average confidence per suit:", average_confidence_per_suit)
print("ANOVA Result:", anova_result)

Average confidence per suit: {'C': 0.6580698163750001, 'D': 0.7774713856133829, 'H': 0.7691194954195805, 'S': 0.7323304054545454}
ANOVA Result: F_onewayResult(statistic=75.81703181871964, pvalue=6.224143509250041e-44)


In [9]:
# Function to calculate accuracy
def calculate_accuracy(merged_df, total_cards):
    total_correct_cards = 0
    perfect_sets = 0
    
    for index, row in merged_df.iterrows():
        actual_set = set(row['Actual'])
        predicted_set = set(row['Predicted'])
        
        correct_count = len(actual_set & predicted_set)
        total_correct_cards += correct_count
        
        if actual_set == predicted_set:
            perfect_sets += 1
    
    average_correct_cards_percentage = (total_correct_cards / (len(merged_df) * total_cards)) * 100
    perfect_set_accuracy = (perfect_sets / len(merged_df)) * 100
    
    return average_correct_cards_percentage, perfect_set_accuracy

# Total cards per condition
total_cards_per_condition = {
    'one_card': 2,
    '2_card_open': 4,
    '2_card_covered': 4,
    '3_card_open': 6,
    '3_card_covered': 6,
    '1_card_high_light': 2,
    '1_card_low_light': 2
}

# Calculate and display accuracy for each dataset
for key, df in merged_data.items():
    total_cards = total_cards_per_condition[key]
    avg_correct_cards_percentage, perfect_set_acc_percentage = calculate_accuracy(df, total_cards)
    print(f"--- {key} ---")
    print(f"Average Correct Cards: {avg_correct_cards_percentage}%")
    print(f"Perfect Set Accuracy: {perfect_set_acc_percentage}%")
    print()

--- one_card ---
Average Correct Cards: 92.78846153846155%
Perfect Set Accuracy: 85.57692307692307%

--- 2_card_open ---
Average Correct Cards: 88.46153846153845%
Perfect Set Accuracy: 61.53846153846154%

--- 2_card_covered ---
Average Correct Cards: 90.38461538461539%
Perfect Set Accuracy: 63.46153846153846%

--- 3_card_open ---
Average Correct Cards: 70.37037037037037%
Perfect Set Accuracy: 5.555555555555555%

--- 3_card_covered ---
Average Correct Cards: 77.77777777777779%
Perfect Set Accuracy: 25.0%

--- 1_card_low_light ---
Average Correct Cards: 78.36538461538461%
Perfect Set Accuracy: 58.65384615384615%

--- 1_card_high_light ---
Average Correct Cards: 66.34615384615384%
Perfect Set Accuracy: 41.34615384615385%

