In [3]:
import numpy as np 
import pandas as pd 
# Analysed-games export loaded for this run.
# Alternative input files (swap the path below to use one of them):
#   "../Analyzed_Games/twic1556_15_analyzed.csv"
#   "../Analyzed_Games/test2_15_analyzed.csv"
analyzed_games_path = "../Analyzed_Games/twic920_15_analyzed.csv"
df = pd.read_csv(analyzed_games_path)


# Cleaning data (use Dorian's cleaning function instead of this for better results)

In [4]:
# Number of distinct games before any filtering.
initial_game_count = df['GameID'].nunique()

# Row-level checks: a recognised final result, and both FIDE ids present.
has_known_result = df['Result'].isin(['1-0', '0-1', '1/2-1/2'])
has_both_fide_ids = df[['WhiteFideId', 'BlackFideId']].notna().all(axis=1)

# A game qualifies when at least one of its rows passes each check.
valid_result_games = df.loc[has_known_result, 'GameID'].unique()
fide_valid_games = df.loc[has_both_fide_ids, 'GameID'].unique()
valid_games = np.intersect1d(valid_result_games, fide_valid_games)

# Keep every row of the qualifying games, with a fresh 0..n-1 row index.
# NOTE(review): the later cells visible in this notebook keep operating on
# `df`, not on `df_cleaned` -- confirm that this is intended.
df_cleaned = df.loc[df['GameID'].isin(valid_games)].reset_index(drop=True)

# How many games the two checks filtered out.
final_game_count = df_cleaned['GameID'].nunique()
removed_games = initial_game_count - final_game_count

# Renumber the surviving games 1..N in order of first appearance.
unique_games = df_cleaned['GameID'].unique()
game_id_mapping = dict(zip(unique_games, range(1, len(unique_games) + 1)))
df_cleaned['GameID'] = df_cleaned['GameID'].map(game_id_mapping)

# Save the cleaned DataFrame to a new CSV file (optional)
# df_cleaned.to_csv("../huge_analyzed_games/combined_analyzed_games_cleaned.csv", index=False)

# Report the number of games dropped by the cleaning step.
print(f"Number of games removed: {removed_games}")

Number of games removed: 11


# Creating Winning Chances column 

In [5]:

# Normalise the raw evaluation text: force every entry to str, then trim
# surrounding whitespace so the prefix checks in convert_evaluation are reliable.
evaluation_text = df['Evaluation'].astype(str)
df['Evaluation'] = evaluation_text.str.strip()

# Odd 'MoveNumber' values are labelled 'White', even ones 'Black'.
# NOTE(review): assumes MoveNumber counts half-moves starting at 1 -- confirm.
odd_move_number = (df['MoveNumber'] % 2).eq(1)
df['PlayerToMove'] = np.where(odd_move_number, 'White', 'Black')

# Function to convert 'Evaluation' to 'New_evaluations'
def convert_evaluation(row):
    """Turn a row's textual 'Evaluation' into a numeric score.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by 'Evaluation' (for example a DataFrame row).
        The value is expected to be a string.

    Returns
    -------
    float
        * 0.0 for the "mate in 0" markers '+M0', '-M0' and 'M0';
        * 20.0 for any other text starting with '+M' or 'M' (White can mate);
        * -20.0 for text starting with '-M' (Black can mate);
        * the parsed number for plain numeric text;
        * NaN when the text cannot be parsed as a float.
    """
    text = row['Evaluation']

    # Mate already delivered: treated as a neutral score.
    if text in ('+M0', '-M0', 'M0'):
        return 0.0

    # Forced mates are capped at +/-20 instead of carrying a mate distance.
    if text.startswith(('+M', 'M')):
        return 20.0
    if text.startswith('-M'):
        return -20.0

    # Everything else should be an ordinary numeric evaluation.
    try:
        return float(text)
    except ValueError:
        return np.nan

# Build the numeric 'New_evaluations' column from the textual evaluations.
# Only the 'Evaluation' column is handed to the row-wise apply:
# convert_evaluation reads nothing else from the row, so there is no need to
# materialise every other column for each row.
df['New_evaluations'] = df[['Evaluation']].apply(convert_evaluation, axis=1)

In [6]:
# Map 'Result' to outcome from White's perspective
def get_outcome(result):
    """Translate a game result string into White's outcome.

    Parameters
    ----------
    result : object
        Result text: '1-0', '0-1' or '1/2-1/2'.

    Returns
    -------
    str or None
        'Win' when White won, 'Loss' when White lost, 'Draw' for a draw,
        and None for anything else so that such games can be excluded.
    """
    outcome_by_result = (
        ('1-0', 'Win'),
        ('0-1', 'Loss'),
        ('1/2-1/2', 'Draw'),
    )
    for score, outcome in outcome_by_result:
        if result == score:
            return outcome
    return None
    
    
def calculate_chances(df, lower_eval, upper_eval):
    """Outcome percentages for games that reached a given evaluation range.

    A game is selected when at least one of its rows has 'New_evaluations'
    within ``[lower_eval, upper_eval]`` (both ends inclusive). Each distinct
    ('GameID', 'Result') pair of the selected games is then classified with
    ``get_outcome``; pairs whose outcome is None are ignored.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'New_evaluations', 'GameID' and 'Result'.
    lower_eval, upper_eval : float
        Inclusive bounds of the evaluation range.

    Returns
    -------
    list
        ``[winning_chance, drawing_chance, losing_chance, total_valid_games,
        outcome_counts]``. The chances are percentages from White's point of
        view; ``outcome_counts`` is the ``value_counts`` Series of outcomes,
        or None when no valid game was found (all chances are then 0.0).
    """
    # Games that have at least one position inside the evaluation range.
    in_range = df['New_evaluations'].between(lower_eval, upper_eval)
    game_ids = df.loc[in_range, 'GameID'].unique()

    # One row per distinct (GameID, Result) pair of those games.
    selected_rows = df['GameID'].isin(game_ids)
    per_game = df.loc[selected_rows, ['GameID', 'Result']].drop_duplicates()

    # Classify each result and discard the ones get_outcome does not recognise.
    per_game = per_game.assign(Outcome=per_game['Result'].apply(get_outcome))
    counted = per_game.dropna(subset=['Outcome'])
    total_valid_games = len(counted)

    if total_valid_games == 0:
        return [0.0, 0.0, 0.0, total_valid_games, None]

    outcome_counts = counted['Outcome'].value_counts()
    winning_chance, drawing_chance, losing_chance = (
        (outcome_counts.get(label, 0) / total_valid_games) * 100
        for label in ('Win', 'Draw', 'Loss')
    )
    return [winning_chance, drawing_chance, losing_chance, total_valid_games, outcome_counts]


# Sanity check on the range around -20, the score assigned to "Black can mate".
calculate_chances(df, lower_eval=-21, upper_eval=-19)

[2.0408163265306123,
 1.0204081632653061,
 96.93877551020408,
 98,
 Outcome
 Loss    95
 Win      2
 Draw     1
 Name: count, dtype: int64]

In [8]:
# Lookup table of outcome chances per evaluation interval.
results_df = pd.read_csv("winning_chances_adjusted.csv")

# Bin edges every 0.2 starting at -21, rounded to one decimal so that the
# generated labels have the form "(lower, upper]".
intervals = np.round(np.arange(-21, 21.5, 0.2), decimals=1)
bin_labels = [f"({lower}, {upper}]" for lower, upper in zip(intervals[:-1], intervals[1:])]

# Make the cell safe to re-run: remove the columns a previous run added.
# Otherwise the merge below would emit 'LosingChance_x' / 'LosingChance_y'
# and the rename would create a second 'Winning_Chance' column.
df = df.drop(columns=['Interval', 'Winning_Chance', 'LosingChance'], errors='ignore')

# Bin 'New_evaluations' in 'df' to create an 'Interval' column
df['Interval'] = pd.cut(
    df['New_evaluations'],
    bins=intervals,
    labels=bin_labels,
    right=True,
    include_lowest=True,
)

# Attach the chances of each row's interval and expose 'WinningChance' as
# 'Winning_Chance'. validate='many_to_one' makes the merge raise if the lookup
# table lists an interval twice, instead of silently duplicating rows of 'df'.
df = df.merge(
    results_df[['Interval', 'WinningChance', 'LosingChance']],
    on='Interval',
    how='left',
    validate='many_to_one',
).rename(columns={'WinningChance': 'Winning_Chance'})


#df.to_csv("../huge_analyzed_games/combined_analyzed_15_16_winning_chances.csv")

# Creating the table (no need to run this yourselves: the cell above reads the saved `winning_chances_adjusted.csv` that these cells produce)

In [None]:
# Requires 'df' (with a 'New_evaluations' column) and calculate_chances
# from the cells above.

# Interval edges every 0.2, rounded to one decimal for clean labels.
intervals = np.round(np.arange(-20.2, 20.2, 0.2), decimals=1)

# One record per pair of consecutive edges.
# NOTE(review): calculate_chances treats both bounds as inclusive, whereas the
# label below is written as the half-open "(lower, upper]" -- an evaluation
# exactly on an edge is therefore counted in both neighbouring intervals.
results = []
for lower_eval, upper_eval in zip(intervals[:-1], intervals[1:]):
    chances = calculate_chances(df, lower_eval, upper_eval)
    winning_chance, drawing_chance, losing_chance, total_valid_games = chances[:4]
    results.append(dict(
        Interval=f"({lower_eval}, {upper_eval}]",
        LowerEval=lower_eval,
        UpperEval=upper_eval,
        WinningChance=winning_chance,
        DrawingChance=drawing_chance,
        LosingChance=losing_chance,
        TotalGames=total_valid_games,
    ))

# Tabulate the per-interval statistics.
results_df = pd.DataFrame(results)

# Intervals without any game get their chances copied from the closest
# interval that has data; this is the pool of candidates for that lookup.
has_games = results_df['TotalGames'].gt(0)
non_zero_df = results_df.loc[has_games].reset_index(drop=True)

# Function to fill in chances for intervals with TotalGames == 0
def fill_chances(row):
    """Return the three chance values to use for one interval row.

    Rows with at least one game keep their own 'WinningChance',
    'DrawingChance' and 'LosingChance'. Rows without games borrow the values
    of the row in the module-level ``non_zero_df`` whose 'LowerEval' is
    closest to this row's 'LowerEval' (``idxmin`` picks the first such row
    when several are equally close).
    """
    chance_columns = ['WinningChance', 'DrawingChance', 'LosingChance']

    if row['TotalGames'] > 0:
        return row[chance_columns]

    # Distance, in evaluation units, to every interval that has data.
    distance = (non_zero_df['LowerEval'] - row['LowerEval']).abs()
    nearest = non_zero_df.loc[distance.idxmin()]
    return nearest[chance_columns]

# Compute the (possibly borrowed) chances for every interval row ...
chance_columns = ['WinningChance', 'DrawingChance', 'LosingChance']
filled_chances = results_df.apply(fill_chances, axis='columns')

# ... and write them over the original chance columns.
results_df[chance_columns] = filled_chances

# The numeric bounds were only needed for the nearest-interval lookup.
results_df = results_df.drop(columns=['LowerEval', 'UpperEval'])

#results_df.to_csv('winning_chances.csv', index=False)

In [None]:
# Make 'WinningChance' non-decreasing from the first interval row to the last:
# every value is replaced by the running maximum seen so far.
# A cumulative max replaces the former element-wise loop over `.values`. That
# loop wrote straight into the DataFrame's own buffer, and fails when pandas
# hands out a read-only array (as it can under Copy-on-Write).
# NaN entries, if any, stay NaN and are skipped by the running maximum.
winning_chances = results_df['WinningChance'].cummax().to_numpy()
results_df['WinningChance'] = winning_chances

# Make 'LosingChance' non-increasing from the first row to the last: the same
# running maximum, taken from the last row backwards and flipped back into the
# original row order.
losing_chances = results_df['LosingChance'].iloc[::-1].cummax().iloc[::-1].to_numpy()
results_df['LosingChance'] = losing_chances

# Save the modified DataFrame back to CSV
results_df.to_csv('winning_chances_adjusted.csv', index=False)