In [1]:
import pandas as pd 
import numpy as np

In [2]:
# Load the comic-character attribute table and preview its first five rows.
data = pd.read_csv(filepath_or_buffer='data/last_comix_characters_info.csv')
data.head(n=5)

Unnamed: 0,Name,Alignment,Gender,EyeColor,Race,HairColor,Publisher,SkinColor,Height,Weight
0,Alfred Pennyworth,good,Male,blue,Human,Black,DC Comics,-,178,72
1,Ant-Man,good,Male,blue,Human,Blond,Marvel Comics,-,211,122
2,Anti-Venom,-,Male,blue,Symbiote,Blond,Marvel Comics,-,229,358
3,Aqualad,good,Male,blue,Atlantean,Black,DC Comics,-,178,106
4,Aquaman,good,Male,blue,Atlantean,Blond,DC Comics,-,185,146


In [3]:
# The character name is the prediction target; every other column is a feature.
y = data['Name']
X = data.drop(columns=['Name'])

In [4]:
# One-hot encode every column except Height and Weight, which are left out
# of the feature matrix entirely.
X_d = (
    X.drop(columns=['Height', 'Weight'])
     .pipe(lambda categorical: pd.get_dummies(categorical, columns=categorical.columns))
)
X_d

Unnamed: 0,Alignment_-,Alignment_bad,Alignment_good,Alignment_neutral,Gender_Female,Gender_Male,EyeColor_-,EyeColor_black,EyeColor_blue,EyeColor_brown,...,Publisher_Shueisha,Publisher_Sony Pictures,Publisher_Titan Books,SkinColor_-,SkinColor_blue,SkinColor_green,SkinColor_purple,SkinColor_red,SkinColor_silver,SkinColor_white
0,False,False,True,False,False,True,False,False,True,False,...,False,False,False,True,False,False,False,False,False,False
1,False,False,True,False,False,True,False,False,True,False,...,False,False,False,True,False,False,False,False,False,False
2,True,False,False,False,False,True,False,False,True,False,...,False,False,False,True,False,False,False,False,False,False
3,False,False,True,False,False,True,False,False,True,False,...,False,False,False,True,False,False,False,False,False,False
4,False,False,True,False,False,True,False,False,True,False,...,False,False,False,True,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117,False,False,True,False,False,True,False,False,True,False,...,False,False,False,True,False,False,False,False,False,False
118,False,False,True,False,True,False,False,False,True,False,...,False,False,False,True,False,False,False,False,False,False
119,False,False,True,False,True,False,False,False,False,False,...,False,False,False,True,False,False,False,False,False,False
120,False,False,True,False,False,True,False,False,False,True,...,False,False,False,False,False,True,False,False,False,False


In [5]:
from sklearn.tree import DecisionTreeClassifier

# Entropy-split decision tree limited to depth 9, with random_state pinned to 0.
model = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=9,
    random_state=0,
)

# Fit on the one-hot features with the character name as the class label.
model.fit(X=X_d, y=y)

In [6]:
def format_feature(feature):
    """Turn a one-hot column name into the yes/no question shown to the player.

    The text after the last underscore is used as the property value; whatever
    precedes it (with any remaining underscores turned into spaces) is used as
    the property name. A value of '-' is rendered as "not specified".
    """
    head, _, tail = feature.rpartition('_')
    property_name = head.replace('_', ' ')
    property_value = "not specified" if tail == '-' else tail
    return f"Does the character's {property_name} {property_value}?"


In [24]:
def ask_question(feature):
    """Ask the question for `feature` repeatedly until a valid reply is typed.

    The reply is stripped of surrounding whitespace and returned as typed
    (its case is not normalised). It is accepted once its lower-cased form
    is 'yes', 'no' or 'back'.
    """
    valid_replies = ('yes', 'no', 'back')
    prompt = f"{format_feature(feature)}\n➤ yes\n➤ no\n➤ back"
    while True:
        print(prompt)
        reply = input("Your choice: ").strip()
        if reply.lower() in valid_replies:
            return reply
        print("Please enter 'yes', 'no', or 'back'.")

In [8]:
def print_header(title):
    """Print `title` centred in a 30-column field between two '=' rules.

    One blank line is emitted before the banner and one after it.
    """
    width = 30
    rule = "=" * width
    banner_lines = ["", rule, f"{title:^{width}}", rule, ""]
    print("\n".join(banner_lines))

In [9]:
def display_progress_bar(progress, total_length=20):
    """Render a percentage as a fixed-width text bar, e.g. ``[█████---------------]``.

    Args:
        progress: Completion percentage. Values below 0 or above 100 are
            clamped so the bar never changes width.
        total_length: Number of character cells between the brackets.

    Returns:
        A string made of '[', the filled cells ('█'), the empty cells ('-')
        and ']'.
    """
    # Convert the percentage to a number of filled cells.
    filled_length = int(round(total_length * progress / 100))
    # Clamp: without this, out-of-range percentages produce a bar that is
    # longer than total_length (too many '█' above 100, too many '-' below 0).
    filled_length = max(0, min(total_length, filled_length))
    bar = '█' * filled_length + '-' * (total_length - filled_length)
    return f"[{bar}]"

In [35]:
def _is_leaf(tree, node):
    """Return True when `node` has no split (its two child ids are equal)."""
    return tree.children_left[node] == tree.children_right[node]


def _remaining_candidates(model, node, rejected):
    """List the class labels still possible at `node`, most likely first.

    Only classes with a non-zero entry in the node's value vector are kept,
    and labels already present in `rejected` are skipped.
    """
    scores = model.tree_.value[node][0]
    ranked = scores.argsort()[::-1]
    return [
        model.classes_[class_index]
        for class_index in ranked
        if scores[class_index] > 0 and model.classes_[class_index] not in rejected
    ]


def _show_progress(remaining_count):
    """Print the progress line: 100% divided by the number of open candidates."""
    remaining_count = max(remaining_count, 1)  # Avoid division by zero
    progress = (1 / remaining_count) * 100
    print(f"Progress: {display_progress_bar(progress)} {progress:.2f}%\n")


def _confirm_guess(prompt):
    """Show `prompt` with yes/no options until answered; True means 'yes'."""
    while True:
        print(f"{prompt}\n✔ yes\n✘ no")
        reply = input("Your choice: ").strip().lower()
        if reply in ('yes', 'no'):
            return reply == 'yes'
        print("Please enter 'yes' or 'no'.")


def akinator_game(model, features, data):
    """Play an interactive guessing game by walking a fitted decision tree.

    Starting at the root, each internal node's split feature is turned into a
    yes/no question: 'no' follows the left child, 'yes' the right child and
    'back' returns to the previously answered question. After every second
    answered question the most likely remaining character is proposed. At a
    leaf every remaining character of that leaf is proposed in turn. The
    function returns after a correct guess or once a leaf has no character
    left to propose.

    Args:
        model: Fitted classifier exposing `tree_` (children_left,
            children_right, feature, value) and `classes_`.
        features: Indexable collection of feature names, aligned with the
            feature indices stored in the tree.
        data: Not used by the game; kept so existing calls keep working.

    Returns:
        None. All interaction happens through print() and input().
    """
    tree = model.tree_
    win_message = "🎉🌟 Great! The character has been guessed correctly! 🌟🎉"
    out_of_guesses_message = "🙁🔄 Oops! I'm out of guesses! Let's start over. 🔄🙁"
    guess_every = 2  # propose a character after every 2 answered questions

    node = 0  # current position in the tree
    visited_nodes = []  # answered question nodes on the current path (for 'back')
    last_predictions = []  # characters the player has already rejected

    print_header("Welcome to Akinator Game")

    while True:

        # Leaf node: no more questions, propose each remaining character
        if _is_leaf(tree, node):
            candidates = _remaining_candidates(model, node, last_predictions)
            for position, prediction in enumerate(candidates, start=1):
                if _confirm_guess(f"Is the character {prediction}?"):
                    print(win_message)
                    return
                last_predictions.append(prediction)

                still_open = len(candidates) - position
                if still_open:
                    _show_progress(still_open)
                    print("Let's try another guess.")

            print(out_of_guesses_message)
            return

        # Internal node: ask about the feature this node splits on
        feature_name = features[tree.feature[node]]
        answer = ask_question(feature_name).lower()

        if answer == 'back':
            # The stack only holds answered questions, so a refused 'back'
            # at the first question leaves it untouched.
            if visited_nodes:
                node = visited_nodes.pop()
            else:
                print("You are at the first question, can't go back further.")
            continue

        # Remember where we came from, then descend
        visited_nodes.append(node)
        if answer == 'no':
            node = tree.children_left[node]
        else:
            node = tree.children_right[node]

        candidates = _remaining_candidates(model, node, last_predictions)
        if _is_leaf(tree, node):
            if not candidates:
                # Everything in this leaf was already rejected earlier
                print(out_of_guesses_message)
                return
            _show_progress(len(candidates))
            continue  # the leaf is handled at the top of the loop

        _show_progress(len(candidates))

        # Intermediate guess; the stack depth is the number of answered
        # questions on the current path, so it stays correct after 'back'.
        if candidates and len(visited_nodes) % guess_every == 0:
            guess = candidates[0]
            if _confirm_guess(f"My guess: Is the character {guess}?"):
                print(win_message)
                return
            last_predictions.append(guess)
            _show_progress(len(candidates) - 1)

In [37]:
# Play the game
features = X_d.columns
akinator_game(model=model, features=features, data=X_d)


   Welcome to Akinator Game   

Does the character's Publisher Marvel Comics?
➤ yes
➤ no
➤ back


Your choice:  yes


Progress: [--------------------] 1.61%

Does the character's Race Human?
➤ yes
➤ no
➤ back


Your choice:  yes


Progress: [█-------------------] 3.57%

My guess: Is the character She-Hulk?
✔ yes
✘ no


Your choice:  no


Progress: [█-------------------] 3.70%

Does the character's EyeColor brown?
➤ yes
➤ no
➤ back


Your choice:  yes


Progress: [█-------------------] 7.14%

Does the character's HairColor Black?
➤ yes
➤ no
➤ back


Your choice:  yes


Progress: [███-----------------] 16.67%

My guess: Is the character Spider-Man?
✔ yes
✘ no


Your choice:  yes


🎉🌟 Great! The character has been guessed correctly! 🌟🎉


In [12]:
# from sklearn.tree import export_graphviz

# # Export as dot file
# export_graphviz(model, out_file='output/tree.dot', 
#                 feature_names = features,
#                 class_names = model.classes_,
#                 rounded = True, proportion = False, 
#                 precision = 2, filled = True)



In [13]:
# # Read the generated dot file
# with open('output/tree.dot', 'r') as file:
#     dot_data = file.readlines()

# # Function to insert node numbers
# def insert_node_numbers(dot_data):
#     node_count = 0
#     for i, line in enumerate(dot_data):
#         if '->' not in line and '[label="' in line:
#             # Inserting the node number
#             line_parts = line.split('[label="')
#             line_parts[1] = f'Node {node_count}: ' + line_parts[1]
#             dot_data[i] = '[label="'.join(line_parts)
#             node_count += 1
#     return dot_data

# # Modify the dot data
# modified_dot_data = insert_node_numbers(dot_data)

# # Write the modified dot data back to the file
# with open('output/tree_modified.dot', 'w') as file:
#     file.writelines(modified_dot_data)
