In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load the survey responses. Abort with SystemExit instead of calling exit():
# exit() is an interactive-shell helper injected by the `site` module (and
# overridden by IPython), so it is not guaranteed to exist or to stop execution.
try:
    df = pd.read_csv('intro_extro.csv', encoding='latin-1')
except FileNotFoundError:
    raise SystemExit("Error: 'intro_extro.csv' not found. Please provide the correct file.")

# Step 1: Strip any unnecessary whitespace from column names
df.columns = df.columns.str.strip()

# Step 2: Trait columns that are summed into each score. These names must
# match the (stripped) CSV headers exactly.
introvert_cols = ['I am quiet around strangers.', 'Panic easily.', 'Try to follow the rules.',
                  'Get irritated easily.','I have little to say.']

extrovert_cols = ['Make friends easily.', 'Love large parties.', 'Like order.', 'Take charge.',
                  'Like to visit new places.']

# Step 3: Fail early with a readable message if any expected column is absent,
# rather than letting the selection below raise a bare KeyError.
missing_cols = [col for col in introvert_cols + extrovert_cols if col not in df.columns]
if missing_cols:
    raise SystemExit(f"Error: expected column(s) missing from 'intro_extro.csv': {missing_cols}")

# Step 4: Treat missing answers as 0 and calculate introvert/extrovert scores
df['introvert_score'] = df[introvert_cols].fillna(0).sum(axis=1)
df['extrovert_score'] = df[extrovert_cols].fillna(0).sum(axis=1)

# Label: 1 = extrovert (strictly higher extrovert score), 0 = introvert (ties included)
df['personality'] = (df['extrovert_score'] > df['introvert_score']).astype(int)


# Features are every original column; only the derived score/label columns are removed.
X = df.drop(columns=['introvert_score', 'extrovert_score', 'personality'])
y = df['personality']

# Text-typed columns must be convertible to numbers before they reach the model.
for col in X.columns:
    if pd.api.types.is_object_dtype(X[col]):
        try:
            X[col] = pd.to_numeric(X[col])
        except (ValueError, TypeError):
            raise SystemExit(f"Column '{col}' contains non-numeric values. Please clean your data.")

# Fixed random_state keeps the 80/20 split and the forest reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# NOTE: the label is computed directly from answer columns that are still part
# of X, so an accuracy at or near 1.0 is expected here and does not indicate how
# well the model would generalize to independently labelled data.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model accuracy: {accuracy}")
def predict():
    """Ask ten yes/no questions on stdin and report a heuristic personality result.

    Both scores start at 50. Each answer moves 5 points from one score to the
    other: "yes" on a trait listed in ``introvert_cols`` (or "no" on a trait
    listed in ``extrovert_cols``) favours introversion, and the opposite
    answers favour extroversion. The scores are printed together with a
    verdict.

    This function does not use the trained ``model``; the "probabilities" are
    a simple additive tally, not model output.

    Returns:
        tuple: ``(introvert_prob, extrovert_prob)``, each clamped to 0-100.
    """
    # Each entry pairs the prompt text with the trait column it stands for.
    questions = [
        ("Do you prefer to be alone? (Yes/No)", "I am quiet around strangers."),
        ("Do you panic easily? (Yes/No)", "Panic easily."),
        ("Do you try to follow rules? (Yes/No)", "Try to follow the rules."),
        ("Do you get irritated easily? (Yes/No)", "Get irritated easily."),
        ("Do you have little to say? (Yes/No)", "I have little to say."),
        ("Do you make friends easily? (Yes/No)", "Make friends easily."),
        ("Do you love large parties? (Yes/No)", "Love large parties."),
        ("Do you like order? (Yes/No)", "Like order."),
        ("Do you take charge? (Yes/No)", "Take charge."),
        ("Do you like to visit new places? (Yes/No)", "Like to visit new places."),
    ]

    # Start from an even split.
    introvert_prob = 50.0
    extrovert_prob = 50.0

    def update_probabilities(col_name, answer):
        """Shift 5 points between the two scores according to one answer."""
        nonlocal introvert_prob, extrovert_prob

        if answer not in ('yes', 'no'):
            return
        if col_name in introvert_cols:
            favours_introvert = answer == 'yes'
        elif col_name in extrovert_cols:
            favours_introvert = answer == 'no'
        else:
            # Trait belongs to neither list: the answer carries no weight.
            return

        shift = 5 if favours_introvert else -5
        introvert_prob += shift
        extrovert_prob -= shift

    # Step through all the questions, re-asking until the answer is valid.
    for question, col_name in questions:
        while True:
            # The question text already ends with "(Yes/No)", so it is not
            # appended again. Surrounding whitespace is ignored.
            answer = input(f"{question}: ").strip().lower()
            if answer in ('yes', 'no'):
                update_probabilities(col_name, answer)
                break
            print("Invalid input. Please enter 'Yes' or 'No'.")

    # Ensure that the scores stay within the range 0 to 100.
    introvert_prob = max(0, min(introvert_prob, 100))
    extrovert_prob = max(0, min(extrovert_prob, 100))

    print(f"Final Introvert Probability: {introvert_prob:.2f}%")
    print(f"Final Extrovert Probability: {extrovert_prob:.2f}%")

    if introvert_prob > extrovert_prob:
        print("Based on your answers, you are likely an Introvert.")
    elif extrovert_prob > introvert_prob:
        print("Based on your answers, you are likely an Extrovert.")
    else:
        # An exact tie gives no evidence either way, so no type is claimed.
        print("Based on your answers, you are evenly balanced between Introvert and Extrovert.")
    return introvert_prob, extrovert_prob
# Run the interactive questionnaire. Left as a bare trailing expression so a
# notebook also echoes the returned (introvert_prob, extrovert_prob) tuple.
predict()

Model accuracy: 1.0


Do you prefer to be alone? (Yes/No) (Yes/No):  yes
Do you panic easily? (Yes/No) (Yes/No):  no
Do you try to follow rules? (Yes/No) (Yes/No):  yes
Do you get irritated easily? (Yes/No) (Yes/No):  no
Do you have little to say? (Yes/No) (Yes/No):  no
Do you make friends easily? (Yes/No) (Yes/No):  yes
Do you love large parties? (Yes/No) (Yes/No):  no
Do you like order? (Yes/No) (Yes/No):  yes
Do you take charge? (Yes/No) (Yes/No):  no
Do you like to visit new places? (Yes/No) (Yes/No):  yes


Final Introvert Probability: 40.00%
Final Extrovert Probability: 60.00%
Based on your answers, you are likely an Extrovert.


(40.0, 60.0)