<a href="https://colab.research.google.com/github/andylehti/MBTIspectrum/blob/main/MBTIAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from io import StringIO

# Tab-separated table: a header row, then one row per personality type
data_str = """
Personality	Probability
INTP	0.199739
INFJ	0.157179
INTJ	0.152837
INFP	0.150595
ENTP	0.079007
ENFP	0.049553
ISTP	0.041250
ISFP	0.034651
ESTJ	0.029317
ENTJ	0.026273
ISTJ	0.024670
ENFJ	0.022473
ISFJ	0.016302
ESTP	0.009268
ESFP	0.004149
ESFJ	0.002737
"""

# Parse the table into a DataFrame. The leading/trailing newlines of the
# literal are stripped first so the header is the first line the parser sees.
df = pd.read_csv(
    StringIO(data_str.strip()),
    delimiter='\t',
)

def extrapolate(df):
    """Split each personality code into its letters and per-pole percentages.

    Args:
        df: DataFrame with a 'Personality' column of codes that are at least
            four characters long (e.g. 'INTP') and a numeric 'Probability'
            column.

    Returns:
        A ``(df_extrapolated, df_totals)`` tuple.

        ``df_extrapolated`` has one row per input row and keeps the input's
        index. Its columns are: the four code letters in 'I', 'N', 'T' and
        'J' (each column is named after the first pole of its pair), the
        original 'Probability', 'Percentage' (probability * 100 rounded to
        3 decimals), and the eight pole columns 'I%', 'E%', 'N%', 'S%',
        'T%', 'F%', 'J%', 'P%'. A pole column holds the row's unrounded
        percentage when the row's letter equals that pole and 0.0 otherwise;
        a letter matching neither pole of its pair counts towards neither.

        ``df_totals`` is a single row labelled 'Total' (column 'Dimension')
        holding the sum of each of the eight pole columns.

    Raises:
        IndexError: If a personality code has fewer than four characters.
    """
    # One entry per code position; the two characters are the opposing poles.
    pole_pairs = ('IE', 'NS', 'TF', 'JP')

    # Iterate the two columns directly instead of DataFrame.iterrows(), which
    # builds a throwaway Series for every row.
    codes = list(df['Personality'])
    percentages = [probability * 100 for probability in df['Probability']]

    letter_columns = {}
    pole_columns = {}
    for position, pair in enumerate(pole_pairs):
        letters = [code[position] for code in codes]
        letter_columns[pair[0]] = letters
        for pole in pair:
            # Fill with 0.0 rather than 0 so that a pole no row belongs to
            # still produces a float column, like every other pole.
            pole_columns[pole + '%'] = [
                percentage if letter == pole else 0.0
                for letter, percentage in zip(letters, percentages)
            ]

    # Dict insertion order fixes the column order: letters, probability,
    # rounded percentage, then the eight pole columns.
    df_extrapolated = pd.DataFrame({
        **letter_columns,
        'Probability': df['Probability'],
        'Percentage': [round(percentage, 3) for percentage in percentages],
        **pole_columns,
    })

    # A float start value keeps the totals float even for an empty input.
    df_totals = pd.DataFrame({
        'Dimension': ['Total'],
        **{name: [sum(values, 0.0)] for name, values in pole_columns.items()},
    })

    return df_extrapolated, df_totals

# Build the per-type breakdown and the per-dimension totals
df_extrapolated, df_totals = extrapolate(df)

# Give the breakdown a fresh 0..n-1 index before it is shown
df_extrapolated = df_extrapolated.reset_index(drop=True)

# Print each heading, then render the matching table
for heading, frame in (
    ("Extrapolated Data:", df_extrapolated),
    ("\nTotals for Each Dimension:", df_totals),
):
    print(heading)
    display(frame)
