In [3]:
from main import create_data
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import random

# Generate 200 synthetic participants and flip the frame so that the HEXACO
# traits end up as columns (describe() below reports count == 200 per trait).
# NOTE(review): the orientation returned by create_data is not visible here —
# the transpose is assumed to be what puts one participant per row.
df = create_data(200).T

# Per-trait descriptive statistics (count, mean, std, min, quartiles, max).
summary_stats = df.loc[:, ['h', 'e', 'x', 'a', 'c', 'o']].describe()

print(summary_stats)

                h           e           x           a           c           o
count  200.000000  200.000000  200.000000  200.000000  200.000000  200.000000
mean     2.372500    2.530625    2.564062    2.526562    2.470000    2.518125
std      0.350981    0.352329    0.365963    0.296377    0.370840    0.358712
min      1.500000    1.750000    1.500000    1.687500    1.375000    1.625000
25%      2.125000    2.250000    2.312500    2.312500    2.250000    2.250000
50%      2.312500    2.500000    2.562500    2.562500    2.500000    2.562500
75%      2.625000    2.812500    2.812500    2.750000    2.703125    2.750000
max      3.250000    3.562500    3.625000    3.312500    3.312500    3.312500


In [4]:
# Walk through the six HEXACO columns and dump each one's describe() series
# under its own heading, followed by two blank lines as a visual separator.
for trait_name in "hexaco":
    # sep="\n" stitches heading, series and the trailing "\n" together exactly
    # as three consecutive print() calls would.
    print(f"Summary statistics for {trait_name}:", summary_stats[trait_name], "\n", sep="\n")

Summary statistics for h:
count    200.000000
mean       2.372500
std        0.350981
min        1.500000
25%        2.125000
50%        2.312500
75%        2.625000
max        3.250000
Name: h, dtype: float64


Summary statistics for e:
count    200.000000
mean       2.530625
std        0.352329
min        1.750000
25%        2.250000
50%        2.500000
75%        2.812500
max        3.562500
Name: e, dtype: float64


Summary statistics for x:
count    200.000000
mean       2.564062
std        0.365963
min        1.500000
25%        2.312500
50%        2.562500
75%        2.812500
max        3.625000
Name: x, dtype: float64


Summary statistics for a:
count    200.000000
mean       2.526562
std        0.296377
min        1.687500
25%        2.312500
50%        2.562500
75%        2.750000
max        3.312500
Name: a, dtype: float64


Summary statistics for c:
count    200.000000
mean       2.470000
std        0.370840
min        1.375000
25%        2.250000
50%        2.500000
75%   

In [5]:
# Show only the 'mean' and 'std' rows of the summary table, one column per trait.
print(summary_stats.loc[['mean', 'std'], :])
# Reference point: the arithmetic mean of the integers 1..5 (true division -> 3.0).
print("theoretical mean for number from 1 to 5 is:", sum(range(1, 6)) / 5)

             h         e         x         a        c         o
mean  2.372500  2.530625  2.564062  2.526562  2.47000  2.518125
std   0.350981  0.352329  0.365963  0.296377  0.37084  0.358712
theoretical mean for number from 1 to 5 is: 3.0


In [6]:
# Using summary_stats and plotly, plot a bar chart of the mean of each HEXACO
# trait with the standard deviation as error bars, then overlay the scores of
# individual participants as markers.
# NOTE(review): this cell was originally described as plotting the *difference*
# of the mean to 3.5; the code plots the raw means (and the cell above prints
# 3.0 as the theoretical mean) — confirm which is intended.

# One row per participant; values are ordered h, e, x, a, c, o to match the
# x positions used for the markers below.
prt_values = [
    [ 3., 2.9, 2.7, 3.6, 3.5, 2.8], # participant 1
    [ 3.4, 2.7, 3.1, 2.6, 3.2, 3.1], # participant 2
]

mean = summary_stats.loc['mean']

std = summary_stats.loc['std']

fig = px.bar(x=mean.index, y=mean, error_y=std)

# Draw the marker colours without replacement so every participant gets a
# distinct colour. The previous retry loop only recorded a colour from inside
# the loop body, so the "already used" list stayed empty and duplicates were
# possible. min() keeps random.sample from raising when there are more
# participants than palette entries; in that case colours wrap around.
palette = px.colors.qualitative.Plotly
colors_used = random.sample(palette, k=min(len(prt_values), len(palette)))

for i, my_values in enumerate(prt_values):
    color = colors_used[i % len(colors_used)]

    fig.add_trace(go.Scatter(
        x=['h', 'e', 'x', 'a', 'c', 'o'],
        y=my_values,
        mode='markers',
        marker=dict(color=color, size=10),
        name=f'Participant {i+1}'
    ))


fig.update_layout(
    xaxis_title='Trait',
    yaxis_title='Mean',
    title=f'Mean for each trait for {len(df)} Participants',
    # The y axis is deliberately zoomed to 2..4 rather than starting at 0.
    yaxis=dict(range=[2, 4])

)
fig.show()

In [7]:
# using df create a box plot with plotly for each HEXACO trait
fig = px.box(df, y=['h', 'e', 'x', 'a', 'c', 'o'])
fig.update_layout(
    xaxis_title='Trait',
    yaxis_title='Trait value',
    title=f'HEXACO Trait Distribution ({len(df)} Participants)'
)

# Add dots at each trait using prt_values (values ordered h, e, x, a, c, o).
# NOTE(review): these scores differ from the prt_values plotted on the bar
# chart in the previous cell (In [6]) — confirm which set is correct.
prt_values = [
    [ 3.1, 2.9, 2.5, 3.6, 3.1, 2.5], # participant 1
    [ 3.6, 2.7, 3.1, 2.6, 3.2, 3.1], # participant 2
]

# Draw the marker colours without replacement so every participant gets a
# distinct colour. The previous retry loop never added anything to
# colors_used, so its duplicate check could not trigger. min() keeps
# random.sample from raising when there are more participants than palette
# entries; in that case colours wrap around.
palette = px.colors.qualitative.Plotly
colors_used = random.sample(palette, k=min(len(prt_values), len(palette)))

for i, my_values in enumerate(prt_values):
    color = colors_used[i % len(colors_used)]

    fig.add_trace(go.Scatter(
        x=['h', 'e', 'x', 'a', 'c', 'o'],
        y=my_values,
        mode='markers',
        marker=dict(color=color, size=10),
        name=f'Participant {i+1}'
    ))

fig.show()


In [8]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [9]:
# using df create a histogram for each HEXACO trait and put them as facets
fig = px.histogram(df, x=['h', 'e', 'x', 'a', 'c', 'o'],
        facet_col='variable', facet_col_wrap=2)

# Pin one explicit bin grid and use it for BOTH the drawn bars and the y-axis
# cap computed below. Previously the figure relied on plotly's automatic
# binning (nbins is only an upper bound there) while the cap was computed with
# numpy using 25 bins over each trait's own range, so the cap did not
# correspond to the bars on screen and could clip them.
# 0.125 is exactly representable in binary, so plotly and numpy agree on the
# edges. NOTE(review): the bin width is a presentation choice — tune as needed.
bin_size = 0.125
bin_stop = 5 + bin_size  # one bin past the axis maximum so a score of exactly 5 is counted
bin_edges = np.arange(0, bin_stop + bin_size, bin_size)
fig.update_traces(xbins=dict(start=0, end=bin_stop, size=bin_size))

fig.update_layout(
    yaxis_title='Count',
    title=f'HEXACO Trait Distribution ({len(df)} Participants)',
    # set figure height and width
    height=800,
    width=800,
)
# Set x-axis range and tick marks
fig.update_xaxes(range=[0, 5], tickmode='linear', tick0=0, dtick=0.5)

# Tallest bar across all six facets, counted on the same bin grid as the figure.
max_count = int(max(
    np.histogram(df[trait], bins=bin_edges)[0].max()
    for trait in ['h', 'e', 'x', 'a', 'c', 'o']
))

# set y axis to start at 0 and end at the highest count
fig.update_yaxes(range=[0, max_count])

# Participant scores (ordered h, e, x, a, c, o).
# TODO: these are defined here but not yet drawn on the histogram facets.
prt_values = [
    [ 3.1, 2.9, 2.5, 3.6, 3.1, 2.5], # participant 1
    [ 3.6, 2.7, 3.1, 2.6, 3.2, 3.1], # participant 2
]

fig.show()
