In [1]:
from main import create_data
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import random


# Load the HEXACO data set. The meaning of the second argument (200) is defined
# in main.create_data; presumably it is the number of participants, since
# describe() reports count == 200 -- TODO confirm.
df = create_data("data/CHtest1.csv", 200)

# Descriptive statistics with one column per trait (H, E, X, A, C, O).
summary_stats = df.loc[:, list("hexaco")].describe()
# Example of the rows produced for a single trait column:
# count    200.000000
# mean       3.003437
# std        0.343254
# min        2.062500
# 25%        2.750000
# 50%        3.000000
# 75%        3.250000
# max        4.062500

In [2]:
# Show the describe() output of every HEXACO trait column, one trait at a time.
# A single print() with sep="\n" emits the header, the statistics, and the same
# two trailing blank lines as three separate print() calls would.
for trait in ('h', 'e', 'x', 'a', 'c', 'o'):
    print(f"Summary statistics for {trait}:", summary_stats[trait], "\n", sep="\n")

Summary statistics for h:
count    200.000000
mean       2.987500
std        0.346714
min        1.937500
25%        2.796875
50%        3.000000
75%        3.187500
max        3.937500
Name: h, dtype: float64


Summary statistics for e:
count    200.000000
mean       3.035938
std        0.315932
min        2.187500
25%        2.812500
50%        3.062500
75%        3.250000
max        3.750000
Name: e, dtype: float64


Summary statistics for x:
count    200.000000
mean       2.984688
std        0.349055
min        2.187500
25%        2.687500
50%        3.000000
75%        3.250000
max        4.000000
Name: x, dtype: float64


Summary statistics for a:
count    200.000000
mean       2.948750
std        0.356195
min        2.000000
25%        2.750000
50%        2.937500
75%        3.187500
max        3.812500
Name: a, dtype: float64


Summary statistics for c:
count    200.000000
mean       3.026250
std        0.367458
min        1.937500
25%        2.812500
50%        3.000000
75%   

In [3]:
# Compare the observed mean / std of each trait with the mean of the answer
# scale itself (the integers 1 through 5).
scale_points = range(1, 6)
print(summary_stats.loc[['mean', 'std']])
print("theoretical mean for number from 1 to 5", sum(scale_points) / len(scale_points))


             h         e         x         a         c         o
mean  2.987500  3.035938  2.984688  2.948750  3.026250  2.935625
std   0.346714  0.315932  0.349055  0.356195  0.367458  0.333256
theoretical mean for number from 1 to 5 3.0


In [4]:
# Bar chart of the mean score of each HEXACO trait (taken from summary_stats),
# with the standard deviation as error bars and every participant's own scores
# overlaid as markers.

# One row per participant; values are ordered h, e, x, a, c, o to match the
# x categories used for the markers below.
prt_values = [
    [ 3., 2.9, 2.7, 3.6, 3.5, 2.8], # participant 1
    [ 3.4, 2.7, 3.1, 2.6, 3.2, 3.1], # participant 2
]

mean = summary_stats.loc['mean']
std = summary_stats.loc['std']

fig = px.bar(x=mean.index, y=mean, error_y=std)

# Give each participant a distinct colour that is the same on every run.
# Indexing the palette replaces the earlier random draw, whose "already used"
# bookkeeping never recorded a colour and so could hand out duplicates.
# Index 0 is skipped because it is presumably the default colour of the bars
# themselves (default plotly template) and a marker in it would blend in.
palette = px.colors.qualitative.Plotly
for i, my_values in enumerate(prt_values):
    color = palette[(i + 1) % len(palette)]

    fig.add_trace(go.Scatter(
        x=['h', 'e', 'x', 'a', 'c', 'o'],
        y=my_values,
        mode='markers',
        marker=dict(color=color, size=10),
        name=f'Participant {i+1}'
    ))


fig.update_layout(
    xaxis_title='Trait',
    yaxis_title='Mean',
    title=f'Mean for each trait for {len(df)} Participants',
    # Zoom in on the region where the means and markers lie.
    yaxis=dict(range=[2, 4])
)
fig.show()

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [None]:
# Box plot of the distribution of each HEXACO trait in df, with every
# participant's own scores overlaid as markers.
fig = px.box(df, y=['h', 'e', 'x', 'a', 'c', 'o'])
fig.update_layout(
    xaxis_title='Trait',
    yaxis_title='Trait value',
    title=f'HEXACO Trait Distribution ({len(df)} Participants)'
)

# One row per participant; values are ordered h, e, x, a, c, o to match the
# x categories used for the markers below.
prt_values = [
    [ 3.1, 2.9, 2.5, 3.6, 3.1, 2.5], # participant 1
    [ 3.6, 2.7, 3.1, 2.6, 3.2, 3.1], # participant 2
]

# Give each participant a distinct colour that is the same on every run.
# The earlier random draw never recorded which colours were taken, so its
# uniqueness check could not prevent duplicates. Index 0 is skipped because
# it is presumably the default colour of the boxes (default plotly template).
palette = px.colors.qualitative.Plotly
for i, my_values in enumerate(prt_values):
    color = palette[(i + 1) % len(palette)]

    fig.add_trace(go.Scatter(
        x=['h', 'e', 'x', 'a', 'c', 'o'],
        y=my_values,
        mode='markers',
        marker=dict(color=color, size=10),
        name=f'Participant {i+1}'
    ))

fig.show()


In [None]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [None]:
# Faceted histograms: one panel per HEXACO trait, with every participant's
# score marked by a dashed vertical line in the matching panel.
traits = ['h', 'e', 'x', 'a', 'c', 'o']

# One explicit bin specification shared by plotly (drawing) and numpy (y-axis
# ceiling). Previously plotly chose its own bins (nbins is only an upper bound)
# while the ceiling was computed from 25 numpy bins over each trait's own data
# range, so the two could disagree and clip the tallest bars.
x_min, x_max, n_bins = 0, 5, 40
bin_size = (x_max - x_min) / n_bins
bin_edges = np.linspace(x_min, x_max, n_bins + 1)

fig = px.histogram(df, x=traits, facet_col='variable', facet_col_wrap=2)
fig.update_traces(xbins=dict(start=x_min, end=x_max, size=bin_size))

fig.update_layout(
    yaxis_title='Count',
    title=f'HEXACO Trait Distribution ({len(df)} Participants)',
    height=800,
    width=800,
)
# Set x-axis range and tick marks
fig.update_xaxes(range=[x_min, x_max], tickmode='linear', tick0=0, dtick=0.5)

# Tallest bar over all traits, using the same bin edges as the figure.
max_count = max(int(np.histogram(df[trait], bins=bin_edges)[0].max()) for trait in traits)

# Start the y axis at 0 and leave 10% headroom above the tallest bar, so small
# differences in how the two libraries treat values on a bin edge cannot clip it.
y_max = max_count * 1.1
fig.update_yaxes(range=[0, y_max])

# One row per participant; values are ordered h, e, x, a, c, o (same as traits).
prt_values = [
    [ 3.1, 2.9, 2.5, 3.6, 3.1, 2.5], # participant 1
    [ 3.6, 2.7, 3.1, 2.6, 3.2, 3.1], # participant 2
]

# Draw each participant's score in the panel of the corresponding trait.
# Panels are located through the histogram traces themselves: each one is
# expected to be named after its trait (wide-form px input) and carries the
# axis ids of its own panel. Traces with any other name are skipped.
palette = px.colors.qualitative.Plotly
trait_position = {trait: k for k, trait in enumerate(traits)}
histogram_traces = [trace for trace in fig.data if trace.type == 'histogram']
for hist in histogram_traces:
    k = trait_position.get(hist.name)
    if k is None:
        continue
    for i, my_values in enumerate(prt_values):
        fig.add_trace(go.Scatter(
            x=[my_values[k], my_values[k]],
            y=[0, y_max],
            mode='lines',
            # Offset past the colours presumably taken by the trait histograms.
            line=dict(color=palette[(len(traits) + i) % len(palette)], width=2, dash='dash'),
            name=f'Participant {i+1}',
            legendgroup=f'participant-{i+1}',
            # One legend entry per participant instead of one per panel.
            showlegend=(k == 0),
            xaxis=hist.xaxis,
            yaxis=hist.yaxis,
        ))

fig.show()
