# Data exploration

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px


In [2]:
# Load the long-format dataset used by every chart below.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle
# files that come from a trusted source.
full_df = pd.read_pickle('full_data_long_format.pkl')

# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() would emit an extra "None" line.
full_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3824 entries, 0 to 3823
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   source_file       3824 non-null   object 
 1   description       3824 non-null   object 
 2   multiple answers  3278 non-null   boolean
 3   question          3824 non-null   object 
 4   answer            3824 non-null   object 
 5   sex               3824 non-null   object 
 6   age               3824 non-null   object 
 7   percentage        3322 non-null   float64
dtypes: boolean(1), float64(1), object(6)
memory usage: 216.7+ KB
None


In [None]:


# Keep only the rows whose source_file is 'KV2AI2'.
ai_rows = full_df.loc[full_df['source_file'].eq('KV2AI2')]

# Grouped bars: percentage per age group, coloured by answer,
# with one facet column per sex.
fig = px.bar(
    ai_rows,
    x='age',
    y='percentage',
    color='answer',
    facet_col='sex',
    barmode='group',
    title='Use of Artificial Intelligence',
)
fig.show()


In [7]:
# The question text serves both as the row filter and as the chart title.
library_question = 'What was the purpose of your last library visit?'
library_rows = full_df.loc[full_df['question'].eq(library_question)]

# Grouped bars: percentage per answer, coloured by age group.
fig = px.bar(
    library_rows,
    x='answer',
    y='percentage',
    color='age',
    barmode='group',
    title=library_question,
)
fig.show()

In [13]:
# Negate the percentage for rows where sex == 'Men' so that the two sexes
# extend in opposite directions from zero on the horizontal axis.
# Done with a vectorised Series.mask rather than a row-wise DataFrame.apply:
# the resulting column is the same, and missing percentages stay NaN.
is_men = full_df['sex'].eq('Men')
full_df['plot_value'] = full_df['percentage'].mask(is_men, -full_df['percentage'])

# Horizontal bars per age group, restricted to one answer, coloured by sex.
fig = px.bar(
    full_df[full_df['answer'] == 'Have played digital games'],
    x='plot_value',
    y='age',
    color='sex',
    orientation='h',
    barmode='relative'
)

# Tick positions are signed, but the labels show magnitudes on both sides.
fig.update_xaxes(tickvals=[-60, -40, -20, 0, 20, 40, 60],
                 ticktext=[60, 40, 20, 0, 20, 40, 60])

fig.update_layout(
    title={
        'text': "Have played digital games",
        'y': 0.95,
        'x': 0.55,
        'xanchor': 'center',
        'yanchor': 'top'})

fig.show()

In [14]:

# Keep only the rows whose source_file is 'KV2SP8'.
game_event_rows = full_df.loc[full_df['source_file'].eq('KV2SP8')]

# Grouped bars: percentage per age group, coloured by answer,
# with one facet column per sex.
fig = px.bar(
    game_event_rows,
    x='age',
    y='percentage',
    color='answer',
    facet_col='sex',
    barmode='group',
    title='Use of live digital game events',
)
fig.show()

In [15]:

# Keep only the rows whose source_file is 'KV2SM1'.
social_media_rows = full_df.loc[full_df['source_file'].eq('KV2SM1')]

# Grouped bars: percentage per age group, coloured by answer,
# with one facet column per sex.
fig = px.bar(
    social_media_rows,
    x='age',
    y='percentage',
    color='answer',
    facet_col='sex',
    barmode='group',
    title='Use of Social Media',
)
fig.show()

In [None]:
import plotly.graph_objects as go

# Rows from the 'KV2AHOV' source file; the chart shows one sex at a time.
culture_rows = full_df[full_df['source_file'] == 'KV2AHOV']

fig = go.Figure()

# Add one bar trace per answer for each sex (men first, then women) and record
# each trace's position in fig.data. The women's traces start hidden, so the
# initial view shows only the men's traces.
trace_ids = {'Men': [], 'Women': []}
for sex_label, extra_bar_args in (('Men', {}), ('Women', {'visible': False})):
    sex_rows = culture_rows[culture_rows['sex'] == sex_label]
    for ans in sex_rows['answer'].unique():
        ans_rows = sex_rows[sex_rows['answer'] == ans]
        trace_ids[sex_label].append(len(fig.data))
        fig.add_bar(
            x=ans_rows['age'],
            y=ans_rows['percentage'],
            name=f"{ans} ({sex_label})",
            **extra_bar_args,
        )

# Per-trace visibility lists: True exactly at the positions of that sex's traces.
trace_count = len(fig.data)
show_men = [pos in trace_ids['Men'] for pos in range(trace_count)]
show_women = [pos in trace_ids['Women'] for pos in range(trace_count)]

# Dropdown whose two buttons swap which set of traces is visible.
sex_dropdown = dict(
    buttons=[
        dict(label="Male", method="update", args=[{"visible": show_men}]),
        dict(label="Female", method="update", args=[{"visible": show_women}]),
    ],
    direction="down",
    x=1.2,
    y=1.21,
)

# Caption for the dropdown, positioned in paper coordinates.
dropdown_caption = dict(
    text="Select Gender:",
    x=1.05,
    y=1.20,
    xref="paper",
    yref="paper",
    showarrow=False,
    align="left",
    font=dict(size=14),
)

fig.update_layout(
    title="Use of cultural activities (year)",
    updatemenus=[sex_dropdown],
    annotations=[dropdown_caption],
)
fig.show()


In [17]:

# Keep only the rows whose source_file is 'KV2SC1'.
performing_arts_rows = full_df.loc[full_df['source_file'].eq('KV2SC1')]

# Grouped bars: percentage per age group, coloured by answer,
# with one facet column per sex.
fig = px.bar(
    performing_arts_rows,
    x='age',
    y='percentage',
    color='answer',
    facet_col='sex',
    barmode='group',
    title='Consumption of performing arts or stage performances',
)
fig.show()