# Add 'No response provided' to missing answers of weekly survey
 - Author: Mario Frei, 2025

## Imports

In [20]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.colors as pc

## Configurations

In [21]:
weekly_survey_path = '../2_processing/5_weekly_survey_with_personality_no_nan.csv'

## Read and visualize data

In [22]:
# Read data
df = pd.read_csv(weekly_survey_path)

# Save data
df.head()

Unnamed: 0,id_participant,timestamp,This app is easy to use.,How often do you wear your watch in a week?,The notifications on the watch were annoying.,The notifications on the phone were annoying.,I answered the the question on the watch honestly.,The Cozie app feels fast and responsive.,The notifications helped to change my mind.,I changed my location after seeing the notifications.,...,"Over the past week, I put on my earphones to mitigate noise distractions after receiving the noise intervention messages.","Over the past week, I changed my location, adjusted the thermostat, fan etc. to feel thermally comfortable after receiving the temperature intervention messages.",The intervention messages were annoying.,Extraversion,Agreeableness,Conscientiousness,Emotional Stability,Openness to Experience,swls,sensitivity
0,orenth01,2022-10-10 14:22:56.671000+00:00,Strongly disagree,40-50 hours,Neutral,Neutral,Strongly disagree,Strongly disagree,Somewhat agree,Very rarely,...,No response,No response,No response,3.5,6.0,3.0,5.5,3.0,22,4.25
1,orenth01,2022-10-20 01:44:32.965000+00:00,Strongly disagree,>50 hours,Strongly disagree,Strongly disagree,Strongly disagree,Agree,Agree,Rarely,...,No response,No response,No response,3.5,6.0,3.0,5.5,3.0,22,4.25
2,orenth01,2022-10-26 07:31:27.555000+00:00,Neutral,>50 hours,Agree,Agree,Strong Agree,Agree,Agree,Occasionally,...,No response,No response,No response,3.5,6.0,3.0,5.5,3.0,22,4.25
3,orenth02,2022-10-12 01:13:47.044000+00:00,Agree,10-20 hours,Disagree,Neutral,Strong Agree,Agree,Agree,Rarely,...,No response,No response,No response,4.5,3.5,5.0,5.5,4.0,26,4.666667
4,orenth02,2022-10-17 01:01:01.540000+00:00,Strong Agree,<10 hours,Disagree,Neutral,Agree,Agree,Agree,Occasionally,...,No response,No response,No response,4.5,3.5,5.0,5.5,4.0,26,4.666667


## Process data

In [23]:
minimal_days_between_responses = 4
minimal_number_of_responses = 2
drop_count = 0
df_new = pd.DataFrame()
for id_participant, df_group in df.groupby('id_participant'):
    print('################################################################')
    # Convert timestamp to datetime
    df_group['timestamp'] = pd.to_datetime(df_group['timestamp'])
    

    found_flag = True
    while found_flag==True:
        print(' Start new check')

        # Compute time between responses
        df_group['dt_days'] = df_group['timestamp'].diff().dt.total_seconds() / 60 / 60 / 24
        df_group['dt_days'] = df_group['dt_days'].fillna(7) # Fill first row with 7 days, for it to be always valid
        
        print(df_group[['id_participant', 'timestamp', 'dt_days']])
        if df_group.shape[0] <= minimal_number_of_responses:
            # Keep all responses if there are only two responses available
            print('  Only two responses, keep all')
            found_flag = False
            break

        keep_index_list = []
        found_flag = False
        for index, row in df_group.iterrows():
            row.head()
            if found_flag==True:
                # Keep remaining rows in this check
                keep_index_list.append(index)
                print('  ', index, ' Add remaining rows')
                continue

            if row['dt_days'] <= minimal_days_between_responses:
                # Do not keep this row
                found_flag = True
                drop_count += 1
                print('  ', index, ' Do not add this row')
            else:
                # Keep this row
                keep_index_list.append(index)
                print('  ', index, ' Keep this row')

        # Apply filter
        df_group = df_group.loc[keep_index_list]
        print(' ')

    df_new = pd.concat([df_new, df_group])

# Drop helper column used during processing
df_new = df_new.drop(columns=['dt_days'])

print('Number of rows originally:', df.shape[0])
print('Number of rows new:', df_new.shape[0])
print('Dropped', drop_count, 'rows')


################################################################
 Start new check
  id_participant                        timestamp   dt_days
0       orenth01 2022-10-10 14:22:56.671000+00:00  7.000000
1       orenth01 2022-10-20 01:44:32.965000+00:00  9.473337
2       orenth01 2022-10-26 07:31:27.555000+00:00  6.240910
   0  Keep this row
   1  Keep this row
   2  Keep this row
 
################################################################
 Start new check
  id_participant                        timestamp   dt_days
3       orenth02 2022-10-12 01:13:47.044000+00:00  7.000000
4       orenth02 2022-10-17 01:01:01.540000+00:00  4.991140
5       orenth02 2022-10-18 04:00:47.730000+00:00  1.124840
6       orenth02 2022-10-25 04:01:38.304000+00:00  7.000585
7       orenth02 2022-10-31 04:03:46.410000+00:00  6.001483
8       orenth02 2022-10-31 04:04:13.499000+00:00  0.000314
   3  Keep this row
   4  Keep this row
   5  Do not add this row
   6  Add remaining rows
   7  Add remaining row

## Vizualize processed data

In [24]:
fig = make_subplots(rows=1, cols=1)


counts_original = df['id_participant'].value_counts(sort=False)
counts_new = df_new['id_participant'].value_counts(sort=False)
fig.add_trace(
    go.Bar(x=counts_original.index, y=counts_original.values, name='Original Data', offset=-0.6),
    row=1, col=1
)

fig.add_trace(
    go.Bar(x=counts_new.index, y=counts_new.values, name='After processing'),
    row=1, col=1
)

fig.update_xaxes(
    title_text='Participant ID',
    tickangle=-90
)
fig.update_yaxes(
    range=[0, 14],  # Range of ticks
    title_text='Number of weekly survey responses',
)

fig.update_layout(
    title='Number of weekly survey responses per participant',
    title_x=0.5,  # Align title to center
    barmode='overlay',  # Ensures bars are grouped
    bargap=0.3,       # Adjust spacing between groups
    bargroupgap=0.4,   # Adjust overlap within groups
    height=600,
    width=2000,
)

fig.show()
fig.write_html('number_of_weekly_survey_responses_bar.html')
fig.write_image('number_of_weekly_survey_responses_bar.png')

In [25]:
# Get the default Plotly color palette
color_palette= pc.DEFAULT_PLOTLY_COLORS


fig = make_subplots(rows=1, cols=1)

# Add traces for original data
y_ticks = []
y_labels = []
i = 1
for id_participant, df_group in df.groupby('id_participant'):
    df_group['y'] = i
    fig.add_trace(
        go.Scatter(x=df_group['timestamp'], y=df_group['y'], mode='markers', name=id_participant, 
                   marker_symbol='circle-open', marker_size=6, marker_color=color_palette[i%len(color_palette)]),
        row=1, col=1
    )
    y_ticks.append(i)
    y_labels.append(id_participant)
    i = i+1

# Add traces for processed data
i = 1
for id_participant, df_group in df_new.groupby('id_participant'):
    df_group['y'] = i
    fig.add_trace(
        go.Scatter(x=df_group['timestamp'], y=df_group['y'], mode='markers', name=id_participant, 
                   marker_symbol='circle', marker_size=5, marker_color=color_palette[i%len(color_palette)]),
        row=1, col=1   
    )
    i = i+1

# Add manual legend
fig.update_layout(
    annotations=[
        dict(
            text='Legend:<br>	&#9898; Original data<br>&#9899; Processed data',  # Text to display
            x=df['timestamp'].min(),    # X-coordinate
            y=80,                       # Y-coordinate
            showarrow=False,            # Do not show arrow
            font=dict(
                size=12,
                color='black'
            ),
            bgcolor='white',            # Background color of the text box
            bordercolor='black',        # Border color of the text box
            borderwidth=1,
            xanchor='left', 
            align='left'
        )
    ]
)

# Format figure
fig.update_yaxes(
    range=[0, len(y_ticks)+1],  # Range of ticks
    tickvals=y_ticks,           # Positions of ticks
    ticktext=y_labels,          # Labels for the ticks
    title_text='Participant ID'
)

# Update layout for better readability
fig.update_layout(
    height=1400, width=1400,
    title_x=0.5,
    showlegend=False
)


# Show the plot
fig.show()

fig.write_html('number_of_weekly_survey_responses_timeline.html')
fig.write_image('number_of_weekly_survey_responses_timeline.png')

# Save data

In [27]:
df_new.to_csv('../2_processing/7_weekly_survey_with_personality_no_nan_one_response_per_week.csv', index=False)