In [None]:
# --- Import Libraries ---

import scipy.io
from google.colab import drive
import matplotlib.pyplot as plt
import numpy as np
import zipfile
import seaborn as sns
import pandas as pd

# mount Google Drive at /content/drive (Colab-only: relies on the google.colab import above)
drive.mount('/content/drive')

In [None]:
# --- Get Arousal Ratings from Online Survey ---

# define the path to the zip file containing the data
path_zip_file = '/YOUR_PATH/newAATexp_379090_2025-01-06_12h14.38_082fe072-b0be-437c-95c4-ba8c4be2d92e.zip'

# the two columns taken from every participant file: image name and arousal slider response
rating_columns = ['exp_images', 'slider_arousal_main.response']

# create an empty list for the ratings and one for the files that could not be used
arousal_ratings = []
skipped_files = []

# access the zip file
with zipfile.ZipFile(path_zip_file, 'r') as zip_file:

  # retrieve the file names
  file_names = zip_file.namelist()

  # get only csv files from data folder
  csv_files = [x for x in file_names if x.startswith('data/') and x.endswith('.csv')]

  # go through the files
  for csv_file in csv_files:

    # open the csv file and read the data from the participant
    with zip_file.open(csv_file) as file:
      try:
        participant_data = pd.read_csv(file)
      except pd.errors.EmptyDataError:
        # a completely empty file has nothing to contribute; remember it and move on
        skipped_files.append(csv_file)
        continue

    # a file without both rating columns would raise a KeyError below and abort the
    # whole load (presumably such files come from incomplete sessions - TODO confirm)
    if not set(rating_columns).issubset(participant_data.columns):
      skipped_files.append(csv_file)
      continue

    # extract the image id and arousal rating
    arousal_ratings.append(participant_data[rating_columns])

# make skipped files visible instead of dropping them silently
if skipped_files:
  print(f'Skipped {len(skipped_files)} csv file(s) without usable rating columns: {skipped_files}')

# pd.concat fails with an unspecific message on an empty list, so fail early and clearly
if not arousal_ratings:
  raise ValueError(f'No arousal ratings could be read from {path_zip_file}')

# save all the ratings into a pandas object and drop missing values (NaNs)
total_arousal_ratings = pd.concat(arousal_ratings, ignore_index=True)
total_arousal_ratings = total_arousal_ratings.dropna()

# --- Computing Arousal Levels ---

# flip the rating scale (rating -> 10 - rating) so that 9 is high arousal and 1 low arousal;
# the result is stored next to the raw response as inverted_arousal_rating
total_arousal_ratings['inverted_arousal_rating'] = (
    total_arousal_ratings['slider_arousal_main.response'].sub(10).mul(-1)
)

# per image: median of the inverted ratings, rounded, stored in the column arousal_levels
# NOTE(review): .round() rounds exact halves to the nearest even value (4.5 -> 4, 5.5 -> 6);
# confirm this is the intended tie-breaking for medians that end in .5
median_ratings = (
    total_arousal_ratings
    .groupby('exp_images')['inverted_arousal_rating']
    .median()
    .round()
    .rename('arousal_levels')
    .reset_index()
)

# derive the numeric image id from the file name in exp_images
# (names that do not match the pattern get NaN, which is why the column is float)
median_ratings['img_id'] = (
    median_ratings['exp_images']
    .str.extract(r'image_(\d+)\.jpg', expand=False)
    .astype(float)
)

# order the rows by image id and renumber the index
median_ratings = median_ratings.sort_values(by='img_id', ignore_index=True)

# save the arousal levels as a csv
median_ratings.to_csv("/YOUR_PATH/median_arousal_ratings.csv", index=False)


# --- Plotting the Arousal Level Distribution ---

# count how frequently each arousal level appeared within the positive and negative image set
# (img_id up to 44 is treated as positive, 45 and above as negative; rows whose img_id is NaN
# satisfy neither comparison and are therefore left out of both sets)
positive_arousal_level = median_ratings[median_ratings['img_id'] <= 44]
negative_arousal_level = median_ratings[median_ratings['img_id'] >= 45]
positive_counts = positive_arousal_level['arousal_levels'].value_counts().sort_index()
negative_counts = negative_arousal_level['arousal_levels'].value_counts().sort_index()

# get the range of arousal levels (sorted union of the levels found in either image set)
arousal_levels_present = positive_counts.index.union(negative_counts.index)
arousal_range = arousal_levels_present.astype(int)

# put both count series on the same, sorted set of levels (missing levels count as 0);
# otherwise each barplot call brings its own categories, the bar positions follow the order
# in which levels first appear, and the tick labels set below no longer match the bars
positive_aligned = positive_counts.reindex(arousal_levels_present, fill_value=0)
negative_aligned = negative_counts.reindex(arousal_levels_present, fill_value=0)
bar_levels = arousal_range.to_numpy()

# plot the distribution of arousal levels per valence
# (the narrower negative bars are drawn on top of the positive bars at the same positions)
plt.figure(figsize=(10, 6))
sns.barplot(x=bar_levels, y=positive_aligned.to_numpy(), color='limegreen', label='positive')
sns.barplot(x=bar_levels, y=negative_aligned.to_numpy(), color='lightcoral', label='negative', width=0.6)

# define plot details
plt.title('Distribution of Arousal Levels in the Image Set',fontsize = 17)
plt.legend(title='Image Valence', fontsize = 12, title_fontsize = 12)
plt.xlabel('Arousal Level', fontsize = 15)
plt.ylabel('Number of Images', fontsize = 15)
plt.xticks(ticks=np.arange(len(arousal_range)), labels=arousal_range, fontsize = 12)
plt.yticks(np.arange(0,20, 5), fontsize = 12)
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.show()
