In [None]:
import pandas as pd
import numpy as np

# Read the source spreadsheet into a DataFrame named `data`.
# NOTE(review): the path literal looks like a placeholder; point it at the real workbook before running.
data = pd.read_excel(io='/path/to/your/file.xlsx')

#clean the data
def clean_score(score):
    """Convert one raw cell value to a number.

    Args:
        score: A single cell value (string, number, or missing).

    Returns:
        None when ``score`` is null according to ``pd.isnull``. Otherwise the
        result of ``pd.to_numeric(..., errors='coerce')``, which yields NaN for
        values that cannot be parsed. For strings, only the text before the
        first comma is parsed.
    """
    if pd.isnull(score):
        return None

    candidate = score
    if isinstance(candidate, str):
        # partition() returns the whole string when there is no comma, so
        # comma-free strings pass through untouched.
        candidate = candidate.partition(',')[0]

    return pd.to_numeric(candidate, errors='coerce')

# Apply clean_score to every cell of each column from position 3 onward,
# overwriting those columns on `data`.
columns_to_clean = data.columns[3:]
for column_name in columns_to_clean:
    data[column_name] = data[column_name].apply(clean_score)

In [None]:
# Partition the rows on the 'With AI' flag, keeping only the columns at
# positions 3 through 11.
# NOTE(review): presumably those positions hold the per-task columns; confirm against the sheet layout.
without_ai_mask = data['With AI'].eq('No')
with_ai_mask = data['With AI'].eq('Yes')
tasks_without_ai = data.loc[without_ai_mask].iloc[:, 3:12]
tasks_with_ai = data.loc[with_ai_mask].iloc[:, 3:12]

# Descriptive statistics per group, transposed so each source column is a row.
stats_without_ai = tasks_without_ai.describe().T
stats_with_ai = tasks_with_ai.describe().T

print(stats_without_ai, '\n', stats_with_ai)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Reshape to long form: one row per ('With AI', Task, Time) observation.
task_columns = [f'Task {i}' for i in range(1, 10)]
melted_data_time = data.melt(
    id_vars=['With AI'],
    value_vars=task_columns,
    var_name='Task',
    value_name='Time',
)

# Grouped boxplot: one pair of boxes per task, split by the 'With AI' flag.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(x='Task', y='Time', hue='With AI', data=melted_data_time, ax=ax)
ax.set_title('Comparison of Time Taken for Tasks With and Without AI')
ax.set_ylabel('Time (seconds)')
ax.set_xlabel('Task')
ax.legend(title='With AI')
ax.grid(True)
plt.show()


In [None]:
# Draw one overlaid histogram per group; both figures share the same settings,
# so only the frame and the title vary.
histogram_specs = (
    (tasks_without_ai, 'Histogram of Task Times Without AI'),
    (tasks_with_ai, 'Histogram of Task Times With AI'),
)
for group_frame, figure_title in histogram_specs:
    group_frame.plot(kind='hist', bins=15, alpha=0.5, title=figure_title)
    plt.xlabel('Time (seconds)')
    plt.show()


In [None]:
from scipy.stats import ttest_rel

# ttest_rel is a *paired* test: observation i of the first sample is compared
# with observation i of the second. Dropping NaNs from each sample separately
# either leaves the samples with different lengths (ttest_rel then raises) or
# silently shifts which observations are paired. So pair first, then drop any
# pair in which either side is missing.
# NOTE(review): pairing here is positional (k-th 'No' row with k-th 'Yes' row).
# Confirm the rows are ordered so that matching positions belong to the same
# subject; if the two groups are independent, an unpaired test is the right tool.
task_1_without_ai = tasks_without_ai['Task 1'].reset_index(drop=True)
task_1_with_ai = tasks_with_ai['Task 1'].reset_index(drop=True)

if len(task_1_without_ai) != len(task_1_with_ai):
    raise ValueError(
        'Paired t-test needs the same number of rows in both groups: '
        f'{len(task_1_without_ai)} without AI vs {len(task_1_with_ai)} with AI'
    )

paired_task_1 = pd.DataFrame(
    {'without_ai': task_1_without_ai, 'with_ai': task_1_with_ai}
).dropna()

t_stat, p_value = ttest_rel(paired_task_1['without_ai'], paired_task_1['with_ai'])
print(f'T-Statistic: {t_stat}, P-value: {p_value}')


In [None]:
# Write `data` to CSV without the index column.
# NOTE(review): the output path literal looks like a placeholder; set a real destination before running.
data.to_csv(path_or_buf='/path/to/your/output.csv', index=False)
