#### Import

In [2]:
import pandas as pd
from analysis_functions.logs import Logs
from analysis_functions import utils as u

---
# User Study - Participants

#### Load data
- User model
- User study Logs

In [3]:
# Load the user-model table from CSV and report its row count as the number of users.
user_model = pd.read_csv('data/csv_df_user_model.csv')
print(f'- Total number of users: {user_model.shape[0]}')

- Total number of users: 230


### Aggregate counts for each job and system

In [4]:
# Count participants per (job, assigned_condition) pair and pivot the conditions
# into columns; job/condition pairs with no participants are filled with 0.
df_study = (
    user_model
    .groupby(['job', 'assigned_condition'])
    .size()
    .unstack(fill_value=0)
    .reset_index()
)
df_study.columns.name = None  # drop the leftover 'assigned_condition' axis label
# Row total across every condition column present in the data rather than a
# hard-coded [0, 1, 2], so a missing or additional condition neither raises a
# KeyError nor gets silently left out of the total.
df_study['Total'] = df_study.drop(columns='job').sum(axis=1)
df_study

Unnamed: 0,job,0,1,2,Total
0,customer_experience,10,7,6,23
1,data_analysis,9,8,6,23
2,design_creative,5,10,8,23
3,finance_accounting,7,8,8,23
4,human_resources,9,7,7,23
5,it,8,7,8,23
6,legal,9,7,7,23
7,product_management,6,10,7,23
8,research,6,8,9,23
9,sales,8,5,10,23


### Total number of users per system

In [5]:
# Column-wise totals over every count column of df_study (each condition plus
# 'Total'). Dropping 'job' instead of listing [0, 1, 2, 'Total'] keeps this in
# step with whatever conditions the data actually contains.
total_study = df_study.drop(columns='job').sum()
# Prefix every label with 'System '.
# NOTE(review): this also turns 'Total' into 'System Total'; the label is kept
# unchanged in case later cells look it up by that name.
total_study = total_study.rename(lambda label: f'System {label}')
# Same counts as plain ints keyed by position; the last key holds the overall
# total, not a system.
study_sample = {pos: int(count) for pos, count in enumerate(total_study)}
total_study

System 0         77
System 1         77
System 2         76
System Total    230
dtype: int64

---
# Demographic Analysis

### Age Analysis

In [6]:
age_csv_path = 'data/demographic/age.csv'  # also passed to u.age_analysis in the next cell
# Display the age table via the project helper.
# NOTE(review): the rendered frame contains an 'Unnamed: 0' column, presumably an
# index that was saved into the CSV — confirm and drop it at export time.
u.get_demographic_df(age_csv_path)

Unnamed: 0.1,Unnamed: 0,Age
0,0,30
1,1,32
2,2,31
3,3,40
4,4,37
...,...,...
225,36,25
226,39,27
227,40,41
228,41,30


In [7]:
# Print the text summary of participant shares per age bracket (see output below).
u.age_analysis(age_csv_path)

Age demographics indicated that:

        - 22% of participants were between 18-25 years
        - 45% between 26-35 years
        - 20% between 36-45 years
        - 14% over 45 years of age.
    


### Ethnicity Analysis

In [8]:
ethnicity_csv_path = 'data/demographic/ethnicity.csv'  # also passed to u.ethnicity_analysis below
# Display the ethnicity counts; the rendered table includes one CONSENT_REVOKED row.
u.get_demographic_df(ethnicity_csv_path)

Unnamed: 0,Ethnicity simplified,count
0,White,103
1,Black,89
2,Asian,18
3,Mixed,12
4,Other,7
5,CONSENT_REVOKED,1


In [9]:
# Print the text summary of participant shares per ethnic group (see output below).
# NOTE(review): the printed "multiracial or other" share (5%) matches Mixed alone
# (12/230); Mixed + Other would be 19/230, about 8% — verify the helper's grouping.
u.ethnicity_analysis(ethnicity_csv_path)

The study included participants from various ethnic backgrounds, with:

    - 45% as White/Caucasian
	- 8% as Asian
	- 39% as Black/African
	- 5% as multiracial or other


### Gender Analysis

In [10]:
gender_csv_path = 'data/demographic/gender.csv'
# Display the gender counts; the rendered table includes one CONSENT_REVOKED row.
u.get_demographic_df(gender_csv_path)

Unnamed: 0,Sex,count
0,Male,117
1,Female,112
2,CONSENT_REVOKED,1


### Language Analysis


In [11]:
language_csv_path = 'data/demographic/language.csv'  # also passed to u.language_analysis below
# Display the language counts and percentages.
# NOTE(review): the rendered frame contains an 'Unnamed: 0' column, presumably an
# index that was saved into the CSV — confirm and drop it at export time.
u.get_demographic_df(language_csv_path)

Unnamed: 0.1,Unnamed: 0,Language,count,percentage
0,0,English,150,65.217391
1,1,Spanish,15,6.521739
2,2,Polish,9,3.913043
3,3,Portuguese,9,3.913043
4,4,Hungarian,6,2.608696
5,5,Italian,5,2.173913
6,6,Greek,4,1.73913
7,7,German,3,1.304348
8,8,Swahili,3,1.304348
9,9,Other,2,0.869565


In [12]:
# Print the text summary of the five most prevalent languages (see output below).
u.language_analysis(language_csv_path)


    The research study encompassed participants proficient in various languages. 
    The five most prevalent languages among the participants were as follows:

        - English = 65.217%
		- Spanish = 6.522%
		- Polish = 3.913%
		- Portuguese = 3.913%
		- Hungarian = 2.609%
    


### Nationality Analysis

In [13]:
nationality_csv_path = 'data/demographic/nationality.csv'
# Prints the per-continent breakdown and returns two objects, unpacked here and
# displayed in the following cells: continent_percentages and continent_distribution.
continent_percentages, continent_distribution = u.nationality_analysis(nationality_csv_path)


    The study participants exhibited a diverse geographical distribution. 
    The breakdown of participants by continent was as follows:

    - Africa (38%)
	- Europe (37%)
	- North America (18%)
	- Asia (4%)
	- South America (2%)
	- Oceania (0%)
    


In [14]:
# Display the first value returned by u.nationality_analysis (per-continent percentages).
continent_percentages

continent
Africa           38
Europe           37
North America    18
Asia              4
South America     2
Oceania           0
Unknown           0
Name: count, dtype: int64

In [15]:
# Display the second value returned by u.nationality_analysis (per-continent counts).
continent_distribution

continent
Africa           88
Asia              9
Europe           86
North America    41
Oceania           1
South America     4
Unknown           1
Name: count, dtype: int64