## Importing libraries

In [1]:
import os
import pandas as pd
import numpy as np
from itertools import combinations
from scipy.stats import chi2_contingency
from scipy.stats import fisher_exact
from scipy.stats import kruskal, mannwhitneyu

## Importing data

In [4]:
current_directory = os.getcwd()
file_name = 'Clustered_df.csv'
data = pd.read_csv(os.path.join(current_directory, file_name), index_col="Unnamed: 0")
print(data.columns)

Index(['Age', 'Education', 'Affective Symptoms', 'Rumination',
       'Behavioural Symptoms', 'Anxiety Personal Impact',
       'Attribution Skepticism', 'Impact Skepticism', 'Trend Skepticism',
       'Response Skepticism', 'Male', 'Female', 'Non-binary', 'Single',
       'Married', 'Divorced', 'Widowed', 'Separated', 'Income', 'Cluster'],
      dtype='object')


# Statistical Analysis for attributes comparison 

## CATEGORICAL VARIABLES (Chi-Squared and Fisher's Exact Test)

In [19]:
df_categorical = data
columns_to_drop = ['Age', 'Income']
df_categorical = df_categorical.drop(columns=columns_to_drop)

print(df_categorical.columns)

Index(['Education', 'Affective Symptoms', 'Rumination', 'Behavioural Symptoms',
       'Anxiety Personal Impact', 'Attribution Skepticism',
       'Impact Skepticism', 'Trend Skepticism', 'Response Skepticism', 'Male',
       'Female', 'Non-binary', 'Single', 'Married', 'Divorced', 'Widowed',
       'Separated', 'Cluster'],
      dtype='object')


### Computation of p-values for each feature, comparing the three groups together (1 test per feature)

In [20]:
cluster_colonna = 'Cluster'

# Itera su tutte le colonne tranne la colonna 'Cluster'
for attributo_selezionato in df_categorical.columns[df_categorical.columns != cluster_colonna]:
    # Calcola la tabella di contingenza per l'attributo corrente
    contingency_table = pd.crosstab(df_categorical[attributo_selezionato], df_categorical[cluster_colonna])
    
    # Stampa il risultato
    print(f"\nTabella di contingenza per l'attributo {attributo_selezionato}:\n{contingency_table}")
    
    all_values_above_5 = np.all(contingency_table >= 5)

    if all_values_above_5:  # Apply Chi-squared
        print(f"\nFor the feature {attributo_selezionato}, all expected frequencies are at least 5. I can perform Chi-Squared test")
        # Perform Chi-squared test
        chi2_stat, p_value, _, _ = chi2_contingency(contingency_table)

        # Results:
        print(f"Feature: {attributo_selezionato}")
        print(f"Chi-square Statistic: {chi2_stat}")
        print(f"P-Value: {p_value}")
        
    else: 
        print(f"\nFor the feature {attributo_selezionato}, some expected frequencies are below 5. Consider alternative methods.")

        print("I have to perform Fisher's Exact Test for non binary features.")

        # I performed Fisher's Exact Test for non binary features in R and i got the following results:
        # See the "R code (groups together).r"
        results_fisher_together = [
            {"Test": "Education", "p_value": 0.0004997501},
            {"Test": "Affective Symptoms", "p_value": 0.0004997501},
            {"Test": "Rumination", "p_value": 0.0004997501},
            {"Test": "Behavioural Symptoms", "p_value": 0.0004997501},
            {"Test": "Anxiety Personal Impact", "p_value": 0.0004997501},
            {"Test": "Attribution Skepticism", "p_value": 0.0004997501},
            {"Test": "Impact Skepticism", "p_value": 0.0004997501},
            {"Test": "Trend Skepticism", "p_value": 0.0004997501},
            {"Test": "Response Skepticism", "p_value": 0.9375312},
            {"Test": "Single", "p_value": 0.0004997501},
            {"Test": "Widowed", "p_value": 0.1454273},
            {"Test": "Separated", "p_value": 0.0004997501},
            ]

         # Create a DataFrame with named rows
        results_fisher_together_df = pd.DataFrame(results_fisher_together)
        results_fisher_together_df.set_index("Test", inplace=True)
        
        feature_val = str(attributo_selezionato)

        # Extract the p-value for the specified feature
        p_value_together = results_fisher_together_df.loc[feature_val, 'p_value']

        print(f"Feature: {attributo_selezionato}")
        print(f"Fisher P value: {p_value_together}")

            


Tabella di contingenza per l'attributo Education:
Cluster     1   2   3
Education            
5.000000    0   8   0
8.000000    0  24  12
13.000000  11  14  31
14.100629   1   0   0
18.000000  18   7   9
22.000000  15   0   2
25.000000   8   0   0

For the feature Education, some expected frequencies are below 5. Consider alternative methods.
I have to perform Fisher's Exact Test for non binary features.
Feature: Education
Fisher P value: 0.0004997501

Tabella di contingenza per l'attributo Affective Symptoms:
Cluster              1   2   3
Affective Symptoms            
0.000000             3   0   0
0.250000             4   0   0
0.500000             8   0   0
0.750000            11   0   1
1.000000             5   0   4
1.250000             6   0   5
1.333333             0   0   1
1.500000             5   0   6
1.666667             1   0   0
1.750000             5   1   8
2.000000             3   0  10
2.250000             1   5   6
2.500000             0  15   1
2.666667          

### Computation of p-values for each feature, comparing the three groups in pairs (3 test per feature)

In [21]:
# List of all categorical variables in the DataFrame
categorical_variables = df_categorical.columns.tolist()
cluster_colonna = 'Cluster'

# Significance level after Bonferroni correction
alpha = 0.05
alpha_adjusted = alpha / 3
count_dict = {variable: 0 for variable in categorical_variables}

for feature in df_categorical.columns[df_categorical.columns != cluster_colonna]:
    # Create a contingency table
    contingency_table = pd.crosstab(df_categorical[feature], df_categorical[cluster_colonna])

    # Iterate over all possible pairs of clusters
    clusters = df_categorical[cluster_colonna].unique()
    for i in range(len(clusters)):
        for j in range(i + 1, len(clusters)):
            # Create a contingency table for the pair of clusters
            subset_table = contingency_table[[clusters[i], clusters[j]]]
            print (f"\n {subset_table}")
            # Check if all expected frequencies are above 5
            all_values_above_5 = np.all(subset_table >= 5)

            if all_values_above_5:  # Apply Chi-square
                print(f"\nFor the feature {feature}, all expected frequencies are at least 5. I can perform Chi-Squared test")
                # Perform Chi-square test
                chi2_stat, p_value, _, _ = chi2_contingency(subset_table)

                # Results:
                print(f"Feature: {feature}, Cluster Pair: {clusters[i]} vs {clusters[j]}")
                print(f"Chi-square Statistic: {chi2_stat}")
                print(f"P-Value: {p_value}")
                print(f"Adjusted alpha with Bonferroni: {alpha_adjusted}")

                # Check if the result is statistically significant after Bonferroni correction
                if p_value < alpha_adjusted:
                    print(f"The clusters {clusters[i]} and {clusters[j]} are statistically different regarding the feature {feature}.")
                else:
                    print(f"The clusters {clusters[i]} and {clusters[j]} are NOT statistically different regarding the feature {feature}.")
                    count_dict[feature] += 1


            else: 
                print(f"\nFor the feature {feature}, some expected frequencies are below 5. Consider alternative methods.")
                
                if df_categorical[feature].nunique() < 3: # Apply Fisher
                    print("I can perform Fisher's Exact Test.")
                    # Perform Fisher’s Exact Test
                    odds_ratio, p_value = fisher_exact(subset_table)

                    # Results:
                    print(f"Feature: {feature}")
                    print(f"Fisher Odds Ratio: {odds_ratio}, p-value: {p_value}")
                    print(f"Adjusted alpha with Bonferroni: {alpha_adjusted}")

                    # Check if the result is statistically significant after Bonferroni correction
                    if p_value < alpha_adjusted:
                        print(f"The clusters {clusters[i]} and {clusters[j]} are statistically different regarding the feature {feature}.")
                    else:
                        print(f"The clusters {clusters[i]} and {clusters[j]} are NOT statistically different regarding the feature {feature}.")
                        count_dict[feature] += 1


                else:
                    print("I have to perform Fisher's Exact Test for non binary features.")

                    # I performed Fisher's Exact Test for non binary features in R and i got the following results:
                    # See the "R code (groups paired).r"
                    results_fisher = [
                        {"Test": "Education", "i":"2", "j":"1", "p_value": 0.0004997501},
                        {"Test": "Education", "i":"2", "j":"3","p_value": 0.0004997501},
                        {"Test": "Education", "i":"1", "j":"3","p_value": 0.0004997501},
                        {"Test": "Affective Symptoms", "i":"2", "j":"1","p_value": 0.0004997501},
                        {"Test": "Affective Symptoms", "i":"2", "j":"3","p_value": 0.0004997501},
                        {"Test": "Affective Symptoms", "i":"1", "j":"3","p_value": 0.0004997501},
                        {"Test": "Rumination", "i":"2", "j":"1","p_value": 0.0004997501},
                        {"Test": "Rumination", "i":"2", "j":"3","p_value": 0.0004997501},
                        {"Test": "Rumination", "i":"1", "j":"3","p_value": 0.0004997501},
                        {"Test": "Behavioural Symptoms", "i":"2", "j":"1","p_value": 0.0004997501},
                        {"Test": "Behavioural Symptoms", "i":"2", "j":"3","p_value": 0.0004997501},
                        {"Test": "Behavioural Symptoms", "i":"1", "j":"3","p_value": 0.0004997501},
                        {"Test": "Anxiety Personal Impact", "i":"2", "j":"1","p_value": 0.0004997501},
                        {"Test": "Anxiety Personal Impact", "i":"2", "j":"3","p_value": 0.0004997501},
                        {"Test": "Anxiety Personal Impact", "i":"1", "j":"3","p_value": 0.0004997501},
                        {"Test": "Attribution Skepticism", "i":"2", "j":"1","p_value": 0.0004997501},
                        {"Test": "Attribution Skepticism", "i":"2", "j":"3","p_value": 0.0004997501},
                        {"Test": "Attribution Skepticism", "i":"1", "j":"3","p_value": 0.0004997501},
                        {"Test": "Impact Skepticism", "i":"2", "j":"1","p_value": 0.0004997501},
                        {"Test": "Impact Skepticism", "i":"2", "j":"3","p_value": 0.0004997501},
                        {"Test": "Impact Skepticism", "i":"1", "j":"3","p_value": 0.2618691},
                        {"Test": "Trend Skepticism", "i":"2", "j":"1","p_value": 0.0004997501},
                        {"Test": "Trend Skepticism", "i":"2", "j":"3","p_value": 0.0004997501},
                        {"Test": "Trend Skepticism", "i":"1", "j":"3","p_value": 0.0004997501},
                        {"Test": "Response Skepticism", "i":"2", "j":"1","p_value": 0.6311844},
                        {"Test": "Response Skepticism", "i":"2", "j":"3","p_value": 0.934033},
                        {"Test": "Response Skepticism", "i":"1", "j":"3","p_value": 0.941029}
                    ]

                    # Create a DataFrame with named rows
                    results_fisher_df = pd.DataFrame(results_fisher)
                    results_fisher_df.set_index("Test", inplace=True)
                    
                    i_value = str(clusters[i])
                    j_value = str(clusters[j])
                    feature_value = str(feature)

                    # Filter the DataFrame based on conditions
                    filtered_df = results_fisher_df[(results_fisher_df['i'] == i_value) & (results_fisher_df['j'] == j_value)]
                    # Extract the p-value for the specified feature
                    p_value_to_compare = filtered_df.loc[feature_value, 'p_value']

                    print(f"Feature: {feature}")
                    print(f"Fisher P value: {p_value_to_compare}")
                    print(f"Adjusted alpha with Bonferroni: {alpha_adjusted}")

                    if p_value_to_compare < alpha_adjusted:
                        print(f"The clusters {clusters[i]} and {clusters[j]} are statistically different regarding the feature {feature}.")
                    else:
                        print(f"The clusters {clusters[i]} and {clusters[j]} are NOT statistically different regarding the feature {feature}.")
                        count_dict[feature] += 1




 Cluster     2   1
Education        
5.000000    8   0
8.000000   24   0
13.000000  14  11
14.100629   0   1
18.000000   7  18
22.000000   0  15
25.000000   0   8

For the feature Education, some expected frequencies are below 5. Consider alternative methods.
I have to perform Fisher's Exact Test for non binary features.
Feature: Education
Fisher P value: 0.0004997501
Adjusted alpha with Bonferroni: 0.016666666666666666
The clusters 2 and 1 are statistically different regarding the feature Education.

 Cluster     2   3
Education        
5.000000    8   0
8.000000   24  12
13.000000  14  31
14.100629   0   0
18.000000   7   9
22.000000   0   2
25.000000   0   0

For the feature Education, some expected frequencies are below 5. Consider alternative methods.
I have to perform Fisher's Exact Test for non binary features.
Feature: Education
Fisher P value: 0.0004997501
Adjusted alpha with Bonferroni: 0.016666666666666666
The clusters 2 and 3 are statistically different regarding the featu


 Cluster                2   1
Behavioural Symptoms        
0.000000               0   5
0.333333               0   8
0.500000               0   1
0.666667               0   7
1.000000               0   7
1.333333               1   7
1.666667               3  10
2.000000               0   7
2.333333               8   1
2.666667               5   0
3.000000              36   0

For the feature Behavioural Symptoms, some expected frequencies are below 5. Consider alternative methods.
I have to perform Fisher's Exact Test for non binary features.
Feature: Behavioural Symptoms
Fisher P value: 0.0004997501
Adjusted alpha with Bonferroni: 0.016666666666666666
The clusters 2 and 1 are statistically different regarding the feature Behavioural Symptoms.

 Cluster                2   3
Behavioural Symptoms        
0.000000               0   2
0.333333               0   0
0.500000               0   0
0.666667               0   0
1.000000               0   4
1.333333               1  11
1.666667   

In [22]:
for variable, count in count_dict.items():
    print(f"Number of p-values > 0.0167 for {variable}: {count}")

Number of p-values > 0.0167 for Education: 0
Number of p-values > 0.0167 for Affective Symptoms: 0
Number of p-values > 0.0167 for Rumination: 0
Number of p-values > 0.0167 for Behavioural Symptoms: 0
Number of p-values > 0.0167 for Anxiety Personal Impact: 0
Number of p-values > 0.0167 for Attribution Skepticism: 0
Number of p-values > 0.0167 for Impact Skepticism: 1
Number of p-values > 0.0167 for Trend Skepticism: 0
Number of p-values > 0.0167 for Response Skepticism: 3
Number of p-values > 0.0167 for Male: 3
Number of p-values > 0.0167 for Female: 3
Number of p-values > 0.0167 for Non-binary: 3
Number of p-values > 0.0167 for Single: 1
Number of p-values > 0.0167 for Married: 1
Number of p-values > 0.0167 for Divorced: 3
Number of p-values > 0.0167 for Widowed: 3
Number of p-values > 0.0167 for Separated: 1
Number of p-values > 0.0167 for Cluster: 0


##  NUMERICAL VARIABLES (Kruskal-Wallis Test + Pairwise MannWhitney U Tests)

In [23]:
df_numerical = data
columns_to_drop = ['Education', 'Affective Symptoms', 'Rumination', 'Behavioural Symptoms','Anxiety Personal Impact', 'Attribution Skepticism', 'Impact Skepticism', 'Trend Skepticism', 'Response Skepticism', 'Male',
       'Female', 'Non-binary', 'Single', 'Married', 'Divorced', 'Widowed',
       'Separated']
#columns_to_drop = ['Cluster']
df_numerical = df_numerical.drop(columns=columns_to_drop)
print(df_numerical.columns)

Index(['Age', 'Income', 'Cluster'], dtype='object')


In [24]:
alpha_corrected = 0.05 / 3
# Calculate the Kruskal-Wallis test for each column with respect to the groups defined by 'Clusters'.
columns_to_test = ['Age', 'Income']
for column in columns_to_test:
    groups = [df_numerical[column][df_numerical['Cluster'] == i] for i in range(1, 4)]
    kruskal_stat, kruskal_p_value = kruskal(*groups)
    print(f"Kruskal-Wallis Test for {column}: Statistic = {kruskal_stat}, p-value = {kruskal_p_value}")

    # If the p-value of the Kruskal-Wallis test is significant, perform Mann-Whitney U pairwise tests
    if kruskal_p_value < 0.05:
        pairwise_combs = list(combinations(range(1, 4), 2))
        for comb in pairwise_combs:
            group1 = groups[comb[0] - 1]
            group2 = groups[comb[1] - 1]
            mannwhitneyu_stat, mannwhitneyu_p_value = mannwhitneyu(group1, group2)
            # Comparison with Bonferroni's corrected threshold
            if mannwhitneyu_p_value < alpha_corrected:
                print(f"Mann-Whitney U Test between cluster {comb[0]} and cluster {comb[1]} for {column}: "
                      f"Statistic = {mannwhitneyu_stat}, p-value = {mannwhitneyu_p_value} (Statistical difference)")
            else:
                print(f"Mann-Whitney U Test between cluster {comb[0]} and cluster {comb[1]} for {column}: "
                      f"Statistic = {mannwhitneyu_stat}, p-value = {mannwhitneyu_p_value} (No statistical difference)")

Kruskal-Wallis Test for Age: Statistic = 104.89273324283666, p-value = 1.6704457674798354e-23
Mann-Whitney U Test between cluster 1 and cluster 2 for Age: Statistic = 7.0, p-value = 1.01518886253871e-18 (Statistical difference)
Mann-Whitney U Test between cluster 1 and cluster 3 for Age: Statistic = 10.0, p-value = 8.07241154519579e-19 (Statistical difference)
Mann-Whitney U Test between cluster 2 and cluster 3 for Age: Statistic = 1560.5, p-value = 0.42033942889913023 (No statistical difference)
Kruskal-Wallis Test for Income: Statistic = 84.69148565333114, p-value = 4.0689050762271256e-19
Mann-Whitney U Test between cluster 1 and cluster 2 for Income: Statistic = 2652.5, p-value = 3.0953455055822063e-15 (Statistical difference)
Mann-Whitney U Test between cluster 1 and cluster 3 for Income: Statistic = 1312.0, p-value = 0.45951130427912523 (No statistical difference)
Mann-Whitney U Test between cluster 2 and cluster 3 for Income: Statistic = 146.5, p-value = 1.1825570172755493e-15 (S

# Table creation

### Tendencies

In [25]:
group_1 = data.loc[(data['Cluster']==1), :] 
group_2 = data.loc[(data['Cluster']==2), :] 
group_3 = data.loc[(data['Cluster']==3), :] 

# print(group_1)
print('The number of pp in group 1 is: ' , len(group_1))
# print(group_2)
print('The number of pp in group 2 is: ' , len(group_2))
# print(group_3)
print('The number of pp in group 3 is: ' , len(group_3))

The number of pp in group 1 is:  53
The number of pp in group 2 is:  53
The number of pp in group 3 is:  54


Group 1

In [26]:
column_means_1 = group_1.mean()
column_modes_1 = group_1.mode()
column_medians_1 = group_1.median()

print(column_means_1)
print(column_modes_1)
print(column_medians_1)

Age                           26.188679
Education                     19.077370
Affective Symptoms             1.036164
Rumination                     1.037736
Behavioural Symptoms           1.078616
Anxiety Personal Impact        1.157233
Attribution Skepticism         0.723270
Impact Skepticism              0.654088
Trend Skepticism               0.833333
Response Skepticism            2.194969
Male                           0.207547
Female                         0.301887
Non-binary                     0.207547
Single                         0.528302
Married                        0.226415
Divorced                       0.132075
Widowed                        0.000000
Separated                      0.000000
Income                     33211.320755
Cluster                        1.000000
dtype: float64
    Age  Education  Affective Symptoms  Rumination  Behavioural Symptoms  \
0  19.0       18.0                0.75    0.333333              1.666667   
1   NaN        NaN               

Group 2

In [27]:
column_means_2 = group_2.mean()
column_modes_2 = group_2.mode()
column_medians_2 = group_2.median()

print(column_means_2)
print(column_modes_2)
print(column_medians_2)

Age                           45.641509
Education                     10.188679
Affective Symptoms             2.721698
Rumination                     2.710692
Behavioural Symptoms           2.761006
Anxiety Personal Impact        2.748428
Attribution Skepticism         3.437107
Impact Skepticism              3.408805
Trend Skepticism               3.534591
Response Skepticism            2.084906
Male                           0.169811
Female                         0.320755
Non-binary                     0.207547
Single                         0.000000
Married                        0.471698
Divorced                       0.245283
Widowed                        0.075472
Separated                      0.169811
Income                     19682.943396
Cluster                        2.000000
dtype: float64
    Age  Education  Affective Symptoms  Rumination  Behavioural Symptoms  \
0  42.0        8.0                 3.0         3.0                   3.0   
1  53.0        NaN               

Group 3

In [28]:
column_means_3 = group_3.mean()
column_modes_3 = group_3.mode()
column_medians_3 = group_3.median()

print(column_means_3)
print(column_modes_3)
print(column_medians_3)

Age                           44.831936
Education                     13.055556
Affective Symptoms             1.939815
Rumination                     2.030864
Behavioural Symptoms           1.870370
Anxiety Personal Impact        2.006173
Attribution Skepticism         2.219136
Impact Skepticism              0.601852
Trend Skepticism               2.104938
Response Skepticism            2.111111
Male                           0.296296
Female                         0.129630
Non-binary                     0.277778
Single                         0.092593
Married                        0.648148
Divorced                       0.111111
Widowed                        0.074074
Separated                      0.000000
Income                     34533.333333
Cluster                        3.000000
dtype: float64
    Age  Education  Affective Symptoms  Rumination  Behavioural Symptoms  \
0  42.0       13.0                 2.0         2.0                   2.0   
1   NaN        NaN               

I decided to take the modes as the values to put in the table

In [29]:
group_1_tendency = column_modes_1.iloc[0]
print('Group 1 Central Tendency is:\n', group_1_tendency, '\n')

group_2_tendency = column_modes_2.iloc[0]
print('Group 2 Central Tendency is:\n', group_2_tendency, '\n')

group_3_tendency = column_modes_3.iloc[0]
print('Group 3 Central Tendency is:\n', group_3_tendency, '\n')

Group 1 Central Tendency is:
 Age                            19.0
Education                      18.0
Affective Symptoms             0.75
Rumination                 0.333333
Behavioural Symptoms       1.666667
Anxiety Personal Impact         1.0
Attribution Skepticism          1.0
Impact Skepticism          0.333333
Trend Skepticism           1.333333
Response Skepticism        2.666667
Male                          False
Female                        False
Non-binary                    False
Single                         True
Married                       False
Divorced                      False
Widowed                       False
Separated                     False
Income                      33000.0
Cluster                         1.0
Name: 0, dtype: object 

Group 2 Central Tendency is:
 Age                            42.0
Education                       8.0
Affective Symptoms              3.0
Rumination                      3.0
Behavioural Symptoms            3.0
Anxiety Persona

### Computation of  Percentages

In [30]:
indexes_list = group_1_tendency.index.tolist()

# Calculate the frequency of the mode 1
percentage_occurrence_1 = group_1_tendency.copy()

for attribute in indexes_list:
    mode_value_1 = group_1_tendency[attribute]
    mode_frequency_1 = (group_1[attribute] == mode_value_1).sum()

    percentage_occurrence_1[attribute] = mode_frequency_1 / len(group_1) * 100

print('Percentages of Group 1: \n' , percentage_occurrence_1, '\n')

# Calculate the frequency of the mode 2
percentage_occurrence_2 = group_2_tendency.copy()
 
for attribute in indexes_list:
    mode_value_2 = group_2_tendency[attribute]
    mode_frequency_2 = (group_2[attribute] == mode_value_2).sum()

    percentage_occurrence_2[attribute] = mode_frequency_2 / len(group_2) * 100

print('Percentages of Group 2: \n' , percentage_occurrence_2, '\n')

# Calculate the frequency of the mode 3
percentage_occurrence_3 = group_3_tendency.copy()
 
for attribute in indexes_list:
    mode_value_3 = group_3_tendency[attribute]
    mode_frequency_3 = (group_3[attribute] == mode_value_3).sum()

    percentage_occurrence_3[attribute] = mode_frequency_3 / len(group_3) * 100

print('Percentages of Group 3: \n' , percentage_occurrence_3, '\n')

Percentages of Group 1: 
 Age                        13.207547
Education                  33.962264
Affective Symptoms         20.754717
Rumination                 16.981132
Behavioural Symptoms       18.867925
Anxiety Personal Impact    18.867925
Attribution Skepticism     26.415094
Impact Skepticism          26.415094
Trend Skepticism           30.188679
Response Skepticism        26.415094
Male                       79.245283
Female                     69.811321
Non-binary                 79.245283
Single                     52.830189
Married                    77.358491
Divorced                   86.792453
Widowed                        100.0
Separated                      100.0
Income                     11.320755
Cluster                        100.0
Name: 0, dtype: object 

Percentages of Group 2: 
 Age                        11.320755
Education                  45.283019
Affective Symptoms         43.396226
Rumination                 50.943396
Behavioural Symptoms       67.92452

### Final table

In [33]:
indexes_list = group_1_tendency.index.tolist()
res = {'': indexes_list, 'Group 1': group_1_tendency, '% 1': percentage_occurrence_1, 'Group 2': group_2_tendency, 
'% 2': percentage_occurrence_2, 'Group 3': group_3_tendency, '% 3': percentage_occurrence_3}
tabella = pd.DataFrame(res)
tabella_arrotondata = tabella.round(1)
tabella_arrotondata = tabella_arrotondata.drop(tabella_arrotondata.index[-1]) # tolgo la riga Cluster

print(tabella_arrotondata)
# tabella_arrotondata.to_csv('tabella gruppi (cvs).csv', index=False)


                                                   Group 1        % 1  \
Age                                          Age      19.0  13.207547   
Education                              Education      18.0  33.962264   
Affective Symptoms            Affective Symptoms      0.75  20.754717   
Rumination                            Rumination  0.333333  16.981132   
Behavioural Symptoms        Behavioural Symptoms  1.666667  18.867925   
Anxiety Personal Impact  Anxiety Personal Impact       1.0  18.867925   
Attribution Skepticism    Attribution Skepticism       1.0  26.415094   
Impact Skepticism              Impact Skepticism  0.333333  26.415094   
Trend Skepticism                Trend Skepticism  1.333333  30.188679   
Response Skepticism          Response Skepticism  2.666667  26.415094   
Male                                        Male     False  79.245283   
Female                                    Female     False  69.811321   
Non-binary                            Non-binary   

In [32]:
column_means_1 = group_1.mean()
column_modes_1 = group_1.mode()
column_medians_1 = group_1.median()

print(column_means_1)
print(column_modes_1)
print(column_medians_1)

Age                           26.188679
Education                     19.077370
Affective Symptoms             1.036164
Rumination                     1.037736
Behavioural Symptoms           1.078616
Anxiety Personal Impact        1.157233
Attribution Skepticism         0.723270
Impact Skepticism              0.654088
Trend Skepticism               0.833333
Response Skepticism            2.194969
Male                           0.207547
Female                         0.301887
Non-binary                     0.207547
Single                         0.528302
Married                        0.226415
Divorced                       0.132075
Widowed                        0.000000
Separated                      0.000000
Income                     33211.320755
Cluster                        1.000000
dtype: float64
    Age  Education  Affective Symptoms  Rumination  Behavioural Symptoms  \
0  19.0       18.0                0.75    0.333333              1.666667   
1   NaN        NaN               