# Individual Features Table Construction (Meetings, WhatsApp, Surveys, Grades)

## Preparation

### Import

In [None]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, QuantileTransformer
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Raise pandas' display limits (up to 100 columns / 1000 rows) so the wide
# merged table is not truncated when it is shown in the notebook.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 1000)

## Function Definition

### Read and Concatenate Tables

In [None]:
def read_collapse_tables(whats_features, meetings_features, surveys_features, grades):
    """Load the four per-participant tables and join them side by side.

    Args:
        whats_features: Path to the semicolon-delimited WhatsApp CSV. Only the
            columns listed below are read and the ``id`` column becomes the index.
        meetings_features: Path to the meetings Excel workbook. The three rows
            directly after the header row are skipped and the remaining rows
            get a 1-based positional index.
        surveys_features: Path to the surveys CSV. Only the columns listed
            below are read; the first of them becomes the index.
        grades: Path to the grades Excel workbook, indexed by its ``ID`` column.

    Returns:
        A DataFrame holding the meetings, WhatsApp, survey and grade columns
        (in that order), aligned on the index, with every row that contains
        at least one missing value removed.
    """
    whats_columns = [
        'id', 'group', 'messages_sent', 'messages_total', 'contribution_index',
        'ego_art', 'ego_nudges', 'alter_art', 'alter_nudges',
        'sentiment_avg', 'emotionality_avg', 'complexity_avg',
        'influence_message_avg', 'influence_total_in',
        'influence_message_avg_in', 'influence_total',
        'contribution_index_oscillation', 'activity_entanglement',
        'ALTERNATIVE_REALITIES_Treehugger', 'ALTERNATIVE_REALITIES_Fatherlander',
        'ALTERNATIVE_REALITIES_Spiritualism', 'ALTERNATIVE_REALITIES_Nerd',
        'EMOTIONS_Fear', 'EMOTIONS_Happy', 'EMOTIONS_Sad', 'EMOTIONS_Anger',
        'Groupflow_Beeflow', 'Groupflow_Leechflow', 'Groupflow_Antflow',
    ]
    survey_columns = [
        'ID', 'group',
        'ethical_likelihood', 'financial_likelihood', 'health_likelihood',
        'recreational_likelihood', 'social_likelihood', 'total_likelihood',
        'ethical_perceived', 'financial_perceived', 'health_perceived',
        'recreational_perceived', 'social_perceived', 'total_perceived',
        'O', 'C', 'E', 'A', 'N',
        'harm_care_score', 'fairness_reciprocity_score',
        'in_group_loyality_score', 'authority_respect_score',
        'purity_sanctity_score', 'dummy_question1', 'dummy_question2',
        'q1', 'q2', 'q3', 'q4', 'q5', 'q6', 'q7', 'q8', 'q9', 'q10',
        'conservation', 'transcendence',
    ]
    grade_columns = ['ID', 'TOTAL TEORÍA (6)', 'COEVAL (1)', 'TRABAJO FINAL (1)']

    # WhatsApp features, keyed by the file's own 'id' column.
    # NOTE(review): the notebook output shows values such as '1.332.855.889'
    # in some of these columns; pandas would keep those as text, not numbers.
    # Verify the CSV export before using them numerically.
    whats_table = pd.read_csv(
        whats_features,
        index_col='id',
        delimiter=";",
        header=0,
        usecols=whats_columns,
    )

    # Meetings features: rows are numbered 1..n by position.
    # NOTE(review): alignment with the other tables relies on this positional
    # index, not on the sheet's 'ID' column (which is dropped) - confirm the
    # sheet is ordered by participant id.
    meetings_table = pd.read_excel(meetings_features, header=0, skiprows=range(1, 4))
    meetings_table = meetings_table.rename_axis('Id')
    meetings_table.index = meetings_table.index + 1
    meetings_table = meetings_table.drop(columns=['group', 'shown_face', 'ID'])

    # Survey features; 'group' is dropped here because the WhatsApp table
    # already contributes a 'group' column.
    surveys_table = pd.read_csv(surveys_features, index_col=0, usecols=survey_columns)
    surveys_table = surveys_table.drop(columns=['group'])

    # Grades, with the three kept columns given short names (assigned in the
    # order the columns appear in the sheet).
    grades_table = pd.read_excel(grades, index_col='ID', usecols=grade_columns)
    grades_table.columns = ['theory', 'coeval', 'project']

    # Column-wise concat aligns on the index; dropna then keeps only rows that
    # are complete across all four sources.
    merged = pd.concat(
        [meetings_table, whats_table, surveys_table, grades_table],
        axis=1,
    )
    return merged.dropna()

### Display Scatter Plot

In [None]:
def scatterplot(df, x_column, y_column, degree=2):
    """Show a scatter plot of two columns, then the same plot with a polynomial fit.

    Two figures are displayed with ``plt.show()``: the raw scatter plot, and
    the scatter plot overlaid with a least-squares polynomial regression curve.

    Args:
        df: DataFrame containing both columns.
        x_column: Name of the column plotted on the x axis (the predictor).
        y_column: Name of the column plotted on the y axis (the response).
        degree: Degree of the fitted polynomial. Defaults to 2.

    Returns:
        None. The function only draws figures.
    """
    import matplotlib.pyplot as plt
    import numpy as np
    from sklearn.linear_model import LinearRegression
    from sklearn.preprocessing import PolynomialFeatures

    x = df[x_column]
    y = df[y_column]

    # First figure: the raw relationship, labelled with the real column names.
    plt.scatter(x, y)
    plt.xlabel(str(x_column))
    plt.ylabel(str(y_column))
    plt.title('Scatter Plot')
    plt.show()

    # Fit a polynomial regression: expand x into polynomial terms, then run
    # an ordinary linear regression on the expanded features.
    poly_features = PolynomialFeatures(degree=degree)
    X_poly = poly_features.fit_transform(x.values.reshape(-1, 1))

    model = LinearRegression()
    model.fit(X_poly, y)
    y_pred = np.asarray(model.predict(X_poly))

    # plt.plot joins points in the order given, so the fitted values must be
    # ordered by x; otherwise the "curve" zig-zags back and forth.
    order = np.argsort(x.values)

    # Second figure: scatter plus fitted curve.
    plt.scatter(x, y)
    plt.plot(x.values[order], y_pred[order], color='red',
             label=f'Degree {degree} Polynomial Fit')
    plt.xlabel(str(x_column))
    plt.ylabel(str(y_column))
    plt.title('Polynomial Regression')
    plt.legend()
    plt.show()


### Correlation Significance

In [None]:
def check_correlation_significance(df, col1, col2, alpha=0.05):
    """Print the Pearson correlation of two columns and whether it is significant.

    Args:
        df: DataFrame containing both columns.
        col1: Name of the first column.
        col2: Name of the second column.
        alpha: Significance level the p-value is compared against.
            Defaults to 0.05.

    Returns:
        None. The coefficient, the p-value and the verdict are printed.
    """
    first_values = df[col1]
    second_values = df[col2]

    # Pearson's r together with the p-value of the test that the true
    # correlation is zero (H0).
    corr_coeff, p_value = stats.pearsonr(first_values, second_values)

    print("Correlation coefficient:", corr_coeff)
    print("p-value:", p_value)

    # Reject H0 only when the p-value falls below the chosen significance level.
    if p_value < alpha:
        print("The correlation is statistically significant (reject H0).")
    else:
        print("The correlation is not statistically significant (fail to reject H0).")


## Use of Function

In [None]:
# Google Drive paths of the four input tables (grades, WhatsApp, meetings,
# surveys) and of the Excel workbook the merged table is written to.
grades = r'/content/drive/MyDrive/Projects/tps/grades/data/1_participants_grades.xlsx'
whats_features= r'/content/drive/MyDrive/Projects/tps/whatsapp/data/1_nodes_mixed.csv'
meetings_features= r'/content/drive/MyDrive/Projects/tps/meetings/data/12. features/4_individual_features_final_ratio.xlsx'
surveys_features=  r'/content/drive/MyDrive/Projects/tps/surveys/data/1_happimeter_individual_surveys.csv'
out_file= r'/content/drive/MyDrive/Projects/tps/finals/data/3_individual_features.xlsx'

In [None]:
# Read the four sources and merge them into one table; rows with any missing
# value are dropped inside read_collapse_tables.
df= read_collapse_tables(whats_features, meetings_features, surveys_features, grades)

In [None]:
# Scaling step is disabled; min_max_scaling_df is not defined in the visible
# part of this notebook.
#df = min_max_scaling_df(df)
# Name the index 'Id' and preview the first 12 rows of the merged table.
df.rename_axis('Id', inplace=True)
df.head(12)

Unnamed: 0_level_0,indiv_spoken_time,indiv_spoken_time_ratio,average_turn_duration,average_turn_duration_ratio,avg_time_without_speaking,avg_time_without_speaking_ratio,max_time_without_speaking,max_time_without_speaking_ratio,num_turns,num_turns_ratio,avg_turns_without_speaking,avg_turns_without_speaking_ratio,max_turns_without_speaking,max_turns_without_speaking_ratio,num_words,num_words_ratio,avg_words_turn,avg_words_turn_ratio,max_words_turn,max_words_turn_ratio,speech_neu,speech_ang,speech_hap,speech_sad,text_joy,text_anger,text_fear,text_sadness,group,messages_sent,messages_total,contribution_index,ego_art,ego_nudges,alter_art,alter_nudges,sentiment_avg,emotionality_avg,complexity_avg,influence_message_avg,influence_total_in,influence_message_avg_in,influence_total,contribution_index_oscillation,activity_entanglement,ALTERNATIVE_REALITIES_Treehugger,ALTERNATIVE_REALITIES_Fatherlander,ALTERNATIVE_REALITIES_Spiritualism,ALTERNATIVE_REALITIES_Nerd,EMOTIONS_Fear,EMOTIONS_Happy,EMOTIONS_Sad,EMOTIONS_Anger,Groupflow_Beeflow,Groupflow_Leechflow,Groupflow_Antflow,ethical_likelihood,financial_likelihood,health_likelihood,recreational_likelihood,social_likelihood,total_likelihood,ethical_perceived,financial_perceived,health_perceived,recreational_perceived,social_perceived,total_perceived,O,C,E,A,N,harm_care_score,fairness_reciprocity_score,in_group_loyality_score,authority_respect_score,purity_sanctity_score,dummy_question1,dummy_question2,q1,q2,q3,q4,q5,q6,q7,q8,q9,q10,conservation,transcendence,theory,coeval,project
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1
1,526.0,0.148336,7.850746,0.126865,56.210746,0.165958,451.54,0.264343,67.0,0.331683,1.985075,0.093801,7.0,0.090909,978.0,0.1708,14.597015,0.127952,68.0,0.078613,0.832695,0.137502,0.026347,0.003456,0.298809,0.236814,0.232361,0.232015,1.0,34.0,187.0,-0.64,1.332.855.889,2.468.578.279,1.234.255.833,1.903.921.545,0.303561,0.215562,9.179.501.577,0.346125353,1.227.594.788,0.346135338,1.730.626.767,1.0,0.487649,0.472084,0.083889,0.08914,0.354886,0.236101,0.407447,0.251288,0.105165,0.497477,0.156358,0.346165,4.5,3.5,4.166667,3.166667,3.5,3.766667,4.833333,5.666667,5.666667,5.833333,4.833333,5.0,0.6,0.716667,0.6,0.633333,0.583333,27.0,23.0,19.0,18.0,20.0,1.0,5.0,7.0,6.0,5.0,7.0,5.0,4.0,5.0,7.0,8.0,7.0,2.57,-1.78,3.44,0.9,8.0
2,2648.0,0.746757,33.948718,0.548598,21.791169,0.064337,109.24,0.063952,78.0,0.386139,1.589744,0.07512,9.0,0.116883,3816.0,0.666434,48.923077,0.428842,623.0,0.720231,0.795958,0.155654,0.045315,0.003074,0.29295,0.239695,0.227768,0.239587,1.0,75.0,228.0,-0.34,7.778.626.875,1.363.151.848,9.695.611.111,3.791.190.505,0.250396,0.236507,9.308.010.889,0.428515776,0.997035569,0.231981331,1.988.107.685,2.0,0.398994,0.438515,0.040232,0.107725,0.413528,0.215535,0.417419,0.208064,0.158982,0.300955,0.220056,0.478989,2.0,1.5,1.0,5.166667,5.5,3.033333,4.0,6.666667,6.5,5.5,3.0,5.0,0.533333,0.666667,0.7,0.616667,0.633333,22.0,28.0,11.0,7.0,11.0,0.0,5.0,2.0,6.0,4.0,4.0,7.0,8.0,8.0,0.0,5.0,4.0,-0.24,-0.33,3.64,0.91,8.0
3,105.0,0.029611,8.076923,0.13052,57.649231,0.170205,141.72,0.082966,13.0,0.064356,6.461538,0.305327,15.0,0.194805,267.0,0.046629,20.538462,0.180033,70.0,0.080925,0.789103,0.161515,0.037497,0.011885,0.297727,0.238513,0.230916,0.232844,1.0,38.0,191.0,-0.6,1.006.371.139,2.010.089.278,9.903.272.639,2.270.121.068,0.38186,0.254014,9.034.121.152,0.236068168,0.866593847,0.199153824,103.894.739,2.0,0.501416,0.157377,0.02898,0.053947,0.759697,0.245048,0.410218,0.190879,0.153855,0.226931,0.238211,0.534858,4.5,3.5,2.666667,6.0,6.0,4.533333,5.0,5.5,5.5,4.333333,2.833333,5.0,0.566667,0.683333,0.716667,0.533333,0.716667,23.0,28.0,9.0,6.0,7.0,1.0,5.0,6.0,7.0,6.0,8.0,8.0,8.0,6.0,3.0,2.0,6.0,-0.52,-2.03,3.98,0.9,8.0
4,162.0,0.045685,6.48,0.104714,159.4216,0.470679,836.46,0.489685,25.0,0.123762,6.6,0.31187,29.0,0.376623,394.0,0.068809,15.76,0.138147,61.0,0.07052,0.712697,0.196289,0.077861,0.013153,0.3001,0.235307,0.232877,0.231716,1.0,27.0,180.0,-0.7,1.197.692.111,2.333.088.249,7.537.312.014,1.568.276.525,0.250852,0.259828,9.557.120.015,0.486946776,2.016.371.149,0.467754404,1.159.895.116,2.0,0.493443,0.258578,0.073231,0.112001,0.55619,0.319698,0.32277,0.225624,0.131909,0.256794,0.232465,0.51074,2.166667,1.5,1.0,4.166667,4.833333,2.733333,5.833333,6.0,6.333333,5.0,4.166667,5.0,0.566667,0.783333,0.733333,0.733333,0.633333,17.0,24.0,17.0,13.0,8.0,3.0,4.0,6.0,7.0,7.0,6.0,7.0,6.0,6.0,2.0,4.0,7.0,0.42,-2.04,4.22,0.9,8.0
5,105.0,0.029611,5.526316,0.089303,43.632632,0.128822,169.2,0.099054,19.0,0.094059,4.526316,0.213882,17.0,0.220779,271.0,0.047328,14.263158,0.125026,43.0,0.049711,0.746439,0.193365,0.054215,0.005981,0.303593,0.235377,0.233832,0.227198,1.0,22.0,175.0,-0.75,8.437.013.889,2.777.083.337,1.210.607.806,141.848.135,0.3056,0.244289,870.027.021,0.36035958,178.005.748,0.421674039,0.970075875,1.0,0.555323,0.362543,0.000314,0.136496,0.500648,0.157227,0.472688,0.321906,0.048179,0.342162,0.120088,0.537751,3.166667,2.666667,3.5,5.166667,3.666667,3.633333,3.666667,5.833333,3.666667,3.0,3.666667,4.0,0.6,0.666667,0.483333,0.583333,0.433333,12.0,15.0,11.0,19.0,4.0,0.0,3.0,6.0,4.0,4.0,3.0,8.0,4.0,4.0,1.0,5.0,7.0,0.73,-1.48,3.74,0.91,8.0
6,1346.0,0.337682,8.518987,0.185779,19.887848,0.070957,169.82,0.04443,158.0,0.364055,1.740506,0.064235,9.0,0.028846,4025.0,0.329109,25.474684,0.187253,230.0,0.233503,0.76806,0.18186,0.043375,0.006704,0.29778,0.237582,0.227811,0.236827,2.0,22.0,69.0,-0.36,1.326.435.903,1.628.846.139,5.609.305.556,4.304.166.675,0.404612,0.268756,8.831.858.439,0.275771772,0.030908656,0.030908656,0.412769952,4.0,0.52,0.362597,0.134296,0.00189,0.501216,0.152454,0.624326,0.145505,0.077715,0.332323,0.221886,0.445791,1.833333,3.833333,3.666667,6.166667,5.5,4.2,3.5,4.0,6.333333,4.333333,2.833333,4.0,0.683333,0.733333,0.883333,0.583333,0.416667,24.0,26.0,13.0,13.0,15.0,1.0,5.0,6.0,7.0,7.0,6.0,5.0,8.0,8.0,3.0,4.0,4.0,0.38,-1.59,4.64,0.78,8.5
8,298.0,0.074762,11.461538,0.249949,162.1672,0.578592,1529.54,0.400177,26.0,0.059908,15.384615,0.567781,125.0,0.400641,764.0,0.062469,29.384615,0.215993,167.0,0.169543,0.782264,0.203949,0.01221,0.001577,0.300151,0.233183,0.230322,0.236344,2.0,8.0,55.0,-0.71,3.302.708.333,3.037.500.024,1.476.083.333,2.912.499.994,0.746794,0.579783,988.272.047,0.0,0.959899291,0.959899291,0,4.0,0.529475,0.029491,0.125285,0.000506,0.844718,0.121923,0.68549,0.083875,0.108711,0.535406,0.037475,0.427119,4.666667,3.666667,4.333333,4.5,4.666667,4.366667,3.666667,4.833333,6.0,5.333333,4.333333,5.0,0.533333,0.683333,0.716667,0.5,0.516667,28.0,20.0,21.0,23.0,24.0,3.0,3.0,4.0,6.0,5.0,4.0,6.0,4.0,5.0,4.0,6.0,8.0,1.77,-1.18,3.61,0.78,8.5
9,697.0,0.174862,8.822785,0.192404,47.935696,0.171028,1421.86,0.372004,79.0,0.182028,4.455696,0.164441,108.0,0.346154,2062.0,0.168602,26.101266,0.191859,257.0,0.260914,0.735439,0.228609,0.03055,0.005402,0.295444,0.239446,0.225465,0.239646,2.0,8.0,55.0,-0.71,2.095.583.333,3.866.666.635,1.623.819.444,2.291.666.687,0.42855,0.236425,9.147.384.644,0.061817313,0.254338987,0.127169494,0.061817313,1.0,0.518237,0.249223,0.057101,0.193481,0.500194,0.070225,0.553516,0.30907,0.067189,0.065323,0.31168,0.622997,3.0,2.666667,3.166667,5.0,5.0,3.766667,3.666667,5.833333,4.333333,3.0,2.833333,4.0,0.616667,0.7,0.716667,0.716667,0.55,17.0,23.0,16.0,13.0,14.0,2.0,4.0,6.0,7.0,6.0,4.0,4.0,3.0,6.0,2.0,6.0,4.0,1.72,-2.0,4.37,0.77,8.5
10,276.0,0.069242,6.272727,0.136793,25.912727,0.092453,218.84,0.057256,44.0,0.101382,3.113636,0.114911,24.0,0.076923,857.0,0.070074,19.477273,0.143169,78.0,0.079188,0.710042,0.214038,0.066733,0.009187,0.298211,0.23699,0.226584,0.238215,2.0,24.0,71.0,-0.32,1.213.291.667,1.720.833.361,9.400.256.481,287.179.486,0.259263,0.253813,9.067.036.629,0.03761133,0.098445511,0.098445511,0.03761133,3.0,0.497027,0.193271,0.03851,0.087476,0.680743,0.150721,0.602558,0.153305,0.093417,0.221334,0.277272,0.501394,1.5,2.0,1.333333,3.166667,5.0,2.6,5.333333,4.5,6.166667,5.5,3.166667,5.0,0.533333,0.7,0.583333,0.55,0.583333,27.0,26.0,18.0,18.0,18.0,0.0,5.0,3.0,5.0,4.0,3.0,4.0,8.0,8.0,3.0,3.0,8.0,0.9,-0.13,5.33,0.9,8.5
11,266.0,0.166562,9.851852,0.207658,95.963077,0.123819,1446.52,0.226016,27.0,0.174194,4.740741,0.115024,40.0,0.242424,543.0,0.164945,20.111111,0.200261,83.0,0.166,0.85065,0.140287,0.007156,0.001907,0.268266,0.257794,0.243564,0.230376,3.0,9.0,60.0,-0.7,4.099.444.444,2.966.666.698,1.503.222.222,200.999.999,0.41733,0.146854,867.734.657,0.900596442,0.174141428,0.080865228,224.194.098,4.0,0.54717,0.443086,0.000356,0.333701,0.222857,0.290449,0.443599,0.177075,0.088877,0.399318,0.391894,0.208788,1.666667,5.666667,3.166667,6.5,5.0,4.4,6.666667,4.166667,6.0,4.666667,4.0,5.0,0.583333,0.733333,0.616667,0.65,0.383333,23.0,23.0,20.0,23.0,18.0,1.0,5.0,6.0,7.0,5.0,7.0,8.0,8.0,6.0,2.0,6.0,6.0,0.17,-1.66,5.57,0.91,8.0


In [None]:
# Report the (rows, columns) size of the merged table.
print(df.shape)

(57, 95)


In [None]:
# Write the merged table (including its index) to the Excel file at out_file.
df.to_excel(out_file)