# OLS Predictor Model

## Preparation

### Import

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold, train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler, QuantileTransformer
import statsmodels.api as sm
import random
import itertools
# Colab-only: mount Google Drive at /content/drive; the input and output
# paths used further down live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Option

In [None]:
# Raise pandas' display limits (100 columns, 1000 rows) so the wide feature
# frames and long result tables below are printed with less truncation.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 1000)

## Function Definition

### Min-Max Scaler

In [None]:
def min_max_scaling_df(df):
    """Return a min-max scaled copy of ``df``.

    Every column is scaled independently with scikit-learn's ``MinMaxScaler``
    using its default settings.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns can all be processed by ``MinMaxScaler``.

    Returns
    -------
    pandas.DataFrame
        The scaled values, carrying the same column labels and the same row
        index as ``df``.
    """
    scaler = MinMaxScaler()
    scaled_values = scaler.fit_transform(df)

    # Re-attach the original row index as well as the column labels; without
    # ``index=`` the result silently falls back to 0..n-1 and can no longer be
    # aligned with the input rows.
    return pd.DataFrame(scaled_values, columns=df.columns, index=df.index)

In [None]:
def scale_dataframe_min_max_and_percentile(df):
    """Scale all but the last column with min-max, and the last by percentile rank.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` in which every column except the last has been passed
        through ``MinMaxScaler`` (refitted per column) and the last column has
        been replaced by ``rank(pct=True)`` of its values.
    """
    min_max = MinMaxScaler()
    result = df.copy()
    labels = df.columns

    # One fit per column: each column is scaled against its own min and max.
    for label in labels[:-1]:
        result[label] = min_max.fit_transform(df[[label]])

    # The last column is expressed as a percentile rank instead.
    final_label = labels[-1]
    result[final_label] = df[final_label].rank(pct=True)

    return result


### Prepare Dataframe for Modeling

In [None]:
def prepare_df_for_modeling(df):
    """Reduce ``df`` to the rows and columns used for modeling.

    Keeps only the columns selected by ``select_dtypes(include=['int', 'float'])``,
    then keeps only the rows whose ``theory`` value is at least 3, and finally
    renumbers the remaining rows from 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw feature frame; must contain a numeric ``theory`` column.

    Returns
    -------
    pandas.DataFrame
        The filtered frame with a fresh 0..n-1 index.
    """
    numeric_only = df.select_dtypes(include=['int', 'float'])
    kept_rows = numeric_only.loc[numeric_only['theory'] >= 3]
    return kept_rows.reset_index(drop=True)

### Create Random Df

In [None]:
def create_random_df(df, limits=(0, 28, 45, 81), n_per_group=2):
    """Draw random predictor columns from consecutive column groups.

    ``limits`` splits the column positions of ``df`` into consecutive
    half-open groups ``[limits[i], limits[i + 1])``. From every group
    ``n_per_group`` distinct positions are drawn with ``random.sample``; the
    chosen columns are returned together with the ``theory`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature frame containing a ``theory`` column.
    limits : sequence of int, optional
        Group boundaries. The default reproduces the boundaries that were
        previously hard-coded (0, 28, 45, 81).
        NOTE(review): those defaults look tailored to an older, wider column
        layout; the frame printed later in this notebook has only 61 columns,
        for which positions up to 80 are out of range. Pass explicit limits
        that fit the frame being sampled.
    n_per_group : int, optional
        Number of columns drawn from each group (default 2).

    Returns
    -------
    tuple
        ``(data, selected_columns)`` where ``data`` holds the drawn columns
        followed by ``theory`` and ``selected_columns`` lists the labels of
        the drawn columns in draw order.

    Raises
    ------
    ValueError
        If fewer than two limits are given, or if a group holds fewer than
        ``n_per_group`` positions (raised by ``random.sample``).
    IndexError
        If a drawn position does not exist in ``df`` (raised by ``iloc``).
    """
    if len(limits) < 2:
        raise ValueError("limits must contain at least two boundaries")

    # Sample group by group, in order, so the default call consumes the
    # random stream exactly as the three hard-coded draws did.
    column_indexes = []
    for lower, upper in zip(limits[:-1], limits[1:]):
        column_indexes += random.sample(range(lower, upper), n_per_group)

    new_df = df.iloc[:, column_indexes].copy()
    data = pd.concat([new_df, df['theory']], axis=1)
    selected_columns = df.columns[column_indexes].to_list()

    return data, selected_columns

### Select Columns by Index

In [None]:
def select_columns_by_index(df, column_indexes):
    """Pick columns of ``df`` by position and report their labels.

    NOTE: a later cell defines a function with the same name that returns only
    a frame; once that cell has run, it replaces this definition.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame.
    column_indexes : list of int
        Positional column indexes to keep, in the desired order.

    Returns
    -------
    tuple
        ``(subset, names)``: a copy of the chosen columns and the list of
        their labels.
    """
    names = df.columns[column_indexes].tolist()
    subset = df.iloc[:, column_indexes].copy()
    return subset, names

In [None]:
def select_columns_by_index(df, column_indexes):
    """Pick columns of ``df`` by position and append the frame's last column.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame.
    column_indexes : iterable of int
        Positional indexes of the columns to keep. The argument is left
        untouched.

    Returns
    -------
    pandas.DataFrame
        A copy holding the requested columns followed by the last column of
        ``df``.
    """
    # Build a new list instead of appending to the caller's: appending in
    # place made the caller's list grow by one entry on every call.
    cols = [*column_indexes, len(df.columns) - 1]
    return df.iloc[:, cols].copy()

### Generate Index

In [None]:
def generate_features_index(my_list):
    """Enumerate candidate feature-index combinations.

    Each combination consists of one position from the first group
    (0 and 2..20), one from the second group (21 and 23..37), an unordered
    pair of two *different* positions from the third group (38..59), and
    finally the fixed positions in ``my_list``.

    The pair is generated with ``itertools.combinations`` rather than a
    product of the third group with itself: the product also yields the same
    column twice (two identical predictors) and both orderings of every pair
    (the same model evaluated twice).

    Positions 1 and 22 are absent from the first two groups; presumably they
    are meant to be supplied through ``my_list`` -- confirm against the caller.

    Parameters
    ----------
    my_list : iterable of int
        Positions appended to every combination. It is not modified.

    Returns
    -------
    list of list of int
        One new list per combination, laid out as
        ``[group1, group2, group3_a, group3_b, *my_list]`` with
        ``group3_a < group3_b``.
    """
    range_1 = [0] + list(range(2, 21))
    range_2 = [21] + list(range(23, 38))
    range_3 = list(range(38, 60))

    fixed = list(my_list)
    distinct_pairs = itertools.combinations(range_3, 2)

    return [
        [first, second, *pair, *fixed]
        for first, second, pair in itertools.product(range_1, range_2, distinct_pairs)
    ]

### Rescale Metric

In [None]:
def rescale_rmse(rmse, original_column):
    """Convert an RMSE measured on a min-max scaled target back to original units.

    Parameters
    ----------
    rmse : float
        Error value to rescale.
    original_column : pandas.Series
        The target values in their original units; only the minimum and
        maximum are used.

    Returns
    -------
    float
        ``rmse`` multiplied by ``max - min`` of ``original_column``.
    """
    return rmse * (original_column.max() - original_column.min())


### Return Dataframe by Columns

In [None]:
def return_df_by_columns(df,column_names):
    """Build a min-max scaled frame from named columns plus ``theory``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame containing the requested columns and ``theory``.
    column_names : list of str
        Labels of the predictor columns to keep.

    Returns
    -------
    pandas.DataFrame
        The requested columns followed by ``theory``, all passed through
        ``min_max_scaling_df``.
    """
    predictors = df[column_names].copy()
    with_target = pd.concat([predictors, df['theory']], axis=1)
    return min_max_scaling_df(with_target)

### Evaluate Regression Models by Cross-Validation

In [None]:
def evaluate_regression_models(df):
    """Cross-validate a linear and a ridge regression on ``df``.

    All columns but the last are used as predictors and the last column as
    the target. The rows are split with ``KFold(n_splits=5)`` (no other
    options are passed); in every fold both models are fitted on the training
    part and scored on the held-out part.

    Parameters
    ----------
    df : pandas.DataFrame
        Predictors followed by the target in the last column.

    Returns
    -------
    dict
        ``{'Linear Regression': {...}, 'Ridge Regression': {...}}`` where each
        inner dict maps the metric label (MSE, RMSE, MAE, R2) to the mean of
        that metric over the five folds.
    """
    features = df.iloc[:, :-1]
    target = df.iloc[:, -1]

    mse_label = 'Mean Squared Error (MSE)'
    rmse_label = 'Root Mean Squared Error (RMSE)'
    mae_label = 'Mean Absolute Error (MAE)'
    r2_label = 'R-squared (R2)'
    metric_labels = (mse_label, rmse_label, mae_label, r2_label)

    # A fresh, unfitted estimator is created for every fold.
    model_factories = {
        'Linear Regression': LinearRegression,
        'Ridge Regression': lambda: Ridge(alpha=1.0),  # alpha is the regularization strength
    }

    # Per model and per metric: one score for each fold.
    fold_scores = {
        model_name: {label: [] for label in metric_labels}
        for model_name in model_factories
    }

    splitter = KFold(n_splits=5)
    for train_rows, test_rows in splitter.split(features):
        X_train, X_test = features.iloc[train_rows], features.iloc[test_rows]
        y_train, y_test = target.iloc[train_rows], target.iloc[test_rows]

        for model_name, make_model in model_factories.items():
            model = make_model()
            model.fit(X_train, y_train)
            predicted = model.predict(X_test)

            mse = mean_squared_error(y_test, predicted)
            scores = fold_scores[model_name]
            scores[mse_label].append(mse)
            scores[rmse_label].append(np.sqrt(mse))
            scores[mae_label].append(mean_absolute_error(y_test, predicted))
            scores[r2_label].append(r2_score(y_test, predicted))

    # Average every metric over the folds.
    return {
        model_name: {label: np.mean(values) for label, values in per_metric.items()}
        for model_name, per_metric in fold_scores.items()
    }

## Using the Functions

### Fetch the data

In [None]:
# Excel workbook with the per-individual features, read from the mounted Drive.
in_file= r'/content/drive/MyDrive/Projects/tps/finals/data/3_individual_features.xlsx'

In [None]:
# Columns present in the workbook that are excluded from modeling.
not_used_columns = [
    'avg_time_without_speaking_ratio',
    'max_time_without_speaking_ratio',
    'avg_turns_without_speaking_ratio',
    'max_turns_without_speaking_ratio',
    'avg_words_turn_ratio',
    'max_words_turn',
    'max_words_turn_ratio',
    'messages_total',
    'alter_art',
    'alter_nudges',
    'complexity_avg',
    'dummy_question1',
    'dummy_question2',
    'q1', 'q2', 'q3', 'q4', 'q5', 'q6', 'q7', 'q8', 'q9', 'q10',
    'conservation',
    'transcendence',
    'coeval',
    'project',
]

# Load the workbook with the 'Id' column as row index, then discard the
# excluded columns.
df = pd.read_excel(in_file, index_col='Id').drop(columns=not_used_columns)

### Prepare the data

In [None]:
# Keep the numeric columns and the rows with theory >= 3, then min-max scale
# every column -- the 'theory' target included.
df= prepare_df_for_modeling(df)
df= min_max_scaling_df(df)

NameError: ignored

In [None]:
# Preview up to the first 65 rows of the prepared frame.
df.head(65)

Unnamed: 0,indiv_spoken_time,indiv_spoken_time_ratio,average_turn_duration,average_turn_duration_ratio,avg_time_without_speaking,max_time_without_speaking,num_turns,num_turns_ratio,avg_turns_without_speaking,max_turns_without_speaking,num_words,num_words_ratio,avg_words_turn,speech_neu,speech_ang,speech_hap,speech_sad,text_joy,text_anger,text_fear,text_sadness,messages_sent,contribution_index,sentiment_avg,emotionality_avg,contribution_index_oscillation,activity_entanglement,ALTERNATIVE_REALITIES_Treehugger,ALTERNATIVE_REALITIES_Fatherlander,ALTERNATIVE_REALITIES_Spiritualism,ALTERNATIVE_REALITIES_Nerd,EMOTIONS_Fear,EMOTIONS_Happy,EMOTIONS_Sad,EMOTIONS_Anger,Groupflow_Beeflow,Groupflow_Leechflow,Groupflow_Antflow,ethical_likelihood,financial_likelihood,health_likelihood,recreational_likelihood,social_likelihood,total_likelihood,ethical_perceived,financial_perceived,health_perceived,recreational_perceived,social_perceived,total_perceived,O,C,E,A,N,harm_care_score,fairness_reciprocity_score,in_group_loyality_score,authority_respect_score,purity_sanctity_score,theory
0,0.0091,0.159657,0.082674,0.123789,0.087042,0.010795,0.04914,0.747201,0.034444,0.006818,0.023084,0.237878,0.082873,0.642186,0.38644,0.307156,0.160233,0.779269,0.288472,0.251824,0.489964,0.242424,0.394366,0.374747,0.158705,0.1,0.556037,0.483112,0.241864,0.089009,0.356028,0.495483,0.197535,0.505977,0.353427,0.763429,0.340511,0.184244,0.512195,0.326087,0.395833,0.295455,0.034483,0.305,0.551724,0.482143,0.52381,0.710526,0.55,0.5,0.35,0.6,0.291667,0.421053,0.5,0.941176,0.611111,0.733333,0.631579,0.818182,0.171206
1,0.047339,0.866398,0.527474,0.735671,0.022087,0.001899,0.058149,0.889212,0.013586,0.011364,0.093908,1.0,0.487778,0.44984,0.491922,0.562257,0.141686,0.667926,0.351012,0.132738,0.921348,0.55303,0.816901,0.308486,0.207085,0.2,0.260331,0.448759,0.115995,0.107596,0.414859,0.450916,0.212901,0.392314,0.540566,0.458956,0.479239,0.362381,0.146341,0.065217,0.0,0.568182,0.448276,0.195,0.37931,0.589286,0.642857,0.657895,0.275,0.5,0.15,0.4,0.541667,0.368421,0.615385,0.647059,0.888889,0.2,0.052632,0.409091,0.249027
2,0.001514,0.019441,0.086528,0.129092,0.089756,0.002743,0.004914,0.050058,0.270625,0.025,0.005341,0.046945,0.152957,0.413946,0.525981,0.457121,0.569071,0.758693,0.325351,0.214353,0.537218,0.272727,0.450704,0.472333,0.247524,0.2,0.601957,0.161053,0.083552,0.05381,0.762142,0.514874,0.201805,0.347123,0.522739,0.344269,0.51878,0.43731,0.512195,0.326087,0.208333,0.681818,0.551724,0.42,0.586207,0.464286,0.5,0.473684,0.25,0.5,0.25,0.466667,0.583333,0.105263,0.807692,0.705882,0.888889,0.066667,0.0,0.227273,0.381323
3,0.002541,0.038425,0.059311,0.091652,0.281816,0.0208,0.014742,0.204979,0.277931,0.056818,0.00851,0.08105,0.096591,0.013899,0.728057,1.0,0.630565,0.803801,0.255744,0.26519,0.472923,0.189394,0.309859,0.309055,0.260954,0.2,0.575362,0.264618,0.211134,0.111873,0.557981,0.676648,0.067056,0.43849,0.446423,0.390537,0.506265,0.404965,0.170732,0.065217,0.0,0.431818,0.310345,0.15,0.758621,0.517857,0.619048,0.578947,0.45,0.5,0.25,0.866667,0.625,0.736842,0.615385,0.352941,0.666667,0.6,0.368421,0.272727,0.474708
4,0.001514,0.019441,0.043057,0.069292,0.063305,0.003457,0.009828,0.127518,0.168522,0.029545,0.00544,0.048019,0.078935,0.190567,0.711064,0.681968,0.282712,0.870162,0.257264,0.289959,0.215557,0.151515,0.239437,0.377288,0.22506,0.1,0.781762,0.371012,0.000905,0.136371,0.502259,0.324557,0.298065,0.691679,0.155269,0.522797,0.261518,0.44119,0.317073,0.217391,0.3125,0.568182,0.068966,0.285,0.310345,0.5,0.238095,0.263158,0.375,0.333333,0.35,0.4,0.0,0.263158,0.153846,0.058824,0.166667,0.2,0.684211,0.090909,0.287938
5,0.023877,0.383276,0.094063,0.209266,0.018495,0.003473,0.123669,0.831623,0.02154,0.011364,0.099124,0.481305,0.211184,0.303771,0.644211,0.536176,0.317784,0.759701,0.305134,0.133864,0.764116,0.151515,0.788732,0.500689,0.281576,0.4,0.663945,0.371067,0.387194,0.001746,0.50283,0.314213,0.531722,0.227807,0.257975,0.507554,0.483225,0.317858,0.121951,0.369565,0.333333,0.704545,0.448276,0.37,0.275862,0.303571,0.619048,0.473684,0.25,0.333333,0.6,0.666667,1.0,0.263158,0.115385,0.764706,0.777778,0.333333,0.368421,0.590909,0.638132
6,0.004992,0.072764,0.144214,0.302368,0.286997,0.038813,0.015561,0.038457,0.741414,0.275,0.017744,0.071301,0.257305,0.378141,0.772571,0.117019,0.069064,0.80476,0.209627,0.198967,0.736581,0.045455,0.295775,0.927155,1.0,0.4,0.695545,0.03018,0.361211,0.000362,0.847437,0.248049,0.625971,0.065743,0.36576,0.822193,0.081596,0.292815,0.536585,0.347826,0.416667,0.477273,0.275862,0.395,0.310345,0.392857,0.571429,0.631579,0.475,0.5,0.15,0.466667,0.583333,0.0,0.346154,1.0,0.444444,0.866667,0.894737,1.0,0.237354
7,0.012182,0.190984,0.099241,0.218878,0.071425,0.036014,0.058968,0.356925,0.164796,0.236364,0.050136,0.234498,0.218575,0.132972,0.915872,0.363684,0.254621,0.715311,0.345596,0.073036,0.924691,0.045455,0.295775,0.530523,0.206895,0.1,0.658063,0.255045,0.164631,0.193365,0.501804,0.136015,0.422613,0.657924,0.221372,0.093888,0.678788,0.555518,0.292683,0.217391,0.270833,0.545455,0.344828,0.305,0.310345,0.5,0.333333,0.263158,0.25,0.333333,0.4,0.533333,0.583333,0.684211,0.423077,0.352941,0.611111,0.533333,0.368421,0.545455,0.533074
8,0.004595,0.066246,0.055779,0.138194,0.029865,0.004747,0.030303,0.146616,0.093987,0.045455,0.020064,0.082994,0.14044,0.0,0.831201,0.850331,0.438176,0.767906,0.292273,0.102047,0.843183,0.166667,0.84507,0.319537,0.24706,0.3,0.587317,0.197786,0.111029,0.087345,0.682935,0.310456,0.49818,0.248317,0.312577,0.335598,0.60385,0.39243,0.073171,0.130435,0.041667,0.295455,0.344828,0.13,0.655172,0.357143,0.595238,0.657895,0.3,0.5,0.15,0.533333,0.25,0.157895,0.5,0.941176,0.777778,0.666667,0.631579,0.727273,0.906615
9,0.004415,0.181182,0.116779,0.24101,0.16206,0.036655,0.01638,0.336495,0.179835,0.081818,0.012228,0.228876,0.147916,0.736196,0.402627,0.049037,0.085088,0.19886,0.743965,0.542253,0.396619,0.05303,0.309859,0.516539,0.0,0.4,0.754567,0.453436,0.001026,0.333605,0.223575,0.613262,0.253241,0.310824,0.296791,0.61135,0.853486,0.0,0.097561,0.608696,0.270833,0.75,0.344828,0.4,0.931034,0.321429,0.571429,0.526316,0.425,0.5,0.3,0.666667,0.333333,0.473684,0.038462,0.705882,0.611111,0.8,0.894737,0.727273,1.0


In [None]:
# (rows, columns) of the prepared frame.
df.shape

(55, 61)

In [None]:
# Print every column's position next to its label; the feature-index helpers
# in this notebook address columns by these positions.
for index, column_name in enumerate(df.columns):
    print(f"{index} {column_name}")

0 indiv_spoken_time
1 indiv_spoken_time_ratio
2 average_turn_duration
3 average_turn_duration_ratio
4 avg_time_without_speaking
5 max_time_without_speaking
6 num_turns
7 num_turns_ratio
8 avg_turns_without_speaking
9 max_turns_without_speaking
10 num_words
11 num_words_ratio
12 avg_words_turn
13 speech_neu
14 speech_ang
15 speech_hap
16 speech_sad
17 text_joy
18 text_anger
19 text_fear
20 text_sadness
21 messages_sent
22 contribution_index
23 sentiment_avg
24 emotionality_avg
25 contribution_index_oscillation
26 activity_entanglement
27 ALTERNATIVE_REALITIES_Treehugger
28 ALTERNATIVE_REALITIES_Fatherlander
29 ALTERNATIVE_REALITIES_Spiritualism
30 ALTERNATIVE_REALITIES_Nerd
31 EMOTIONS_Fear
32 EMOTIONS_Happy
33 EMOTIONS_Sad
34 EMOTIONS_Anger
35 Groupflow_Beeflow
36 Groupflow_Leechflow
37 Groupflow_Antflow
38 ethical_likelihood
39 financial_likelihood
40 health_likelihood
41 recreational_likelihood
42 social_likelihood
43 total_likelihood
44 ethical_perceived
45 financial_perceived
46 he

### Using the Models

In [None]:
def create_models_given_random(df, n_iterations=500):
    """Evaluate both regression models on repeatedly drawn random feature sets.

    In every iteration ``create_random_df`` draws a random set of predictor
    columns and ``evaluate_regression_models`` cross-validates the linear and
    ridge models on it.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature frame passed on to ``create_random_df``.
    n_iterations : int, optional
        Number of random feature sets to evaluate (default 500).

    Returns
    -------
    pandas.DataFrame
        One row per iteration: the drawn column labels under ``Index`` plus
        the MSE, RMSE, MAE and R2 of each model.
    """
    results_list = []

    for _ in range(n_iterations):
        df_test, index = create_random_df(df)
        results = evaluate_regression_models(df_test)
        linear_reg_results = results.get('Linear Regression', {})
        ridge_reg_results = results.get('Ridge Regression', {})

        results_list.append({
            'Index': index,
            'LinearRegression_MSE': linear_reg_results.get('Mean Squared Error (MSE)'),
            'LinearRegression_RMSE': linear_reg_results.get('Root Mean Squared Error (RMSE)'),
            'LinearRegression_MAE': linear_reg_results.get('Mean Absolute Error (MAE)'),
            'LinearRegression_R2': linear_reg_results.get('R-squared (R2)'),
            'Ridge_MSE': ridge_reg_results.get('Mean Squared Error (MSE)'),
            'Ridge_RMSE': ridge_reg_results.get('Root Mean Squared Error (RMSE)'),
            'Ridge_MAE': ridge_reg_results.get('Mean Absolute Error (MAE)'),
            'Ridge_R2': ridge_reg_results.get('R-squared (R2)'),
        })

    # Hand the collected rows back to the caller; previously the frame was
    # built and then dropped, so the function returned None.
    return pd.DataFrame(results_list)


In [None]:
# Exhaustive search: cross-validate both models on every candidate feature
# combination and collect one result row per combination.
results_list = []

# Column positions that are part of every combination.
my_list = [1, 22]

all_comb = generate_features_index(my_list)

for combination in all_comb:
  # Pass a copy so the entries of all_comb can never be modified by the
  # callee; this keeps the cell safe to re-run.
  df_test = select_columns_by_index(df, list(combination))
  # Labels of the evaluated columns (the frame's last column is included).
  index = df_test.columns.to_list()
  results = evaluate_regression_models(df_test)
  linear_reg_results = results.get('Linear Regression', {})
  ridge_reg_results = results.get('Ridge Regression', {})

  result_dict = {'Index': index,
                  'LinearRegression_MSE': linear_reg_results.get('Mean Squared Error (MSE)'),
                  'LinearRegression_RMSE': linear_reg_results.get('Root Mean Squared Error (RMSE)'),
                  'LinearRegression_MAE': linear_reg_results.get('Mean Absolute Error (MAE)'),
                  'LinearRegression_R2': linear_reg_results.get('R-squared (R2)'),
                  'Ridge_MSE': ridge_reg_results.get('Mean Squared Error (MSE)'),
                  'Ridge_RMSE': ridge_reg_results.get('Root Mean Squared Error (RMSE)'),
                  'Ridge_MAE': ridge_reg_results.get('Mean Absolute Error (MAE)'),
                  'Ridge_R2': ridge_reg_results.get('R-squared (R2)'),}

  results_list.append(result_dict)

# One row per evaluated combination.
results_df = pd.DataFrame(results_list)

In [None]:
# Rank the combinations by ascending LinearRegression_RMSE and show the best one.
# (A bare `results_df_sorted.head(100)` used to sit before the last line; as a
# non-final expression its value was discarded, so it has been removed.)
results_df_sorted = results_df.sort_values(by='LinearRegression_RMSE')
results_df_sorted.iloc[0]

Index                    [text_anger, messages_sent, social_perceived, ...
LinearRegression_MSE                                              0.049896
LinearRegression_RMSE                                             0.218898
LinearRegression_MAE                                               0.17345
LinearRegression_R2                                               0.240337
Ridge_MSE                                                         0.060116
Ridge_RMSE                                                        0.241278
Ridge_MAE                                                         0.198344
Ridge_R2                                                          0.082611
DecisionTree_MSE                                                      None
DecisionTree_RMSE                                                     None
DecisionTree_MAE                                                      None
DecisionTree_R2                                                       None
Name: 131870, dtype: obje

In [None]:
# Summary statistics of every numeric metric column across all evaluated combinations.
results_df.describe()

Unnamed: 0,LinearRegression_MSE,LinearRegression_RMSE,LinearRegression_MAE,LinearRegression_R2,Ridge_MSE,Ridge_RMSE,Ridge_MAE,Ridge_R2
count,154880.0,154880.0,154880.0,154880.0,154880.0,154880.0,154880.0,154880.0
mean,0.203105,0.32995,0.256244,-1.848023,0.078206,0.274583,0.227713,-0.202002
std,0.413075,0.127727,0.057416,5.393993,0.00615,0.010922,0.010854,0.097757
min,0.049896,0.218898,0.169962,-50.417556,0.058436,0.236593,0.185863,-0.685158
25%,0.079168,0.275989,0.226626,-0.517183,0.074254,0.267515,0.220426,-0.260899
50%,0.086262,0.288841,0.239427,-0.335786,0.078186,0.275305,0.228988,-0.205003
75%,0.098525,0.307199,0.255362,-0.217274,0.081653,0.281462,0.235198,-0.136829
max,3.92365,1.094647,0.569391,0.240337,0.112564,0.324921,0.269696,0.11108


In [None]:
# Save the 1000 combinations with the lowest LinearRegression_RMSE to Drive.
out_file= r'/content/drive/MyDrive/Projects/tps/finals/data/ALL_model_regression_try_all_features.xlsx'
results_df_sorted.head(1000).to_excel(out_file)

In [None]:
# Rank the combinations by LinearRegression_R2, highest first, save the top
# 1000 to Drive and display the top 100.
results_df_sorted_r2 = results_df.sort_values(by='LinearRegression_R2', ascending=False)
out_file= r'/content/drive/MyDrive/Projects/tps/finals/data/ALL_model_regression_r2.xlsx'
results_df_sorted_r2.head(1000).to_excel(out_file)
results_df_sorted_r2.head(100)

Unnamed: 0,Index,LinearRegression_MSE,LinearRegression_RMSE,LinearRegression_MAE,LinearRegression_R2,Ridge_MSE,Ridge_RMSE,Ridge_MAE,Ridge_R2,DecisionTree_MSE,DecisionTree_RMSE,DecisionTree_MAE,DecisionTree_R2
4905,"[indiv_spoken_time, EMOTIONS_Happy, health_lik...",3.92365,1.094647,0.537202,-50.417556,0.071652,0.262212,0.216161,-0.093629,,,,
5304,"[indiv_spoken_time, EMOTIONS_Happy, purity_san...",3.92365,1.094647,0.537202,-50.417556,0.071652,0.262212,0.216161,-0.093629,,,,
7737,"[indiv_spoken_time, Groupflow_Antflow, purity_...",3.820327,1.086655,0.55526,-49.081041,0.079414,0.277884,0.232738,-0.221813,,,,
7611,"[indiv_spoken_time, Groupflow_Antflow, A, puri...",3.820327,1.086655,0.55526,-49.081041,0.079414,0.277884,0.232738,-0.221813,,,,
6703,"[indiv_spoken_time, Groupflow_Beeflow, fairnes...",3.72383,1.071308,0.553777,-47.814725,0.077851,0.274999,0.229991,-0.193408,,,,
6640,"[indiv_spoken_time, Groupflow_Beeflow, A, fair...",3.72383,1.071308,0.553777,-47.814725,0.077851,0.274999,0.229991,-0.193408,,,,
5172,"[indiv_spoken_time, EMOTIONS_Happy, A, health_...",3.69482,1.074674,0.536638,-47.444571,0.073224,0.265299,0.218857,-0.118791,,,,
4899,"[indiv_spoken_time, EMOTIONS_Happy, health_lik...",3.69482,1.074674,0.536638,-47.444571,0.073224,0.265299,0.218857,-0.118791,,,,
5191,"[indiv_spoken_time, EMOTIONS_Happy, A, purity_...",3.642554,1.072411,0.555578,-46.767353,0.081704,0.28147,0.237269,-0.25593,,,,
5317,"[indiv_spoken_time, EMOTIONS_Happy, purity_san...",3.642554,1.072411,0.555578,-46.767353,0.081704,0.28147,0.237269,-0.25593,,,,


In [None]:
# Express an RMSE obtained on the scaled target in the target's original units.
rmse= 0.17
# rescale_rmse multiplies `rmse` by (max - min) of the column it is given.
# NOTE(review): the preparation cell min-max scales `df`, 'theory' included, so
# df['theory'] would span at most 1.0 at this point; the unscaled 'theory'
# column is presumably what should be passed here -- confirm.
rescaled_rmse = rescale_rmse(rmse,df['theory'])
print("Rescaled RMSE:", rescaled_rmse)

Rescaled RMSE: 0.9469000000000001
