THIS NOTEBOOK FITS LINEAR REGRESSION MODELS FOR THE COMBATIVE AND DELIBERATIVE SCORES

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import statsmodels.api as sm


LOAD DATA FROM CSV INTO A DATAFRAME

In [None]:
# Load the compiled CSV into `new_df`, the frame the regression cell
# reads its feature and target columns from.
new_df = pd.read_csv(
    filepath_or_buffer='./../data/compiled-posts/normalised_labelled_data.csv'
)

LINEAR REGRESSION

In [None]:
# Predictor columns shared by both regressions, and the two target columns.
features = ['Toxicity', 'Rationality', 'Mutual Respect', 'Emotion', 'Moderator', 'Diversity']
target_combative = 'Combative'
target_deliberative = 'Deliberative'

# Hold-out fraction and seed for the train/test split, named so they can be
# tuned in one place instead of being repeated as magic numbers.
TEST_SIZE = 0.2
RANDOM_STATE = 42


def _fit_and_score(X_fit, X_eval, y_fit, y_eval):
    """Fit a LinearRegression on the training rows and score it on held-out rows.

    Args:
        X_fit: Feature rows used for fitting.
        X_eval: Feature rows used for evaluation.
        y_fit: Target values aligned with ``X_fit``.
        y_eval: Target values aligned with ``X_eval``.

    Returns:
        A ``(model, predictions, mse)`` tuple: the fitted estimator, its
        predictions for ``X_eval``, and the mean squared error of those
        predictions against ``y_eval``.
    """
    model = LinearRegression()
    model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    return model, predictions, mean_squared_error(y_eval, predictions)


# Split the data into features and targets
X = new_df[features]
y_combative = new_df[target_combative]
y_deliberative = new_df[target_deliberative]

# Split the features and BOTH targets in a single call. The earlier version
# split twice with the same seed and overwrote X_train/X_test on the second
# call, so row alignment between X and each target held only by coincidence
# of identical arguments. One call makes that alignment explicit.
(
    X_train, X_test,
    y_combative_train, y_combative_test,
    y_deliberative_train, y_deliberative_test,
) = train_test_split(
    X, y_combative, y_deliberative,
    test_size=TEST_SIZE, random_state=RANDOM_STATE,
)

# Fit and evaluate the linear regression model for Combative
model_combative, y_combative_pred, mse_combative = _fit_and_score(
    X_train, X_test, y_combative_train, y_combative_test
)

# Fit and evaluate the linear regression model for Deliberative
model_deliberative, y_deliberative_pred, mse_deliberative = _fit_and_score(
    X_train, X_test, y_deliberative_train, y_deliberative_test
)

In [None]:
# Pair every feature name with the coefficient its fitted model assigned to it.
# Nothing is printed here; the mappings are displayed by the reporting cell.
combative_coef = {
    name: weight for name, weight in zip(features, model_combative.coef_)
}
deliberative_coef = {
    name: weight for name, weight in zip(features, model_deliberative.coef_)
}

MSE AND COEFFICIENT VALUES:

In [None]:
# Report the test-set error of each model first, then its per-feature weights.
print("Mean Squared Error for Combative Prediction:", mse_combative)
print("Mean Squared Error for Deliberative Prediction:", mse_deliberative)

# Each section is a heading followed by one "feature: coefficient" line per entry.
coefficient_sections = (
    ("\nCombative Comments Coefficients:", combative_coef),
    ("\nDeliberative Comments Coefficients:", deliberative_coef),
)
for heading, coefficients in coefficient_sections:
    print(heading)
    for feature, coef in coefficients.items():
        print(f"{feature}: {coef}")

P-VALUE CALCULATION

In [None]:
# Build the design matrix once: the training features plus a constant column,
# shared by both statsmodels fits below.
X_train_sm = sm.add_constant(X_train)

# Refit each target with statsmodels OLS, which exposes per-term p-values.
model_combative_sm = sm.OLS(endog=y_combative_train, exog=X_train_sm).fit()
model_deliberative_sm = sm.OLS(endog=y_deliberative_train, exog=X_train_sm).fit()

p_values_combative = model_combative_sm.pvalues
p_values_deliberative = model_deliberative_sm.pvalues

# Print the p-values, one headed section per model.
p_value_sections = (
    ("\nP-values for Combative Comments Regression Model:", p_values_combative),
    ("\nP-values for Deliberative Comments Regression Model:", p_values_deliberative),
)
for heading, p_values in p_value_sections:
    print(heading)
    print(p_values)