In [1]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import LinearRegression, ElasticNet, SGDRegressor, BayesianRidge, Lasso
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.svm import SVR

from catboost import CatBoostRegressor
from xgboost.sklearn import XGBRegressor
from lightgbm import LGBMRegressor

import pandas as pd
import numpy as np

SEED = 1234
np.random.seed(SEED)

In [198]:
csv_file = "../merged_csv.csv"
df = pd.read_csv(csv_file)

df.columns

Index(['Post Created Date', 'Post Created Time', 'Total Interactions', 'Likes',
       'Comments', 'Shares', 'Love', 'Wow', 'Haha', 'Sad', 'Angry', 'Care',
       'Message', 'Link', 'Score', 'comment_csv', 'comment_neg', 'comment_pos',
       'comment_total', 'timestamp', 'week_day', 'time_slot', 'r_pol',
       'c_pol'],
      dtype='object')

In [199]:
X = df[ ['comment_pos', 'comment_neg', 'c_pol'] ]
y = df["r_pol"]

scaler = MinMaxScaler()
X = scaler.fit_transform(X)

std_scaler = StandardScaler()
X = std_scaler.fit_transform(X)

print("Scaled X")
print(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

Scaled X
[[-0.1916275  -0.52660675  0.34465116]
 [-0.9749287  -1.78517833  1.8690589 ]
 [-1.44490942 -1.15589254  0.50798056]
 ...
 [ 0.43501346 -0.52660675  0.55491429]
 [ 0.27835322 -0.31684482  0.33589019]
 [-0.50494798  0.73196484 -0.75923031]]


# Neighbors
### KNN Regressor

In [200]:
knn = KNeighborsRegressor()
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

y_test_list = y_test.tolist()

# print("Actual\t\tPred")
# for i in range(len(y_pred)):
#     print(f"{y_test_list[i]} \t {y_pred[i]}")

print(f"Mean Abs Error: { mean_absolute_error(y_test, y_pred) }")
print(f"Mean Sq. Error: { mean_squared_error(y_test, y_pred) }")
print(f"R2 Score: { r2_score(y_test, y_pred) }")

Mean Abs Error: 0.34390690998297957
Mean Sq. Error: 0.24746981852899494
R2 Score: -0.2691184036850678


#   Linear Models
### Linear Reg

In [201]:
lin = LinearRegression()
lin.fit(X_train, y_train)
y_pred = lin.predict(X_test)

print(f"Mean Abs Error: { mean_absolute_error(y_test, y_pred) }")
print(f"Mean Sq. Error: { mean_squared_error(y_test, y_pred) }")
print(f"R2 Score: { r2_score(y_test, y_pred) }")

Mean Abs Error: 0.3074810450463995
Mean Sq. Error: 0.18205070582785624
R2 Score: 0.06637543703973581


### ElasticNet

In [202]:
eln = ElasticNet()
eln.fit(X_train, y_train)
y_pred = eln.predict(X_test)

print(f"Mean Abs Error: { mean_absolute_error(y_test, y_pred) }")
print(f"Mean Sq. Error: { mean_squared_error(y_test, y_pred) }")
print(f"R2 Score: { r2_score(y_test, y_pred) }")

Mean Abs Error: 0.32617369688055303
Mean Sq. Error: 0.19566465574784692
R2 Score: -0.003442023905638747


### SGDRegressor

In [203]:
sgd = SGDRegressor()
sgd.fit(X_train, y_train)
y_pred = sgd.predict(X_test)

print(f"Mean Abs Error: { mean_absolute_error(y_test, y_pred) }")
print(f"Mean Sq. Error: { mean_squared_error(y_test, y_pred) }")
print(f"R2 Score: { r2_score(y_test, y_pred) }")

Mean Abs Error: 0.30713744283340744
Mean Sq. Error: 0.18198889498827292
R2 Score: 0.06669242629737016


### Bayesian Ridge

In [204]:
bay = BayesianRidge()
bay.fit(X_train, y_train)
y_pred = bay.predict(X_test)

print(f"Mean Abs Error: { mean_absolute_error(y_test, y_pred) }")
print(f"Mean Sq. Error: { mean_squared_error(y_test, y_pred) }")
print(f"R2 Score: { r2_score(y_test, y_pred) }")

Mean Abs Error: 0.30788172879886294
Mean Sq. Error: 0.1817945848159504
R2 Score: 0.06768892202029742


#   Ensemble
### GradientBoostingRegressor

In [205]:
gbr = GradientBoostingRegressor()
gbr.fit(X_train, y_train)
y_pred = gbr.predict(X_test)

print(f"Mean Abs Error: { mean_absolute_error(y_test, y_pred) }")
print(f"Mean Sq. Error: { mean_squared_error(y_test, y_pred) }")
print(f"R2 Score: { r2_score(y_test, y_pred) }")

Mean Abs Error: 0.30334640479474867
Mean Sq. Error: 0.18852463295880606
R2 Score: 0.033174701229434


### Random Forest

In [206]:
ranf = RandomForestRegressor()
ranf.fit(X_train, y_train)
y_pred = ranf.predict(X_test)

print(f"Mean Abs Error: { mean_absolute_error(y_test, y_pred) }")
print(f"Mean Sq. Error: { mean_squared_error(y_test, y_pred) }")
print(f"R2 Score: { r2_score(y_test, y_pred) }")

Mean Abs Error: 0.31473369616949165
Mean Sq. Error: 0.20247286236013246
R2 Score: -0.0383570707549965


#   Tree
### Decision Tree Reg

In [207]:
dec = DecisionTreeRegressor()
dec.fit(X_train, y_train)
y_pred = dec.predict(X_test)

print(f"Mean Abs Error: { mean_absolute_error(y_test, y_pred) }")
print(f"Mean Sq. Error: { mean_squared_error(y_test, y_pred) }")
print(f"R2 Score: { r2_score(y_test, y_pred) }")

Mean Abs Error: 0.31016850367804577
Mean Sq. Error: 0.2074507319711977
R2 Score: -0.06388546032629838


# Kernel
### KernelRidge

In [208]:
ker = KernelRidge()
ker.fit(X_train, y_train)
y_pred = ker.predict(X_test)

print(f"Mean Abs Error: { mean_absolute_error(y_test, y_pred) }")
print(f"Mean Sq. Error: { mean_squared_error(y_test, y_pred) }")
print(f"R2 Score: { r2_score(y_test, y_pred) }")

Mean Abs Error: 0.7719678274216661
Mean Sq. Error: 0.67441395513538
R2 Score: -2.4586486839164317


#   Boost
### Catboost

In [209]:
cat = CatBoostRegressor()
cat.fit(X_train, y_train)
y_pred = cat.predict(X_test)

print(f"Mean Abs Error: { mean_absolute_error(y_test, y_pred) }")
print(f"Mean Sq. Error: { mean_squared_error(y_test, y_pred) }")
print(f"R2 Score: { r2_score(y_test, y_pred) }")

Learning rate set to 0.038906
0:	learn: 0.4225783	total: 2.27ms	remaining: 2.27s
1:	learn: 0.4215368	total: 2.9ms	remaining: 1.45s
2:	learn: 0.4204615	total: 3.46ms	remaining: 1.15s
3:	learn: 0.4194762	total: 3.98ms	remaining: 992ms
4:	learn: 0.4187362	total: 4.61ms	remaining: 917ms
5:	learn: 0.4179150	total: 5.26ms	remaining: 872ms
6:	learn: 0.4171965	total: 6ms	remaining: 851ms
7:	learn: 0.4163812	total: 6.58ms	remaining: 816ms
8:	learn: 0.4157268	total: 7.17ms	remaining: 790ms
9:	learn: 0.4150431	total: 7.71ms	remaining: 764ms
10:	learn: 0.4144233	total: 8.33ms	remaining: 749ms
11:	learn: 0.4138318	total: 8.93ms	remaining: 735ms
12:	learn: 0.4132921	total: 9.54ms	remaining: 724ms
13:	learn: 0.4128789	total: 10ms	remaining: 707ms
14:	learn: 0.4124627	total: 10.5ms	remaining: 688ms
15:	learn: 0.4119604	total: 11ms	remaining: 679ms
16:	learn: 0.4115880	total: 11.5ms	remaining: 667ms
17:	learn: 0.4110970	total: 12.1ms	remaining: 658ms
18:	learn: 0.4107050	total: 12.6ms	remaining: 649ms


### XGBoost

In [210]:
xgb = XGBRegressor()
xgb.fit(X_train, y_train)
y_pred = xgb.predict(X_test)

print(f"Mean Abs Error: { mean_absolute_error(y_test, y_pred) }")
print(f"Mean Sq. Error: { mean_squared_error(y_test, y_pred) }")
print(f"R2 Score: { r2_score(y_test, y_pred) }")

Mean Abs Error: 0.31382391738599963
Mean Sq. Error: 0.20739116944115749
R2 Score: -0.06358000124650665


### LGBM

In [211]:
lgbm = LGBMRegressor()
lgbm.fit(X_train, y_train)
y_pred = lgbm.predict(X_test)

print(f"Mean Abs Error: { mean_absolute_error(y_test, y_pred) }")
print(f"Mean Sq. Error: { mean_squared_error(y_test, y_pred) }")
print(f"R2 Score: { r2_score(y_test, y_pred) }")

Mean Abs Error: 0.30868058521830555
Mean Sq. Error: 0.1932906629222715
R2 Score: 0.008732705130262142


#   SVM
### SVR

In [212]:
svr = SVR()
svr.fit(X_train, y_train)
y_pred = svr.predict(X_test)

print(f"Mean Abs Error: { mean_absolute_error(y_test, y_pred) }")
print(f"Mean Sq. Error: { mean_squared_error(y_test, y_pred) }")
print(f"R2 Score: { r2_score(y_test, y_pred) }")

Mean Abs Error: 0.2723429589224355
Mean Sq. Error: 0.21421878489268573
R2 Score: -0.09859458393107667


### Lasso

In [213]:
las = Lasso()
las.fit(X_train, y_train)
y_pred = las.predict(X_test)

print(f"Mean Abs Error: { mean_absolute_error(y_test, y_pred) }")
print(f"Mean Sq. Error: { mean_squared_error(y_test, y_pred) }")
print(f"R2 Score: { r2_score(y_test, y_pred) }")

Mean Abs Error: 0.32617369688055303
Mean Sq. Error: 0.19566465574784692
R2 Score: -0.003442023905638747
