In [2]:
# data manipulation imports
import numpy as np
import pandas as pd

# data saving imports
import pickle
import os

# custom imports
from regression_class import RedditRegression as RR
from regression_class import TimestampClass

In [18]:
# paths of the pickled inputs that are loaded further down
thread_infile = "clean_5_thread_data.p"
regression_infile = "regression_thread_data.p"

# output locations: the results directory and the metrics file name
metrics_outfile = "regression_metrics"
outdir = "logistic_regression/logregs_23102023"

In [4]:
# read in files
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# these files must come from a trusted source.
# Context managers make sure both file handles are closed as soon as the
# objects are loaded, instead of leaking until garbage collection.
with open(regression_infile, 'rb') as regression_file:
    regression_df = pickle.load(regression_file)
with open(thread_infile, 'rb') as thread_file:
    thread_df = pickle.load(thread_file)

In [23]:
# subreddits to run the regressions for; each name is also used as a key
# into the loaded regression and thread data
subreddits = [
    'books',
    'crypto',
    'conspiracy',
    'politics',
]

In [24]:
# params
# columns offered to every regression as the 'x_cols' parameter
X_COLS = [
    'sentiment_sign', 'sentiment_magnitude', 'hour', 'num_dayofweek','activity_ratio',
    'mean_author_sentiment_sign', 'mean_author_sentiment_magnitude', 'log_author_all_activity_count',
    ]

# want to run over multiple collection windows and multiple activity thresholds
collection_windows = [1, 3, 7, 14]
activity_thresholds = [0, 1, 2, 5]

# make regression params dict to feed to logreg
# layout: regression_params_dict[activity_threshold][collection_window] -> params dict
regression_params_dict = {}

# store outdir names
# layout: outdir_names[activity_threshold] -> directory path under `outdir`
outdir_names = {}

# make out params dict to save spreadsheets
# layout: out_params_dict[activity_threshold][collection_window] -> input file names
out_params_dict = {}

for activity_threshold_size in activity_thresholds:
    regression_params_dict[activity_threshold_size] = {}
    out_params_dict[activity_threshold_size] = {}
    outdir_names[activity_threshold_size] = f'{outdir}/activity_threshold_{activity_threshold_size}'

    for collection_window_size in collection_windows:
        # NOTE: every params dict references the same X_COLS list object.
        # The 'FSS' key used to appear twice in this literal (same value both
        # times); the redundant second entry has been dropped. Key order and
        # values of the resulting dict are unchanged.
        regression_params_dict[activity_threshold_size][collection_window_size] = {
            'regression_type': "logistic",
            'collection_window': collection_window_size,
            'validation_window': 7,
            'FSS': True,
            'performance_scoring_method': 'roc_auc',
            'x_cols': X_COLS,
            'y_col': 'success',
            'metrics': ['roc_auc', 'aic'],
            'activity_threshold': activity_threshold_size,
        }
        # record which input files this parameter combination was run on
        out_params_dict[activity_threshold_size][collection_window_size] = {
            'regression_infile': regression_infile,
            'thread_infile': thread_infile
        }


In [20]:
# make out directories
# os.makedirs also creates any missing parent directories (outdir is a nested
# path, which plain os.mkdir cannot create when the parent is absent), and
# exist_ok=True makes the call a no-op for directories that already exist.
for outdirname in [outdir] + list(outdir_names.values()):
    os.makedirs(outdirname, exist_ok=True)

In [27]:
# place to store logregs
# layout: subreddit_logregs[activity_threshold][collection_window][subreddit] -> RR instance
subreddit_logregs = {}

# go through activity thresholds, collection windows and subreddits
for activity_threshold_size, window_params in regression_params_dict.items():
    print(f'###{activity_threshold_size}###')
    subreddit_logregs[activity_threshold_size] = {}
    for collection_window_size, base_params in window_params.items():
        print(f'##{collection_window_size}##')
        subreddit_logregs[activity_threshold_size][collection_window_size] = {}
        for subreddit in subreddits:
            print(f'#{subreddit}#')
            # Build a fresh params dict for each subreddit rather than
            # mutating the shared one in place. Previously all four RR
            # instances of a (threshold, window) pair were handed the very
            # same dict object, so any instance holding on to it would end up
            # seeing the last subreddit's name and data.
            # NOTE(review): this is a shallow copy, so nested values (e.g. the
            # 'x_cols' list) are still shared; confirm RR does not rely on
            # writing back into regression_params_dict.
            subreddit_params = {
                **base_params,
                'name': subreddit,
                'regression_data': regression_df[subreddit],
                'thread_data': thread_df[subreddit],
            }

            logreg = RR(regression_params=subreddit_params)
            # store before running so the object is kept even if main() raises
            subreddit_logregs[activity_threshold_size][collection_window_size][subreddit] = logreg

            logreg.main()



###0###
##1##
#books#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.638385
         Iterations 4
Model 2
Optimization terminated successfully.
         Current function value: 0.636099
         Iterations 5
Model 3
Optimization terminated successfully.
         Current function value: 0.636067
         Iterations 5
Model 4
Optimization terminated successfully.
         Current function value: 0.635839
         Iterations 5
Model 5
Optimization terminated successfully.
         Current function value: 0.635675
         Iterations 5
Model 6
Optimization terminated successfully.
         Current function value: 0.634446
         Iterations 5
Model 7
Optimization terminated successfully.
         Current function value: 0.634294
         Iterations 5
Model 8
Optimization terminated successfully.
         Current function value: 0.634106
         Iterations 5
#crypto#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.600076
         Iterations 5
Model 2
Optimization terminated successfully.
         Current function value: 0.599034
         Iterations 5
Model 3
Optimization terminated successfully.
         Current function value: 0.598444
         Iterations 5
Model 4
Optimization terminated successfully.
         Current function value: 0.597218
         Iterations 5
Model 5
Optimization terminated successfully.
         Current function value: 0.596721
         Iterations 5
Model 6
Optimization terminated successfully.
         Current function value: 0.596529
         Iterations 5
Model 7
Optimization terminated successfully.
         Current function value: 0.593682
         Iterations 5
Model 8
Optimization terminated successfully.
         Current function value: 0.591712
         Iterations 5
#conspiracy#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.408617
         Iterations 6
Model 2
Optimization terminated successfully.
         Current function value: 0.406022
         Iterations 7
Model 3
Optimization terminated successfully.
         Current function value: 0.405355
         Iterations 7
Model 4
Optimization terminated successfully.
         Current function value: 0.404340
         Iterations 7
Model 5
Optimization terminated successfully.
         Current function value: 0.404185
         Iterations 7
Model 6
Optimization terminated successfully.
         Current function value: 0.403754
         Iterations 7
Model 7
Optimization terminated successfully.
         Current function value: 0.403750
         Iterations 7
Model 8
Optimization terminated successfully.
         Current function value: 0.403710
         Iterations 7
#politics#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.624368
         Iterations 5
Model 2
Optimization terminated successfully.
         Current function value: 0.623388
         Iterations 5
Model 3
Optimization terminated successfully.
         Current function value: 0.619996
         Iterations 5
Model 4
Optimization terminated successfully.
         Current function value: 0.618442
         Iterations 5
Model 5
Optimization terminated successfully.
         Current function value: 0.618113
         Iterations 5
Model 6
Optimization terminated successfully.
         Current function value: 0.617771
         Iterations 5
Model 7
Optimization terminated successfully.
         Current function value: 0.617749
         Iterations 5
Model 8
Optimization terminated successfully.
         Current function value: 0.615370
         Iterations 5
##3##
#books#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.632964
         Iterations 5
Model 2
Optimization terminated successfully.
         Current function value: 0.632393
         Iterations 5
Model 3
Optimization terminated successfully.
         Current function value: 0.632018
         Iterations 5
Model 4
Optimization terminated successfully.
         Current function value: 0.630748
         Iterations 5
Model 5
Optimization terminated successfully.
         Current function value: 0.630300
         Iterations 5
Model 6
Optimization terminated successfully.
         Current function value: 0.630288
         Iterations 5
Model 7
Optimization terminated successfully.
         Current function value: 0.629962
         Iterations 5
Model 8
Optimization terminated successfully.
         Current function value: 0.629127
         Iterations 5
#crypto#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.573472
         Iterations 5
Model 2
Optimization terminated successfully.
         Current function value: 0.572911
         Iterations 5
Model 3
Optimization terminated successfully.
         Current function value: 0.572621
         Iterations 5
Model 4
Optimization terminated successfully.
         Current function value: 0.572298
         Iterations 5
Model 5
Optimization terminated successfully.
         Current function value: 0.571308
         Iterations 5
Model 6
Optimization terminated successfully.
         Current function value: 0.571207
         Iterations 5
Model 7
Optimization terminated successfully.
         Current function value: 0.569587
         Iterations 5
Model 8
Optimization terminated successfully.
         Current function value: 0.566770
         Iterations 5
#conspiracy#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.402574
         Iterations 6
Model 2
Optimization terminated successfully.
         Current function value: 0.398185
         Iterations 7
Model 3
Optimization terminated successfully.
         Current function value: 0.397484
         Iterations 7
Model 4
Optimization terminated successfully.
         Current function value: 0.397275
         Iterations 7
Model 5
Optimization terminated successfully.
         Current function value: 0.397006
         Iterations 7
Model 6
Optimization terminated successfully.
         Current function value: 0.396526
         Iterations 7
Model 7
Optimization terminated successfully.
         Current function value: 0.396509
         Iterations 7
Model 8
Optimization terminated successfully.
         Current function value: 0.396486
         Iterations 7
#politics#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.600810
         Iterations 5
Model 2
Optimization terminated successfully.
         Current function value: 0.597381
         Iterations 5
Model 3
Optimization terminated successfully.
         Current function value: 0.594680
         Iterations 5
Model 4
Optimization terminated successfully.
         Current function value: 0.593992
         Iterations 5
Model 5
Optimization terminated successfully.
         Current function value: 0.593691
         Iterations 5
Model 6
Optimization terminated successfully.
         Current function value: 0.593554
         Iterations 5
Model 7
Optimization terminated successfully.
         Current function value: 0.593422
         Iterations 5
Model 8
Optimization terminated successfully.
         Current function value: 0.591034
         Iterations 5
##7##
#books#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.629590
         Iterations 5
Model 2
Optimization terminated successfully.
         Current function value: 0.628590
         Iterations 5
Model 3
Optimization terminated successfully.
         Current function value: 0.628251
         Iterations 5
Model 4
Optimization terminated successfully.
         Current function value: 0.627771
         Iterations 5
Model 5
Optimization terminated successfully.
         Current function value: 0.626618
         Iterations 5
Model 6
Optimization terminated successfully.
         Current function value: 0.626441
         Iterations 5
Model 7
Optimization terminated successfully.
         Current function value: 0.625472
         Iterations 5
Model 8
Optimization terminated successfully.
         Current function value: 0.624503
         Iterations 5
#crypto#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.549537
         Iterations 5
Model 2
Optimization terminated successfully.
         Current function value: 0.548634
         Iterations 5
Model 3
Optimization terminated successfully.
         Current function value: 0.548246
         Iterations 5
Model 4
Optimization terminated successfully.
         Current function value: 0.543259
         Iterations 5
Model 5
Optimization terminated successfully.
         Current function value: 0.542745
         Iterations 5
Model 6
Optimization terminated successfully.
         Current function value: 0.542717
         Iterations 5
Model 7
Optimization terminated successfully.
         Current function value: 0.542650
         Iterations 5
Model 8
Optimization terminated successfully.
         Current function value: 0.542306
         Iterations 5
#conspiracy#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.397481
         Iterations 6
Model 2
Optimization terminated successfully.
         Current function value: 0.392650
         Iterations 7
Model 3
Optimization terminated successfully.
         Current function value: 0.392181
         Iterations 7
Model 4
Optimization terminated successfully.
         Current function value: 0.390470
         Iterations 7
Model 5
Optimization terminated successfully.
         Current function value: 0.390371
         Iterations 7
Model 6
Optimization terminated successfully.
         Current function value: 0.390352
         Iterations 7
Model 7
Optimization terminated successfully.
         Current function value: 0.390097
         Iterations 7
Model 8
Optimization terminated successfully.
         Current function value: 0.390047
         Iterations 7
#politics#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.592303
         Iterations 5
Model 2
Optimization terminated successfully.
         Current function value: 0.589155
         Iterations 5
Model 3
Optimization terminated successfully.
         Current function value: 0.587641
         Iterations 5
Model 4
Optimization terminated successfully.
         Current function value: 0.587064
         Iterations 5
Model 5
Optimization terminated successfully.
         Current function value: 0.586852
         Iterations 5
Model 6
Optimization terminated successfully.
         Current function value: 0.586762
         Iterations 5
Model 7
Optimization terminated successfully.
         Current function value: 0.586537
         Iterations 5
Model 8
Optimization terminated successfully.
         Current function value: 0.584646
         Iterations 5
##14##
#books#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.630312
         Iterations 5
Model 2
Optimization terminated successfully.
         Current function value: 0.629502
         Iterations 5
Model 3
Optimization terminated successfully.
         Current function value: 0.627126
         Iterations 5
Model 4
Optimization terminated successfully.
         Current function value: 0.626495
         Iterations 5
Model 5
Optimization terminated successfully.
         Current function value: 0.625898
         Iterations 5
Model 6
Optimization terminated successfully.
         Current function value: 0.625063
         Iterations 5
Model 7
Optimization terminated successfully.
         Current function value: 0.625057
         Iterations 5
Model 8
Optimization terminated successfully.
         Current function value: 0.625029
         Iterations 5
#crypto#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.536127
         Iterations 5
Model 2
Optimization terminated successfully.
         Current function value: 0.535552
         Iterations 5
Model 3
Optimization terminated successfully.
         Current function value: 0.534944
         Iterations 5
Model 4
Optimization terminated successfully.
         Current function value: 0.533658
         Iterations 5
Model 5
Optimization terminated successfully.
         Current function value: 0.533628
         Iterations 6
Model 6
Optimization terminated successfully.
         Current function value: 0.533621
         Iterations 6
Model 7
Optimization terminated successfully.
         Current function value: 0.533609
         Iterations 6
Model 8
Optimization terminated successfully.
         Current function value: 0.529690
         Iterations 5
#conspiracy#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.384263
         Iterations 6
Model 2
Optimization terminated successfully.
         Current function value: 0.379524
         Iterations 7
Model 3
Optimization terminated successfully.
         Current function value: 0.378613
         Iterations 7
Model 4
Optimization terminated successfully.
         Current function value: 0.375560
         Iterations 7
Model 5
Optimization terminated successfully.
         Current function value: 0.374591
         Iterations 7
Model 6
Optimization terminated successfully.
         Current function value: 0.374385
         Iterations 7
Model 7
Optimization terminated successfully.
         Current function value: 0.374375
         Iterations 7
Model 8
Optimization terminated successfully.
         Current function value: 0.374280
         Iterations 7
#politics#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.586644
         Iterations 5
Model 2
Optimization terminated successfully.
         Current function value: 0.583591
         Iterations 5
Model 3
Optimization terminated successfully.
         Current function value: 0.582219
         Iterations 5
Model 4
Optimization terminated successfully.
         Current function value: 0.581812
         Iterations 5
Model 5
Optimization terminated successfully.
         Current function value: 0.581596
         Iterations 5
Model 6
Optimization terminated successfully.
         Current function value: 0.581505
         Iterations 5
Model 7
Optimization terminated successfully.
         Current function value: 0.581264
         Iterations 5
Model 8
Optimization terminated successfully.
         Current function value: 0.579977
         Iterations 5
###1###
##1##
#books#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.623152
         Iterations 5
Model 2
Optimization terminated successfully.
         Current function value: 0.617283
         Iterations 5
Model 3
Optimization terminated successfully.
         Current function value: 0.610623
         Iterations 5
Model 4
Optimization terminated successfully.
         Current function value: 0.610458
         Iterations 5
Model 5
Optimization terminated successfully.
         Current function value: 0.609982
         Iterations 5
Model 6
Optimization terminated successfully.
         Current function value: 0.609730
         Iterations 5
Model 7
Optimization terminated successfully.
         Current function value: 0.608919
         Iterations 5
Model 8
Optimization terminated successfully.
         Current function value: 0.603812
         Iterations 6
#crypto#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.578220
         Iterations 5
Model 2
Optimization terminated successfully.
         Current function value: 0.576443
         Iterations 5
Model 3
Optimization terminated successfully.
         Current function value: 0.569880
         Iterations 5
Model 4
Optimization terminated successfully.
         Current function value: 0.568855
         Iterations 5
Model 5
Optimization terminated successfully.
         Current function value: 0.568446
         Iterations 5
Model 6
Optimization terminated successfully.
         Current function value: 0.568266
         Iterations 5
Model 7
Optimization terminated successfully.
         Current function value: 0.568152
         Iterations 5
Model 8
Optimization terminated successfully.
         Current function value: 0.568103
         Iterations 5
#conspiracy#
# Period 1 #
Performing thresholding
Performing thresholding
Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 0.278534
         Iterations 7
Model 2
Optimization terminated successfully.
         Current function value: 0.277809
         Iterations 7
Model 3
Optimization terminated successfully.
         Current function value: 0.277484
         Iterations 7
Model 4
Optimization terminated successfully.
         Current function value: 0.276267
         Iterations 7
Model 5
Optimization terminated successfully.
         Current function value: 0.275997
         Iterations 7
Model 6
Optimization terminated successfully.
         Current function value: 0.275996
         Iterations 7
Model 7
Optimization terminated successfully.
         Current function value: 0.275991
         Iterations 7
Model 8
Optimization terminated successfully.
         Current function value: 0.275950
         Iterations 7
#politics#
