In [1]:
# standard library imports
import os

# data manipulation imports
import pandas as pd
import numpy as np

# data saving imports
import pickle

# custom imports
from regression_class import RedditRegression as RR
from regression_class import TimestampClass
from regression_class import QuantileClass as qc

# stats imports
import statsmodels.formula.api as smf
from sklearn import linear_model
from sklearn import metrics
from mlxtend.feature_selection import SequentialFeatureSelector
from sklearn.preprocessing import StandardScaler

# plotting imports
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

In [2]:
# Load the pickled test dataset. The file is opened in a `with` block so the
# handle is closed as soon as unpickling finishes (the previous bare
# `open(...)` left it open until garbage collection).
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open('test_data_crypto_5_days.p', 'rb') as data_file:
    data = pickle.load(data_file)

# The pickle holds a mapping with (at least) these two entries.
regression_threads = data['regression_data']
all_data = data['all_data']

# Keep only the regression rows whose thread_size exceeds 1 ...
threads_started = regression_threads[regression_threads.thread_size > 1]
# ... and the rows of the full dataset whose thread_id belongs to one of them.
started_threads_all_data = all_data[all_data.thread_id.isin(threads_started.thread_id)]

In [3]:
# Column used as the dependent variable ('y_col' in the parameter dict).
y_col = "thread_size"

# Quantile cut points passed through as the 'quantiles' setting.
quantiles = [0.25, 0.50, 0.75]

# Candidate predictor columns ('x_cols' in the parameter dict).
# Keep this order stable: the feature-selection table displayed later in the
# notebook reports features by their position in this list
# (e.g. index 2 <-> "hour").
X_COLS = (
    # sentiment columns
    ["sentiment_sign", "sentiment_magnitude"]
    # timing columns
    + ["hour", "num_dayofweek"]
    # activity column
    + ["activity_ratio"]
    # author-level columns
    + [
        "mean_author_sentiment_sign",
        "mean_author_sentiment_magnitude",
        "log_author_all_activity_count",
    ]
)

# Settings bundle handed to RR(...) in the next cell. Built with keyword
# syntax; the resulting dict has the same string keys, values and insertion
# order as a literal would.
regression_params = dict(
    # label for this run
    name='crypto',
    # input frames: the filtered regression rows and the matching thread rows
    regression_data=threads_started,
    thread_data=started_threads_all_data,
    # model family
    regression_type='mnlogit',
    # window sizes (units are not visible in this notebook -- presumably
    # days, given the data file name; confirm in regression_class)
    collection_window=2,
    model_window=2,
    validation_window=1,
    # feature selection switch and how candidate models are scored
    FSS=True,
    performance_scoring_method="mnlogit",
    # predictors and target
    x_cols=X_COLS,
    y_col=y_col,
    # metrics requested from the run
    metrics=['mnlogit_accuracy', 'mnlogit_aucs', "mnlogit_mean_auc"],
    activity_threshold=2,
    # cut points for the quantile bins
    quantiles=quantiles,
)

In [4]:
# Build the regression object (RedditRegression, imported as RR) from the
# parameter dict defined in the previous cell.
test_mnlogit = RR(regression_params)

In [5]:
# Run the analysis. In the recorded run this prints "Running FSS" followed by
# one optimizer report per candidate model (Model 1 .. Model 8).
# NOTE(review): the numpy `_var` warnings in the recorded output look like a
# variance taken over a single CV score -- confirm in regression_class.
test_mnlogit.main()

Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 1.367844
         Iterations 4
Model 2
Optimization terminated successfully.
         Current function value: 1.359493
         Iterations 5
Model 3
Optimization terminated successfully.
         Current function value: 1.355116
         Iterations 5
Model 4
Optimization terminated successfully.
         Current function value: 1.344040
         Iterations 5
Model 5
Optimization terminated successfully.
         Current function value: 1.336794
         Iterations 5
Model 6
Optimization terminated successfully.
         Current function value: 1.335738
         Iterations 5
Model 7
Optimization terminated successfully.
         Current function value: 1.328613
         Iterations 5
Model 8
Optimization terminated successfully.
         Current function value: 1.328160
         Iterations 5


In [6]:
# Inspect the quantile information recorded by the run; the displayed dict
# has the keys 'quantile_ranges', 'quantile_counts' and 'val_quantile_ranges'.
test_mnlogit.quantile_data

{'quantile_ranges': [(2, 10), (11.0, 22), (23.0, 40), (41.0, 3465)],
 'quantile_counts':               count  val_count
 range                         
 (2, 10)          62         49
 (11.0, 22)       80         52
 (23.0, 40)       71         26
 (41.0, 3465)     74         46,
 'val_quantile_ranges': [(2, 10), (11.0, 22), (23.0, 40), (41.0, 3813)]}

In [7]:
# Show the feature-selection results table: one row per feature-subset size,
# with the selected feature indices/names and their scores.
test_mnlogit.FSS_metrics['metric_df']

Unnamed: 0,feature_idx,cv_scores,avg_score,feature_names,ci_bound,std_dev,std_err
1,"(2,)",[0.31010452961672474],0.310105,"(hour,)",,0.0,
2,"(2, 5)",[0.3588850174216028],0.358885,"(hour, mean_author_sentiment_sign)",,0.0,
3,"(0, 2, 5)",[0.3519163763066202],0.351916,"(sentiment_sign, hour, mean_author_sentiment_s...",,0.0,
4,"(0, 2, 4, 5)",[0.3693379790940767],0.369338,"(sentiment_sign, hour, activity_ratio, mean_au...",,0.0,
5,"(0, 1, 2, 4, 5)",[0.37630662020905925],0.376307,"(sentiment_sign, sentiment_magnitude, hour, ac...",,0.0,
6,"(0, 1, 2, 4, 5, 6)",[0.3797909407665505],0.379791,"(sentiment_sign, sentiment_magnitude, hour, ac...",,0.0,
7,"(0, 1, 2, 4, 5, 6, 7)",[0.37282229965156793],0.372822,"(sentiment_sign, sentiment_magnitude, hour, ac...",,0.0,
8,"(0, 1, 2, 3, 4, 5, 6, 7)",[0.3623693379790941],0.362369,"(sentiment_sign, sentiment_magnitude, hour, nu...",,0.0,


In [8]:
# Folder that receives the exported results. The original absolute path is
# specific to one machine, so it can be overridden by setting the
# MNLOGIT_TEST_OUTDIR environment variable before starting the kernel; when
# the variable is unset the original location is used unchanged.
outdir = os.environ.get(
    'MNLOGIT_TEST_OUTDIR',
    'C:/Users/snuzz/Documents/PhD/datasets/reddit_analyses/reddit_analyses/mnlogit/test',
)

# Full path of the Excel workbook written by the export step (kept as a plain
# string, as before).
outpath = f'{outdir}/test_output.xlsx'

In [9]:
# Export the run's results to the path in `outpath` (an .xlsx file,
# presumably an Excel workbook given the method name; the target folder must
# be writable).
test_mnlogit.output_to_excel(outpath)