In [1]:
# data manipulation imports
import pandas as pd
import numpy as np

# data saving imports
import pickle

# custom imports
from regression_class import RedditRegression as RR
from regression_class import TimestampClass
from regression_class import QuantileClass as qc

# stats imports
import statsmodels.formula.api as smf
from sklearn import linear_model
from sklearn import metrics
from mlxtend.feature_selection import SequentialFeatureSelector
from sklearn.preprocessing import StandardScaler

# plotting imports
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

In [2]:
# Load the pickled test dataset and pull out the two entries used below.
# NOTE: pickle.load can execute arbitrary code while deserialising; only
# load files that come from a trusted source.
# The context manager guarantees the file handle is closed once the load
# finishes (the bare open() call previously left it open).
with open('test_data_crypto_5_days.p', 'rb') as data_file:
    data = pickle.load(data_file)
regression_threads = data['regression_data']
all_data = data['all_data']

In [3]:
# Predictor columns; handed to the regression as `x_cols`.
X_COLS = [
    "sentiment_sign", "sentiment_magnitude",
    "hour", "time_in_secs", "num_dayofweek",
    "activity_ratio",
    "mean_author_sentiment_sign", "mean_author_sentiment_magnitude",
    "author_all_activity_count",
]

# Response column; handed to the regression as `y_col`.
y_col = "thread_size"

# Quantile levels; handed to the regression as `quantiles`.
quantiles = [0.25, 0.50, 0.75]

# Per-column threshold values; handed to the regression as `thresholds`.
# NOTE(review): how RedditRegression applies these is not visible here.
thresholds = dict(author_all_activity_count=2, thread_size=2)

# Full parameter set passed to RedditRegression; key order is kept as-is.
regression_params = {
    # run label and input data loaded from the pickle
    "name": "crypto",
    "regression_data": regression_threads,
    "thread_data": all_data,
    # model type and window settings
    # NOTE(review): window units are not visible in this notebook - confirm
    "regression_type": "mnlogit",
    "collection_window": 2,
    "model_window": 2,
    "validation_window": 1,
    # feature-selection switch and the scoring method it uses
    "FSS": True,
    "performance_scoring_method": "mnlogit",
    # predictor / response columns
    "x_cols": X_COLS,
    "y_col": y_col,
    # metrics to report
    "metrics": ["mnlogit_accuracy", "mnlogit_aucs", "mnlogit_mean_auc"],
    # thresholds and quantile levels defined in this cell
    "thresholds": thresholds,
    "quantiles": quantiles,
}

In [4]:
# Build the regression object (RR is the RedditRegression alias imported
# above) from the regression_params dict.
test_mnlogit = RR(regression_params)

In [5]:
# Run the regression. The recorded output below shows an FSS step followed
# by nine successive model fits.
test_mnlogit.main()

Running FSS


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


Model 1
Optimization terminated successfully.
         Current function value: 1.368356
         Iterations 4
Model 2
Optimization terminated successfully.
         Current function value: 1.359630
         Iterations 5
Model 3
Optimization terminated successfully.
         Current function value: 1.342283
         Iterations 5
Model 4
Optimization terminated successfully.
         Current function value: 1.328100
         Iterations 6
Model 5
Optimization terminated successfully.
         Current function value: 1.317655
         Iterations 6
Model 6
Optimization terminated successfully.
         Current function value: 1.313113
         Iterations 6
Model 7
Optimization terminated successfully.
         Current function value: 1.312710
         Iterations 6
Model 8
Optimization terminated successfully.
         Current function value: 1.305442
         Iterations 6
Model 9
Optimization terminated successfully.
         Current function value: 1.302246
         Iterations 6


In [6]:
# Display the quantile ranges and per-range counts stored on the object
# after the run.
test_mnlogit.quantile_data

{'quantile_ranges': [(2, 10), (11.0, 22), (23.0, 40), (41.0, 3465)],
 'quantile_counts':               count  val_count
 range                         
 (2, 10)          64         49
 (11.0, 22)       80         53
 (23.0, 40)       71         26
 (41.0, 3465)     75         47,
 'val_quantile_ranges': [(2, 10), (11.0, 22), (23.0, 40), (41.0, 3813)]}

In [7]:
# Display the feature-selection metrics table (one row per feature-subset
# size, with the selected feature indices/names and their scores).
test_mnlogit.FSS_metrics['metric_df']

Unnamed: 0,feature_idx,cv_scores,avg_score,feature_names,ci_bound,std_dev,std_err
1,"(3,)",[0.3137931034482759],0.313793,"(time_in_secs,)",,0.0,
2,"(3, 6)",[0.35517241379310344],0.355172,"(time_in_secs, mean_author_sentiment_sign)",,0.0,
3,"(2, 3, 6)",[0.35517241379310344],0.355172,"(hour, time_in_secs, mean_author_sentiment_sign)",,0.0,
4,"(2, 3, 6, 8)",[0.3620689655172414],0.362069,"(hour, time_in_secs, mean_author_sentiment_sig...",,0.0,
5,"(2, 3, 5, 6, 8)",[0.3793103448275862],0.37931,"(hour, time_in_secs, activity_ratio, mean_auth...",,0.0,
6,"(0, 2, 3, 5, 6, 8)",[0.3896551724137931],0.389655,"(sentiment_sign, hour, time_in_secs, activity_...",,0.0,
7,"(0, 2, 3, 4, 5, 6, 8)",[0.3896551724137931],0.389655,"(sentiment_sign, hour, time_in_secs, num_dayof...",,0.0,
8,"(0, 1, 2, 3, 4, 5, 6, 8)",[0.38620689655172413],0.386207,"(sentiment_sign, sentiment_magnitude, hour, ti...",,0.0,
9,"(0, 1, 2, 3, 4, 5, 6, 7, 8)",[0.38620689655172413],0.386207,"(sentiment_sign, sentiment_magnitude, hour, ti...",,0.0,


In [8]:
# Write the results to an Excel workbook.
# NOTE(review): this is an absolute, machine-specific location; consider
# moving it to a configurable output directory so the notebook runs elsewhere.
outdir = "C:/Users/snuzz/Documents/PhD/datasets/reddit_analyses/reddit_analyses/mnlogit/test"
outpath = f"{outdir}/test2_output.xlsx"
test_mnlogit.output_to_excel(outpath)