In [1]:
# data manipulation imports
import pandas as pd
import numpy as np

# data saving imports
import pickle

# custom imports
from regression_class import RedditRegression as RR
from regression_class import TimestampClass
from regression_class import QuantileClass as qc

# stats imports
import statsmodels.formula.api as smf
from sklearn import linear_model
from sklearn import metrics
from mlxtend.feature_selection import SequentialFeatureSelector
from sklearn.preprocessing import StandardScaler

# plotting imports
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

In [2]:
# Load the pickled input bundle and pull out the two entries used below.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load this file if it comes from a trusted source.
# The context manager guarantees the file handle is closed after loading
# (the bare open(...) call previously left it to the garbage collector).
with open('test_data_crypto_5_days.p', 'rb') as data_file:
    data = pickle.load(data_file)

regression_threads = data['regression_data']
all_data = data['all_data']

In [3]:
# Regression types to run; one parameter dict and one model is built per entry.
REGRESSION_TYPES = [
    "linear",
    "mnlogit",
]

In [4]:
# Subreddit name passed to RR.create_param_dict for every regression type.
subreddit = "crypto"

In [30]:
# Candidate regressor column names, handed to the regression as `x_cols`.
# Keep the order stable: the `feature_idx` tuples shown in the FSS metric
# tables line up with positions in this list (e.g. index 2 -> "time_in_secs").
X_COLS = [
    # sentiment columns (grouped by name)
    "sentiment_sign", "sentiment_magnitude",
    # time-related columns (grouped by name)
    "time_in_secs", "num_dayofweek",
    # activity column
    "activity_ratio",
    # author-level columns (grouped by name)
    "mean_author_sentiment_sign", "mean_author_sentiment_magnitude",
    "author_all_activity_count",
]


In [31]:
# Extra keyword arguments unpacked into RR.create_param_dict for every
# regression type.
# NOTE(review): the units of the three window sizes are not visible in this
# notebook — confirm against regression_class before changing them.
extra_params = dict(
    collection_window=2,
    model_window=2,
    validation_window=1,
    x_cols=X_COLS,
)

In [32]:
# Build one parameter dict per regression type, keyed by the type name.
# All types share the same subreddit, input data and extra keyword arguments.
param_dicts = {
    reg_type: RR.create_param_dict(
        subreddit,
        reg_type,
        regression_threads,
        all_data,
        **extra_params,
    )
    for reg_type in REGRESSION_TYPES
}

In [33]:
# Instantiate one RedditRegression per parameter dict and run it.
# The model is stored in `regmods` before main() is called, so it stays
# inspectable even if the run itself fails part-way.
regmods = {}

for reg_type, params in param_dicts.items():
    model = RR(params)
    regmods[reg_type] = model
    model.main()

regression_class_crypto_linear - INFO - Running FSS
regression_class_crypto_linear - INFO - Running FSS
regression_class_crypto_linear - INFO - Running FSS
regression_class_crypto_linear - INFO - Model 1
regression_class_crypto_linear - INFO - Model 1
regression_class_crypto_linear - INFO - Model 1
regression_class_crypto_linear - INFO - Model 2
regression_class_crypto_linear - INFO - Model 2
regression_class_crypto_linear - INFO - Model 2
regression_class_crypto_linear - INFO - Model 3
regression_class_crypto_linear - INFO - Model 3
regression_class_crypto_linear - INFO - Model 3
regression_class_crypto_linear - INFO - Model 4
regression_class_crypto_linear - INFO - Model 4
regression_class_crypto_linear - INFO - Model 4
regression_class_crypto_linear - INFO - Model 5
regression_class_crypto_linear - INFO - Model 5
regression_class_crypto_linear - INFO - Model 5
regression_class_crypto_linear - INFO - Model 6
regression_class_crypto_linear - INFO - Model 6
regression_class_crypto_line

In [35]:
# Show the quantile table of the "mnlogit" model (quantile ranges and counts
# for the modelling and validation data).
regmods["mnlogit"].quantile_metrics

Unnamed: 0,quantile_ranges,val_quantile_ranges,count,val_count
0,"(0.6931471805599453, 2.3025850929940455)","(0.6931471805599453, 2.3025850929940455)",64,49
1,"(2.3978952727983707, 3.0910424533583156)","(2.3978952727983707, 3.0910424533583156)",80,53
2,"(3.1354942159291497, 3.6888794541139363)","(3.1354942159291497, 3.6888794541139363)",71,26
3,"(3.7135720667043075, 8.150467911624004)","(3.7135720667043075, 8.246171559857563)",75,47


In [37]:
# Show the feature-selection metric table of the "mnlogit" model
# (one row per number of selected features).
regmods["mnlogit"].FSS_metrics["metric_df"]

Unnamed: 0,feature_idx,cv_scores,avg_score,feature_names,ci_bound,std_dev,std_err
1,"(2,)",[0.3137931034482759],0.313793,"(time_in_secs,)",,0.0,
2,"(2, 5)",[0.35517241379310344],0.355172,"(time_in_secs, mean_author_sentiment_sign)",,0.0,
3,"(0, 2, 5)",[0.35172413793103446],0.351724,"(sentiment_sign, time_in_secs, mean_author_sen...",,0.0,
4,"(0, 2, 4, 5)",[0.38275862068965516],0.382759,"(sentiment_sign, time_in_secs, activity_ratio,...",,0.0,
5,"(0, 2, 3, 4, 5)",[0.38620689655172413],0.386207,"(sentiment_sign, time_in_secs, num_dayofweek, ...",,0.0,
6,"(0, 1, 2, 3, 4, 5)",[0.3793103448275862],0.37931,"(sentiment_sign, sentiment_magnitude, time_in_...",,0.0,
7,"(0, 1, 2, 3, 4, 5, 6)",[0.3896551724137931],0.389655,"(sentiment_sign, sentiment_magnitude, time_in_...",,0.0,
8,"(0, 1, 2, 3, 4, 5, 6, 7)",[0.38275862068965516],0.382759,"(sentiment_sign, sentiment_magnitude, time_in_...",,0.0,


In [38]:
# Show the feature-selection metric table of the "linear" model
# (one row per number of selected features).
regmods["linear"].FSS_metrics["metric_df"]

Unnamed: 0,feature_idx,cv_scores,avg_score,feature_names,ci_bound,std_dev,std_err
1,"(7,)",[0.029507379653760935],0.029507,"(author_all_activity_count,)",,0.0,
2,"(2, 7)",[0.04093726915509699],0.040937,"(time_in_secs, author_all_activity_count)",,0.0,
3,"(0, 2, 7)",[0.044909324848692145],0.044909,"(sentiment_sign, time_in_secs, author_all_acti...",,0.0,
4,"(0, 2, 5, 7)",[0.04944855537396975],0.049449,"(sentiment_sign, time_in_secs, mean_author_sen...",,0.0,
5,"(0, 2, 5, 6, 7)",[0.05142275571548782],0.051423,"(sentiment_sign, time_in_secs, mean_author_sen...",,0.0,
6,"(0, 2, 3, 5, 6, 7)",[0.052574817068336555],0.052575,"(sentiment_sign, time_in_secs, num_dayofweek, ...",,0.0,
7,"(0, 2, 3, 4, 5, 6, 7)",[0.05323692728830831],0.053237,"(sentiment_sign, time_in_secs, num_dayofweek, ...",,0.0,
8,"(0, 1, 2, 3, 4, 5, 6, 7)",[0.05326781384636059],0.053268,"(sentiment_sign, sentiment_magnitude, time_in_...",,0.0,
