In [65]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier

from sklearn.utils.testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
import warnings
# Keep DeprecationWarning messages out of the cell outputs.
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

# Lift pandas' display truncation: None removes the cap on columns and on rows.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [6]:
# Directories are relative paths, so they resolve against the working directory.
datapath = Path('..', 'data')
imagepath = Path('..', 'images')

# Name of the CSV file inside `datapath`.
full = 'cleaned_full_1.csv'

In [31]:
# Read the CSV named by `full` from the data directory into the working frame.
df = pd.read_csv(datapath.joinpath(full))

In [45]:
# Column groups: every list below holds the column names that contain a given tag.

def _columns_containing(tag, pool=None):
    """Return, in order, the names in `pool` (default: df.columns) that contain `tag`."""
    candidates = df.columns if pool is None else pool
    return [name for name in candidates if tag in name]


# Individual sections
moa = _columns_containing('moa')
idea = _columns_containing('idea')
swb = _columns_containing('swb')
mindful = _columns_containing('mindful')
belong = _columns_containing('belong')
efficacy = _columns_containing('efficacy')
support = _columns_containing('support')
transgres = _columns_containing('transgres')
exploit = _columns_containing('exploit')
stress = _columns_containing('stress')
marriage = _columns_containing('marriage')

# Demographics
socmedia = _columns_containing('socmedia')
usdream = _columns_containing('usdream')
demo = _columns_containing('demo')
disability = _columns_containing('disability')
phys = _columns_containing('phys')

# Feature columns: drop any section column whose name contains a skipped token.
skip = ['bias', 'duration']
mind_feats = [name for name in mindful if all(token not in name for token in skip)]
sup_feats = [name for name in support if all(token not in name for token in skip)]

# Family features are picked from the demographic columns, parents first.
sibs = _columns_containing('sibs', demo)
parents = _columns_containing('parents', demo)
fam_feats = parents + sibs

feat_lists = [mind_feats, sup_feats, fam_feats]

# Target columns: everything tagged 'target' plus two explicitly named columns.
targets = _columns_containing('target') + ['belong_now', 'demo_politics']


In [70]:
# Show the first two rows of the target columns.
df.loc[:, targets].head(n=2)

Unnamed: 0,swb_target,efficacy_target,phys_target,stress_target,belong_now,demo_politics
0,neutral,very_high,med,high,4.0,2.0
1,neutral,very_high,med,high,4.0,1.0


In [56]:
def logreg_tests(df, feat_lists, targets):
    """Fit a logistic regression for every (feature group, target) pair and print its accuracy.

    Parameters
    ----------
    df : DataFrame-like
        Indexed with a list of column names to get the features and with a
        single column name to get a target.
    feat_lists : list of list of str
        Feature groups; the columns of one inner list are used together as
        predictors. The first name of a group labels it in the printed output.
    targets : list of str
        Columns to predict; one model is fitted per group and target.

    Returns
    -------
    None
        Results are printed, not returned.

    Notes
    -----
    The accuracy is scored on the same rows the model was fitted on, so it is
    a training accuracy, not a held-out estimate.
    """
    for feat_list in feat_lists:
        f = feat_list[0]
        # The standardised features depend only on the feature group, so scale
        # once per group rather than once per target.
        X = StandardScaler().fit_transform(df[feat_list])
        for target in targets:
            y = df[target].values

            # Silence ConvergenceWarning around the fit with the standard
            # library instead of scikit-learn's private testing decorator.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=ConvergenceWarning)
                model = LogisticRegression(solver='lbfgs', multi_class='auto').fit(X, y)
            acc = model.score(X, y)

            print(f'{f} predicting {target}\naccuracy: {acc}\n')

In [57]:
# Run the logistic-regression sweep over every feature group and target.
logreg_tests(df=df, feat_lists=feat_lists, targets=targets)

mindful_1 predicting swb_target
accuracy: 0.5672328379334749

mindful_1 predicting efficacy_target
accuracy: 0.6036801132342534

mindful_1 predicting phys_target
accuracy: 0.829794762915782

mindful_1 predicting stress_target
accuracy: 0.7154989384288747

mindful_1 predicting belong_now
accuracy: 0.44904458598726116

mindful_1 predicting demo_politics
accuracy: 0.2618542108987969

support_1 predicting swb_target
accuracy: 0.6224345364472753

support_1 predicting efficacy_target
accuracy: 0.5665251238499646

support_1 predicting phys_target
accuracy: 0.8294409058740269

support_1 predicting stress_target
accuracy: 0.7147912243453645

support_1 predicting belong_now
accuracy: 0.46744515215852794

support_1 predicting demo_politics
accuracy: 0.24911535739561216

demo_parents0 predicting swb_target
accuracy: 0.502476999292286

demo_parents0 predicting efficacy_target
accuracy: 0.5226468506723284

demo_parents0 predicting phys_target
accuracy: 0.8290870488322718

demo_parents0 predicting st

In [68]:
def rf_tests(df, feat_lists, targets, random_state=0):
    """Fit a random forest for every (feature group, target) pair and print its accuracy.

    Parameters
    ----------
    df : DataFrame-like
        Indexed with a list of column names to get the features and with a
        single column name to get a target; `.values` is read from both.
    feat_lists : list of list of str
        Feature groups; the columns of one inner list are used together as
        predictors. The first name of a group labels it in the printed output.
    targets : list of str
        Columns to predict; one model is fitted per group and target.
    random_state : int or None, default 0
        Seed passed to every RandomForestClassifier so repeated runs print the
        same numbers. Pass None to restore unseeded behaviour.

    Returns
    -------
    None
        Results are printed, not returned.

    Notes
    -----
    The accuracy is scored on the same rows the model was fitted on, so it is
    a training accuracy, not a held-out estimate.
    """
    for feat_list in feat_lists:
        f = feat_list[0]
        # The feature matrix depends only on the feature group, so build it
        # once per group rather than once per target.
        X = df[feat_list].values
        for target in targets:
            y = df[target].values

            model = RandomForestClassifier(n_estimators=100, random_state=random_state).fit(X, y)
            acc = model.score(X, y)

            print(f'{f} predicting {target}\naccuracy: {acc}\n')

In [69]:
# Run the random-forest sweep over every feature group and target.
rf_tests(df=df, feat_lists=feat_lists, targets=targets)

mindful_1 predicting swb_target
accuracy: 0.997169143665959

mindful_1 predicting efficacy_target
accuracy: 0.997523000707714

mindful_1 predicting phys_target
accuracy: 0.9978768577494692

mindful_1 predicting stress_target
accuracy: 0.997523000707714

mindful_1 predicting belong_now
accuracy: 0.9961075725406936

mindful_1 predicting demo_politics
accuracy: 0.994692144373673

support_1 predicting swb_target
accuracy: 0.943382873319179

support_1 predicting efficacy_target
accuracy: 0.9416135881104034

support_1 predicting phys_target
accuracy: 0.9699221514508138

support_1 predicting stress_target
accuracy: 0.943029016277424

support_1 predicting belong_now
accuracy: 0.9104741684359519

support_1 predicting demo_politics
accuracy: 0.8673036093418259

demo_parents0 predicting swb_target
accuracy: 0.5403397027600849

demo_parents0 predicting efficacy_target
accuracy: 0.5704175513092711

demo_parents0 predicting phys_target
accuracy: 0.8354564755838642

demo_parents0 predicting stress_ta

In [76]:
def gb_tests(df, feat_lists, targets, random_state=0):
    """Fit a gradient-boosting classifier for every (feature group, target) pair and print its accuracy.

    Parameters
    ----------
    df : DataFrame-like
        Indexed with a list of column names to get the features and with a
        single column name to get a target.
    feat_lists : list of list of str
        Feature groups; the columns of one inner list are used together as
        predictors. The first name of a group labels it in the printed output.
    targets : list of str
        Columns to predict; one model is fitted per group and target.
    random_state : int or None, default 0
        Seed passed to every GradientBoostingClassifier so repeated runs print
        the same numbers. Pass None to restore unseeded behaviour.

    Returns
    -------
    None
        Results are printed, not returned.

    Notes
    -----
    The accuracy is scored on the same rows the model was fitted on, so it is
    a training accuracy, not a held-out estimate.
    """
    for feat_list in feat_lists:
        f = feat_list[0]
        # The standardised features depend only on the feature group, so scale
        # once per group rather than once per target.
        X = StandardScaler().fit_transform(df[feat_list])
        for target in targets:
            y = df[target].values

            model = GradientBoostingClassifier(
                learning_rate=0.01, n_estimators=100, random_state=random_state
            ).fit(X, y)
            acc = model.score(X, y)

            print(f'{f} predicting {target}\naccuracy: {acc}\n')

In [77]:
# Run the gradient-boosting sweep over every feature group and target.
gb_tests(df=df, feat_lists=feat_lists, targets=targets)

mindful_1 predicting swb_target
accuracy: 0.5845718329794763

mindful_1 predicting efficacy_target
accuracy: 0.6411889596602972

mindful_1 predicting phys_target
accuracy: 0.8294409058740269

mindful_1 predicting stress_target
accuracy: 0.7147912243453645

mindful_1 predicting belong_now
accuracy: 0.45506015569709835

mindful_1 predicting demo_politics
accuracy: 0.2760084925690021

support_1 predicting swb_target
accuracy: 0.6234961075725407

support_1 predicting efficacy_target
accuracy: 0.6139419674451522

support_1 predicting phys_target
accuracy: 0.8290870488322718

support_1 predicting stress_target
accuracy: 0.7158527954706299

support_1 predicting belong_now
accuracy: 0.48018400566171265

support_1 predicting demo_politics
accuracy: 0.2760084925690021

demo_parents0 predicting swb_target
accuracy: 0.5134465675866949

demo_parents0 predicting efficacy_target
accuracy: 0.5371549893842887

demo_parents0 predicting phys_target
accuracy: 0.829794762915782

demo_parents0 predicting st