In [78]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.model_selection import cross_val_score

from sklearn.utils.testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

pd.set_option('display.max_columns', None, 'display.max_rows', None)

In [6]:
imagepath = Path('../images')
datapath = Path('../data')
full = 'cleaned_full_1.csv'

In [31]:
df = pd.read_csv(datapath/full)

In [45]:
# Lists of individual sections

moa = [c for c in df.columns if 'moa' in c]
idea = [c for c in df.columns if 'idea' in c]
swb = [c for c in df.columns if 'swb' in c]
mindful = [c for c in df.columns if 'mindful' in c]
belong = [c for c in df.columns if 'belong' in c]
efficacy = [c for c in df.columns if 'efficacy' in c]
support = [c for c in df.columns if 'support' in c]
transgres = [c for c in df.columns if 'transgres' in c]
exploit = [c for c in df.columns if 'exploit' in c]
stress = [c for c in df.columns if 'stress' in c]
marriage = [c for c in df.columns if 'marriage' in c]

# demographics
socmedia = [c for c in df.columns if 'socmedia' in c]
usdream = [c for c in df.columns if 'usdream' in c]
demo = [c for c in df.columns if 'demo' in c]
disability = [c for c in df.columns if 'disability' in c]
phys = [c for c in df.columns if 'phys' in c]

# Cols to use as features
skip = ['bias', 'duration']
mind_feats = [c for c in mindful if not any(s in c for s in skip)]
sup_feats = [c for c in support if not any(s in c for s in skip)]

sibs = [c for c in demo if 'sibs' in c]
parents = [c for c in demo if 'parents' in c]
fam_feats = parents+sibs

feat_lists = [mind_feats, sup_feats, fam_feats]

# Cols to use as targets
targets = [c for c in df.columns if 'target' in c] + ['belong_now', 'demo_politics']


In [70]:
df[targets].head(2)

Unnamed: 0,swb_target,efficacy_target,phys_target,stress_target,belong_now,demo_politics
0,neutral,very_high,med,high,4.0,2.0
1,neutral,very_high,med,high,4.0,1.0


In [79]:
@ignore_warnings(category=ConvergenceWarning)
def logreg_tests(df, feat_lists, targets):
    for feat_list in feat_lists:
        f = feat_list[0]
        for target in targets:
            X = StandardScaler().fit_transform(df[feat_list])
            y = df[target].values

            model = LogisticRegression(solver='lbfgs', multi_class='auto')
            acc = cross_val_score(model, X, y, cv=5)

            print(f'{f} predicting {target}\naccuracy: {np.mean(acc)}\n')

In [80]:
logreg_tests(df, feat_lists, targets)

mindful_1 predicting swb_target
accuracy: 0.5587416742237092

mindful_1 predicting efficacy_target
accuracy: 0.5909378805550362

mindful_1 predicting phys_target
accuracy: 0.8290880384933462

mindful_1 predicting stress_target
accuracy: 0.7130115530160639

mindful_1 predicting belong_now
accuracy: 0.4412830429917286





mindful_1 predicting demo_politics
accuracy: 0.24521559500171422

support_1 predicting swb_target
accuracy: 0.6086387942086995

support_1 predicting efficacy_target
accuracy: 0.5566140878355677

support_1 predicting phys_target
accuracy: 0.8287346816028869

support_1 predicting stress_target
accuracy: 0.7144400170413252

support_1 predicting belong_now
accuracy: 0.45717105787287354





support_1 predicting demo_politics
accuracy: 0.2402616309181868

demo_parents0 predicting swb_target
accuracy: 0.49788110947809494

demo_parents0 predicting efficacy_target
accuracy: 0.5102460812603307

demo_parents0 predicting phys_target
accuracy: 0.8290880384933462

demo_parents0 predicting stress_target
accuracy: 0.7147933739317847

demo_parents0 predicting belong_now
accuracy: 0.4430373523730326





demo_parents0 predicting demo_politics
accuracy: 0.24523187150191683



In [81]:
def rf_tests(df, feat_lists, targets):
    for feat_list in feat_lists:
        f = feat_list[0]
        for target in targets:
            X = df[feat_list].values
            y = df[target].values

            model = RandomForestClassifier(n_estimators=100)
            acc = cross_val_score(model, X, y, cv=5)

            print(f'{f} predicting {target}\naccuracy: {np.mean(acc)}\n')

In [82]:
rf_tests(df, feat_lists, targets)

mindful_1 predicting swb_target
accuracy: 0.5148585008912099

mindful_1 predicting efficacy_target
accuracy: 0.5902299314776386

mindful_1 predicting phys_target
accuracy: 0.8259128386336867

mindful_1 predicting stress_target
accuracy: 0.7048743202265493

mindful_1 predicting belong_now
accuracy: 0.42323797178134326





mindful_1 predicting demo_politics
accuracy: 0.21943509597365654

support_1 predicting swb_target
accuracy: 0.5725432314956691

support_1 predicting efficacy_target
accuracy: 0.552001661551386

support_1 predicting phys_target
accuracy: 0.8117384657795153

support_1 predicting stress_target
accuracy: 0.6744693381450017

support_1 predicting belong_now
accuracy: 0.41470050471559405





support_1 predicting demo_politics
accuracy: 0.18933996632156286

demo_parents0 predicting swb_target
accuracy: 0.4854892273054192

demo_parents0 predicting efficacy_target
accuracy: 0.5021357965633031

demo_parents0 predicting phys_target
accuracy: 0.824138535949678

demo_parents0 predicting stress_target
accuracy: 0.7098426183494976

demo_parents0 predicting belong_now
accuracy: 0.41613080301073013





demo_parents0 predicting demo_politics
accuracy: 0.22578309539063368



In [83]:
def gb_tests(df, feat_lists, targets):
    for feat_list in feat_lists:
        f = feat_list[0]
        for target in targets:
            X = df[feat_list].values
            y = df[target].values

            model = GradientBoostingClassifier(learning_rate=0.01, n_estimators=100)
            acc = cross_val_score(model, X, y, cv=3)

            print(f'{f} predicting {target}\naccuracy: {np.mean(acc)}\n')

In [None]:
gb_tests(df, feat_lists, targets)

mindful_1 predicting swb_target
accuracy: 0.5520125081054845

mindful_1 predicting efficacy_target
accuracy: 0.6160678221545054

mindful_1 predicting phys_target
accuracy: 0.8290876717168176

mindful_1 predicting stress_target
accuracy: 0.7147920831907394

mindful_1 predicting belong_now
accuracy: 0.44090770918353833



