# Tables

This notebook only presents the tables of interest in a more readable form; it does not contain any additional analyses.

In [2]:
using DataFrames
using CSV
using GLM
using Gadfly
using Statistics
using NamedArrays

# Directory that holds the input tables.
# NOTE(review): this is an absolute, machine-specific path; adjust it when running elsewhere.
DATA="/Users/aguang/CORE/tippingpoint/tippingpoint/data"

# Read one delimited file from DATA into a DataFrame, normalizing the column names
# so they are valid Julia identifiers.
# The `DataFrame` sink is used instead of `DataFrame!`, which is not available in
# current CSV.jl / DataFrames.jl releases; the loaded data are the same.
load_table(filename) = CSV.File(joinpath(DATA, filename), normalizenames=true) |> DataFrame

df_dv = load_table("df_dv.dat")
df_gc = load_table("df_gc.dat")
df_subject = load_table("df_subject.dat")
full_df = load_table("full_df.dat")

┌ Info: Recompiling stale cache file /Users/aguang/.julia/compiled/v1.1/DataFrames/AR9oZ.ji for DataFrames [a93c6f00-e57d-5684-b7b6-d8193f3e46c0]
└ @ Base loading.jl:1184
┌ Info: Recompiling stale cache file /Users/aguang/.julia/compiled/v1.1/CSV/HHBkp.ji for CSV [336ed68f-0bac-5ca0-87d4-7b16caf5d00b]
└ @ Base loading.jl:1184
┌ Info: Recompiling stale cache file /Users/aguang/.julia/compiled/v1.1/GLM/6OREG.ji for GLM [38e38edf-8417-5370-95a0-9cbb8c7f171a]
└ @ Base loading.jl:1184
┌ Info: Recompiling stale cache file /Users/aguang/.julia/compiled/v1.1/Gadfly/DvECm.ji for Gadfly [c91e804a-d5a3-530f-b6f0-dfbca275c004]
└ @ Base loading.jl:1184


Unnamed: 0_level_0,subj,graphid,tp,q,pt,risingBefore,cannotSeeAfter,downOverall
Unnamed: 0_level_1,Int64,String,Int64⍰,String,String,Int64,Int64,Int64
1,1,Q2 A,0,Q2,A,1,0,0
2,1,Q2 B,0,Q2,B,1,0,0
3,1,Q2 C,0,Q2,C,1,1,0
4,1,Q3 A,0,Q3,A,1,0,1
5,1,Q3 B,0,Q3,B,0,0,1
6,1,Q3 C,0,Q3,C,0,0,1
7,1,Q3 D,0,Q3,D,0,0,1
8,1,Q3 E,0,Q3,E,1,0,1
9,1,Q4 A,0,Q4,A,0,0,1
10,1,Q4 B,0,Q4,B,0,0,1


│   caller = compacttype(::Type, ::Int64) at show.jl:39
└ @ DataFrames /Users/aguang/.julia/packages/DataFrames/Iyo5L/src/abstractdataframe/show.jl:39


# Descriptive statistics

In [18]:
# Descriptive statistics: mean and standard deviation (missing values skipped) of
# columns 6 through 29 of `full_df`, one row per column. The labels below are
# matched to those columns by position.
mean_sd = let stat_columns = eachcol(full_df[:, 6:29])
    DataFrame(
        Variables=[
            "risingBefore", "cannotSeeAfter", "downOverall", "bellOverall",
            "complexOverall", "uniBrown", "expExec", "tpChange", "tpRate", "tpDir",
            "tpNoReturn", "tellMgr", "impChange", "impRise", "impFall", "impPeriodic",
            "numOtherTP", "liwcPosemo", "liwcNegemo", "liwcCause", "liwcFocusPre",
            "liwcFocusFut", "liwcRelativ", "liwcTime",
        ],
        Mean=[mean(skipmissing(values)) for values in stat_columns],
        SD=[std(skipmissing(values)) for values in stat_columns],
    )
end

# Save the table; the cell evaluates to the written path.
CSV.write("Figures/mean_sd.csv", mean_sd)

"Figures/mean_sd.csv"

In [21]:
# Display the descriptive-statistics table (Variables, Mean, SD).
mean_sd

Unnamed: 0_level_0,Variables,Mean,SD
Unnamed: 0_level_1,String,Float64,Float64
1,risingBefore,0.53125,0.499066
2,cannotSeeAfter,0.15625,0.363124
3,downOverall,0.65625,0.475001
4,bellOverall,0.0625,0.242083
5,complexOverall,0.896552,0.304573
6,uniBrown,0.280899,0.449478
7,expExec,0.134831,0.341573
8,tpChange,0.904494,0.293938
9,tpRate,0.241573,0.428074
10,tpDir,0.393258,0.488516


# Dropping missing data

Most of the missing data came from nine variables: importance of sustained change, importance of rise, importance of fall, importance of the periodic nature of occurrence, and the LIWC definition scores for cause, focuspresent, focusfuture, relativ, and time. We therefore decided to drop these variables.

In the logistic regression fitted after dropping these columns, the significance of `expExec` increases, which supports **Hypothesis 3:** more experienced subjects are less likely to declare a tipping point. The significance of `liwcPosemo` also increases. **Hypothesis 1** and **Hypothesis 2** are also supported.

In [19]:
# Remove the nine `imp*` / `liwc*` columns listed below from `full_df`.
dropped = select(
    full_df,
    Not([
        :impChange, :impRise, :impFall, :impPeriodic, :liwcCause, :liwcFocusPre,
        :liwcFocusFut, :liwcRelativ, :liwcTime,
    ]),
)

# Logistic regression (Binomial family, logit link) of `tp` on the remaining predictors.
tp_formula = @formula(
    tp ~ risingBefore + cannotSeeAfter + downOverall + bellOverall + complexOverall +
         expExec + tpChange + tpRate + tpDir + tpNoReturn + tellMgr + numOtherTP +
         liwcPosemo + liwcNegemo
)
hp_dropped = glm(tp_formula, dropped, Binomial(), LogitLink())

# Odds-ratio table: exponentiated coefficients and exponentiated confidence bounds,
# labelled by position (intercept first, then the predictors in formula order).
df_hp = let estimates = coef(hp_dropped), bounds = confint(hp_dropped)
    DataFrame(
        OR=exp.(estimates),
        Lower95=exp.(bounds[:, 1]),
        Upper95=exp.(bounds[:, 2]),
        Variable=[
            "Intercept", "Rising Before", "Cannot see after", "Down overall",
            "Bell overall", "Complex overall", "Experience (Executive or Undergrad)",
            "tp change", "tp rate", "tpdir", "tp no return", "tell manager",
            "numOtherTP", "liwcPosemo", "Negemo",
        ],
    )
end
CSV.write("Figures/odds_ratio.csv", df_hp)
df_hp

Unnamed: 0_level_0,OR,Lower95,Upper95,Variable
Unnamed: 0_level_1,Float64,Float64,Float64,String
1,0.472296,0.296731,0.751737,Intercept
2,2.41254,2.06124,2.82371,Rising Before
3,0.560543,0.445231,0.705719,Cannot see after
4,1.54685,1.23363,1.9396,Down overall
5,1.48527,1.06279,2.07569,Bell overall
6,1.07136,0.840729,1.36527,Complex overall
7,0.55315,0.458188,0.667793,Experience (Executive or Undergrad)
8,1.27785,1.01789,1.6042,tp change
9,1.11814,0.964336,1.29648,tp rate
10,0.971302,0.851118,1.10846,tpdir


In [20]:
# Display the fitted model `hp_dropped` (formula and coefficient table).
hp_dropped

StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Array{Float64,1},Binomial{Float64},LogitLink},GLM.DensePredChol{Float64,LinearAlgebra.Cholesky{Float64,Array{Float64,2}}}},Array{Float64,2}}

tp ~ 1 + risingBefore + cannotSeeAfter + downOverall + bellOverall + complexOverall + expExec + tpChange + tpRate + tpDir + tpNoReturn + tellMgr + numOtherTP + liwcPosemo + liwcNegemo

Coefficients:
────────────────────────────────────────────────────────────────────────────────────
                  Estimate  Std. Error    z value  Pr(>|z|)   Lower 95%    Upper 95%
────────────────────────────────────────────────────────────────────────────────────
(Intercept)     -0.750149   0.237137    -3.16335     0.0016  -1.21493    -0.285369  
risingBefore     0.880678   0.0802935   10.9682      <1e-27   0.723306    1.03805   
cannotSeeAfter  -0.578849   0.117508    -4.92603     <1e-6   -0.809161   -0.348537  
downOverall      0.436222   0.115441     3.77873     0.0002   0.209961    0.66248

In [None]:
# TODO: OR table with just significant variables (not implemented yet; the draft
# below is disabled and, apart from the first line, repeats the full odds-ratio table code).
# NOTE(review): `df_hp` has the columns OR, Lower95, Upper95 and Variable, so selecting
# `:risingBefore` as a column would not work; filtering rows on `Variable` is
# presumably what is intended here.
#just_sig = df_hp[!,[:risingBefore,]]
#df_hp=DataFrame(OR=exp.(coef(hp_dropped)),Lower95=exp.(confint(hp_dropped)[:,1]),Upper95=exp.(confint(hp_dropped)[:,2]))
#df_hp.Variable=["Intercept", "Rising Before", "Cannot see after", "Down overall",
#    "Bell overall", "Complex overall", "Experience (Executive or Undergrad)",
#    "tp change", "tp rate", "tpdir", "tp no return", "tell manager",
#    "numOtherTP", "liwcPosemo", "Negemo"]
#CSV.write("Figures/odds_ratio.csv", df_hp)
#df_hp