In [1]:
# Output directory (with trailing slash) for the prediction CSVs written by this notebook.
data_root <- "~/Data/Promotion/reg_results_F_M/"

In [2]:
# install.packages("lme4")
# install.packages("margins")
# install.packages("stargazer")
# install.packages("emmeans")
# install.packages("ggeffects")
# install.packages("broom")
# install.packages("sjmisc")
# install.packages("broom.mixed")

In [3]:
library("lme4")
library("margins")
library("stargazer")
library("emmeans")
library("ggeffects")
library("broom")
library("broom.mixed")

Loading required package: Matrix

Please cite as: 

 Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.2. https://CRAN.R-project.org/package=stargazer 



In [4]:
# Record the installed ggeffects version; the documentation links below refer to 1.1.1.
# https://www.rdocumentation.org/packages/ggeffects/versions/1.1.1
# https://www.rdocumentation.org/packages/ggeffects/versions/1.1.1/topics/ggeffect
packageVersion("ggeffects")

[1] ‘1.1.1’

In [7]:
# library("sjmisc")
# ?sjmisc::typical_value

In [5]:
# Raise the notebook display limits for tables to 500 rows / 200 columns
# (NOTE(review): these are presumably the IRkernel `repr` options -- confirm).
options(repr.matrix.max.rows=500, repr.matrix.max.cols=200)

In [6]:
# Load the regression data set (rows with missing values were already dropped upstream).
# stringsAsFactors = TRUE is set explicitly: the relevel() calls further down need
# factor columns, and since R 4.0 read.csv() returns character columns by default.
# The former paste(..., sep = '') wrapper around the single path was a no-op.
mydata <- read.csv(
  "~/Data/Promotion/reg_data_drop_missing.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)

In [7]:
# Number of observations in the loaded file, i.e. after rows with missing values were dropped.
# Before dropping observations with missing values: 6363409
nrow(mydata)

In [8]:
# Number of columns in the loaded data.
ncol(mydata)

In [9]:
# Keep only observations whose gender value is "Male" or "Female".
mydata <- subset(mydata, gender %in% c("Male", "Female"))

In [10]:
# Observations remaining after the gender filter.
nrow(mydata)

In [11]:
# Drop observations whose affiliation category is 'unknown'.
mydata <- mydata[mydata[["affiliation_cate"]] != "unknown", , drop = FALSE]

In [12]:
# Observations remaining after removing unknown affiliations.
nrow(mydata)

In [13]:
# Choose the reference (baseline) level of each categorical predictor so that the
# model coefficients are contrasts against Male / last_position / domestic.
# factor() is applied first because relevel() only accepts factors: this keeps the
# cell working when the columns were read as character (R >= 4.0 default) and also
# drops levels that the filters above left empty (e.g. 'unknown').
mydata$gender <- relevel(factor(mydata$gender), ref = "Male")
mydata$authorship_pos <- relevel(factor(mydata$authorship_pos), ref = "last_position")
mydata$affiliation_cate <- relevel(factor(mydata$affiliation_cate), ref = "domestic")

In [14]:
# Additive subject-area terms that are appended to every model formula below
# (NOTE(review): presumably 0/1 indicator columns, as Social_Sciences/General are
# compared with 1 elsewhere in this notebook -- confirm).
# The string already starts with " + ", so it can be concatenated directly after the
# preceding formula terms without adding another "+".
# The backslash at each line end continues the literal on the next line; the embedded
# line breaks and indentation are only whitespace once the text is parsed by as.formula().
keywords <- " + Social_Sciences + Materials_Science + Engineering + Chemistry + \
        Biochemistry__Genetics_and_Molecular_Biology + Medicine + Nursing + Agricultural_and_Biological_Sciences + \
        Pharmacology__Toxicology_and_Pharmaceutics + Neuroscience + Business__Management_and_Accounting + \
        Economics__Econometrics_and_Finance + Chemical_Engineering + Physics_and_Astronomy + Computer_Science + \
        Decision_Sciences + Health_Professions + Psychology + Immunology_and_Microbiology + Dentistry + \
        Earth_and_Planetary_Sciences + Environmental_Science + Mathematics + Arts_and_Humanities + Energy + \
        Veterinary + General"

In [15]:
# Log-transform the author citation count; the +1 keeps zero counts finite (log2(1) = 0).
mydata[["author_citation_log"]] <- log2(1 + mydata[["author_citation"]])

### Original tweets

In [17]:
# Original tweets: logistic mixed model of self_promotion_original on gender and
# author/paper controls, with a random intercept per paper (doi) and the
# subject-area terms from `keywords` (which supplies its own leading " + ").
base_str <- "self_promotion_original ~ 1 + gender + authorship_pos + author_pub_count_cate + I(author_pub_count_cate^2) + \
            affiliation_rank_cate + affiliation_cate + num_authors + journal_impact + author_citation_log"
equation <- as.formula(paste(base_str, " + (1|doi)", keywords))
# nAGQ = 0 with the nloptwrap optimizer: see the lme4 glmer documentation for the
# speed/accuracy trade-off of this setting.
m_org <- glmer(
  formula = equation,
  data = mydata,
  family = "binomial",
  control = glmerControl(optimizer = "nloptwrap"),
  nAGQ = 0
)


In [18]:
# summary(m_org)

In [18]:
# NOTE(review): the table printed below appears to come from an earlier fit of m_org
# WITHOUT the author citation control; the current m_org formula includes
# author_citation_log. Re-run this cell to refresh the output.
tidy(m_org)

effect,group,term,estimate,std.error,statistic,p.value
fixed,,(Intercept),-2.879199754,0.01436067,-200.492028,0.0
fixed,,genderFemale,-0.27846734,0.0071646,-38.867118,0.0
fixed,,authorship_posfirst_position,0.356724229,0.009279095,38.443859,0.0
fixed,,authorship_posmiddle_position,-0.879134042,0.008393956,-104.734175,0.0
fixed,,authorship_possolo_author,0.789776601,0.01870395,42.225126,0.0
fixed,,author_pub_count_cate,0.280368278,0.004581549,61.195084,0.0
fixed,,I(author_pub_count_cate^2),-0.030458369,0.0005681461,-53.610099,0.0
fixed,,affiliation_rank_cate,-0.05313944,0.001281,-41.482771,0.0
fixed,,affiliation_cateinternational,0.061704091,0.007745272,7.966678,1.629962e-15
fixed,,num_authors,-0.001512682,7.530442e-05,-20.087563,9.480228e-90


In [19]:
# Coefficient table (estimate, std.error, statistic, p.value) for the original-tweets model.
tidy(m_org)

effect,group,term,estimate,std.error,statistic,p.value
fixed,,(Intercept),-2.851691705,0.01443349,-197.574639,0.0
fixed,,genderFemale,-0.27706099,0.007166555,-38.660278,0.0
fixed,,authorship_posfirst_position,0.343344667,0.009311306,36.873954,1.208646e-297
fixed,,authorship_posmiddle_position,-0.883427253,0.008400939,-105.158159,0.0
fixed,,authorship_possolo_author,0.780133868,0.01873507,41.640307,0.0
fixed,,author_pub_count_cate,0.35307594,0.006097561,57.904457,0.0
fixed,,I(author_pub_count_cate^2),-0.032447389,0.0005793496,-56.006577,0.0
fixed,,affiliation_rank_cate,-0.054494684,0.001284198,-42.434806,0.0
fixed,,affiliation_cateinternational,0.051878119,0.007770933,6.675919,2.456883e-11
fixed,,num_authors,-0.001519923,7.537603e-05,-20.164536,2.0061899999999998e-90


In [20]:
# Predicted probability of self-promotion by gender for the original-tweets model.
# weighted average for factor variable, median for non-factor variables
MEs <- ggemmeans(m_org, terms = "gender", typical = "median")

Model contains polynomial or cubic / quadratic terms. Consider using `terms="gender [all]"` to get smooth plots. See also package-vignette 'Marginal Effects at Specific Values'.


In [21]:
# Show the predictions by gender (predicted value, std.error, confidence bounds).
MEs

x,predicted,std.error,conf.low,conf.high,group
Male,0.09318671,0.008416714,0.09180203,0.09459009,1
Female,0.07226592,0.009460752,0.07103257,0.07351899,1


In [22]:
# Save the original-tweets predictions as a CSV in the results directory.
fname <- paste0(data_root, "pred_original.csv")
write.csv(MEs, file = fname, row.names = FALSE)

### Retweets

In [23]:
# Retweets: fixed-effects part of the model for self_promotion_retweet (gender plus
# author/paper controls, with a quadratic term in author_pub_count_cate).
# The random intercept and subject-area terms are appended in the next cell.
base_str <- "self_promotion_retweet ~ 1 + gender + authorship_pos + author_pub_count_cate + I(author_pub_count_cate^2) + \
            affiliation_rank_cate + affiliation_cate + num_authors + journal_impact + author_citation_log"


In [24]:
# Retweets: logistic mixed model with a random intercept per paper (doi).
# `keywords` already begins with " + ", so no extra "+" is added after (1|doi);
# the previous " + (1|doi) + " produced a stray "+ +" (unary plus) in the formula.
equation <- as.formula(paste(base_str, " + (1|doi)", keywords, sep = " "))
m_retweet <- glmer(
  formula = equation,
  data = mydata,
  family = "binomial",
  control = glmerControl(optimizer = "nloptwrap"),
  nAGQ = 0
)


In [25]:
# Coefficient table for the retweets model.
tidy(m_retweet)

effect,group,term,estimate,std.error,statistic,p.value
fixed,,(Intercept),-3.632307796,0.0186314479,-194.955744,0.0
fixed,,genderFemale,-0.250228543,0.0085830867,-29.1536775,7.503807e-187
fixed,,authorship_posfirst_position,0.28799761,0.0121866082,23.6323023,1.79485e-123
fixed,,authorship_posmiddle_position,-0.459530546,0.0104171289,-44.112975,0.0
fixed,,authorship_possolo_author,0.509622221,0.0283022279,18.0064348,1.734464e-72
fixed,,author_pub_count_cate,0.272779332,0.0073240789,37.2441827,1.316141e-303
fixed,,I(author_pub_count_cate^2),-0.02179812,0.0006797217,-32.0691847,1.186118e-225
fixed,,affiliation_rank_cate,-0.055432671,0.0016196035,-34.2260744,9.901163e-257
fixed,,affiliation_cateinternational,0.04447096,0.0102902347,4.3216663,1.548553e-05
fixed,,num_authors,-0.001679848,0.0001311825,-12.8054242,1.52888e-37


In [26]:
# Predicted probability of self-promotion by gender for the retweets model.
# weighted average for factor variable, median for non-factor variables
MEs <- ggemmeans(m_retweet, terms = "gender", typical = "median")

Model contains polynomial or cubic / quadratic terms. Consider using `terms="gender [all]"` to get smooth plots. See also package-vignette 'Marginal Effects at Specific Values'.


In [27]:
# Show the retweet-model predictions by gender.
MEs

x,predicted,std.error,conf.low,conf.high,group
Male,0.04070993,0.01199497,0.03980167,0.04163803,1
Female,0.03198595,0.01302369,0.03120496,0.03278582,1


In [28]:
# Save the retweet-model predictions as a CSV in the results directory.
fname <- paste0(data_root, "pred_retweet.csv")
write.csv(MEs, file = fname, row.names = FALSE)

### All tweets

In [29]:
# All tweets: fixed-effects part of the model for self_promotion (gender plus
# author/paper controls, with a quadratic term in author_pub_count_cate).
# The random intercept and subject-area terms are appended in the next cell.
base_str <- "self_promotion ~ 1 + gender + authorship_pos + author_pub_count_cate + I(author_pub_count_cate^2) + \
            affiliation_rank_cate + affiliation_cate + num_authors + journal_impact + author_citation_log"

In [30]:
# All tweets: logistic mixed model with a random intercept per paper (doi).
# `keywords` already begins with " + ", so no extra "+" is added after (1|doi);
# the previous " + (1|doi) + " produced a stray "+ +" (unary plus) in the formula.
equation <- as.formula(paste(base_str, " + (1|doi)", keywords, sep = " "))
m_all_tw <- glmer(
  formula = equation,
  data = mydata,
  family = "binomial",
  control = glmerControl(optimizer = "nloptwrap"),
  nAGQ = 0
)


In [31]:
# Coefficient table for the all-tweets model.
tidy(m_all_tw)

effect,group,term,estimate,std.error,statistic,p.value
fixed,,(Intercept),-2.723197676,0.01384108,-196.747464,0.0
fixed,,genderFemale,-0.275823356,0.006462962,-42.677546,0.0
fixed,,authorship_posfirst_position,0.319948786,0.00877129,36.47682,2.585283e-291
fixed,,authorship_posmiddle_position,-0.679409761,0.007624535,-89.108353,0.0
fixed,,authorship_possolo_author,0.752657111,0.01972544,38.156679,0.0
fixed,,author_pub_count_cate,0.336465524,0.00551838,60.971795,0.0
fixed,,I(author_pub_count_cate^2),-0.028507249,0.0005144134,-55.417,0.0
fixed,,affiliation_rank_cate,-0.053857091,0.001218774,-44.189547,0.0
fixed,,affiliation_cateinternational,0.022644346,0.007701103,2.940403,0.003277857
fixed,,num_authors,-0.001670857,9.619019e-05,-17.37035,1.383788e-67


In [32]:
# LaTeX regression table for the all-tweets model: coefficients with significance
# stars (cutoffs 0.05 / 0.01 / 0.001) and p-values.
# use this search string in Sublime: " \\\\ \n  &" with " &"
stargazer(
  m_all_tw,
  type = "latex",
  single.row = TRUE,
  ci = FALSE,
  report = "vc*p",
  star.cutoffs = c(0.05, 0.01, 0.001)
)


% Table created by stargazer v.5.2.2 by Marek Hlavac, Harvard University. E-mail: hlavac at fas.harvard.edu
% Date and time: Tue, May 10, 2022 - 04:00:36 PM
\begin{table}[!htbp] \centering 
  \caption{} 
  \label{} 
\begin{tabular}{@{\extracolsep{5pt}}lc} 
\\[-1.8ex]\hline 
\hline \\[-1.8ex] 
 & \multicolumn{1}{c}{\textit{Dependent variable:}} \\ 
\cline{2-2} 
\\[-1.8ex] & self\_promotion \\ 
\hline \\[-1.8ex] 
 genderFemale & $-$0.276$^{***}$ \\ 
  & p = 0.000 \\ 
  authorship\_posfirst\_position & 0.320$^{***}$ \\ 
  & p = 0.000 \\ 
  authorship\_posmiddle\_position & $-$0.679$^{***}$ \\ 
  & p = 0.000 \\ 
  authorship\_possolo\_author & 0.753$^{***}$ \\ 
  & p = 0.000 \\ 
  author\_pub\_count\_cate & 0.336$^{***}$ \\ 
  & p = 0.000 \\ 
  I(author\_pub\_count\_cate$\hat{\mkern6mu}$2) & $-$0.029$^{***}$ \\ 
  & p = 0.000 \\ 
  affiliation\_rank\_cate & $-$0.054$^{***}$ \\ 
  & p = 0.000 \\ 
  affiliation\_cateinternational & 0.023$^{**}$ \\ 
  & p = 0.004 \\ 
  num\_authors & $-$0.00

In [33]:
# Predicted probability of self-promotion by gender for the all-tweets model.
# weighted average for factor variable, median for non-factor variables
MEs <- ggemmeans(m_all_tw, terms = "gender", typical = "median")

Model contains polynomial or cubic / quadratic terms. Consider using `terms="gender [all]"` to get smooth plots. See also package-vignette 'Marginal Effects at Specific Values'.


In [34]:
# Show the all-tweets predictions by gender.
MEs

x,predicted,std.error,conf.low,conf.high,group
Male,0.10400141,0.008660582,0.10243025,0.10559384,1
Female,0.08096125,0.009461269,0.07959215,0.08235179,1


In [35]:
# Save the all-tweets predictions as a CSV in the results directory.
fname <- paste0(data_root, "pred_all_tweet.csv")
write.csv(MEs, file = fname, row.names = FALSE)

### Subset of authors of non-East-Asian ethnicity (names of unknown ethnicity are also excluded)

In [36]:
# Frequency of each value of author_eth_ethnea_broad in the filtered data.
table(mydata$author_eth_ethnea_broad)


                African               CARIBBEAN                 Chinese 
                  15364                     129                  232407 
              EastAsian         EasternEuropean                 English 
                 118827                  124549                  679328 
                 Indian           MiddleEastern                     org 
                  96652                  150079                      22 
             POLYNESIAN        SouthernEuropean                 unknown 
                      1                  396913                    8300 
WesternNorthernEuropean 
                 552848 

In [37]:
# Number of observations excluded from the subset model below: East Asian names
# (Chinese + non-Chinese East Asian) plus names whose ethnicity is 'unknown'.
nrow(mydata[mydata$author_eth_ethnea_broad %in% c('Chinese', 'EastAsian', 'unknown'), ])

In [38]:
# Number of observations kept for the subset model (not Chinese, EastAsian or unknown).
nrow(mydata[!mydata$author_eth_ethnea_broad %in% c('Chinese', 'EastAsian', 'unknown'), ])

In [39]:
# Ethnicity-subset model: same fixed-effects specification as the all-tweets model.
# The random intercept and subject-area terms are appended in the next cell.
base_str <- "self_promotion ~ 1 + gender + authorship_pos + author_pub_count_cate + I(author_pub_count_cate^2) + \
            affiliation_rank_cate + affiliation_cate + num_authors + journal_impact + author_citation_log"

In [40]:
# All-tweets model refit on observations whose author_eth_ethnea_broad is not
# Chinese, EastAsian or unknown.
# `keywords` already begins with " + ", so no extra "+" is added after (1|doi);
# the previous " + (1|doi) + " produced a stray "+ +" (unary plus) in the formula.
equation <- as.formula(paste(base_str, " + (1|doi)", keywords, sep = " "))
m_all_exc <- glmer(
  formula = equation,
  data = mydata[!mydata$author_eth_ethnea_broad %in% c('Chinese', 'EastAsian', 'unknown'), ],
  family = "binomial",
  control = glmerControl(optimizer = "nloptwrap"),
  nAGQ = 0
)


In [41]:
# Coefficient table for the ethnicity-subset model.
tidy(m_all_exc)

effect,group,term,estimate,std.error,statistic,p.value
fixed,,(Intercept),-2.548255794,0.01418281,-179.672143,0.0
fixed,,genderFemale,-0.252683539,0.00665868,-37.94799,0.0
fixed,,authorship_posfirst_position,0.316888932,0.008978428,35.294479,7.137045e-273
fixed,,authorship_posmiddle_position,-0.662388911,0.007775703,-85.187014,0.0
fixed,,authorship_possolo_author,0.671769855,0.01984849,33.844881,4.316335e-251
fixed,,author_pub_count_cate,0.326350539,0.005665899,57.599073,0.0
fixed,,I(author_pub_count_cate^2),-0.027289899,0.0005279468,-51.690627,0.0
fixed,,affiliation_rank_cate,-0.048866781,0.001240734,-39.385377,0.0
fixed,,affiliation_cateinternational,0.081037421,0.007804019,10.384062,2.930353e-25
fixed,,num_authors,-0.001620936,9.280024e-05,-17.466936,2.558713e-68


In [42]:
# LaTeX regression table for the ethnicity-subset model: coefficients with
# significance stars (cutoffs 0.05 / 0.01 / 0.001) and p-values.
# use this search string in Sublime: " \\\\ \n  &" with " &"
stargazer(
  m_all_exc,
  type = "latex",
  single.row = TRUE,
  ci = FALSE,
  report = "vc*p",
  star.cutoffs = c(0.05, 0.01, 0.001)
)



% Table created by stargazer v.5.2.2 by Marek Hlavac, Harvard University. E-mail: hlavac at fas.harvard.edu
% Date and time: Tue, May 10, 2022 - 04:12:28 PM
\begin{table}[!htbp] \centering 
  \caption{} 
  \label{} 
\begin{tabular}{@{\extracolsep{5pt}}lc} 
\\[-1.8ex]\hline 
\hline \\[-1.8ex] 
 & \multicolumn{1}{c}{\textit{Dependent variable:}} \\ 
\cline{2-2} 
\\[-1.8ex] & self\_promotion \\ 
\hline \\[-1.8ex] 
 genderFemale & $-$0.253$^{***}$ \\ 
  & p = 0.000 \\ 
  authorship\_posfirst\_position & 0.317$^{***}$ \\ 
  & p = 0.000 \\ 
  authorship\_posmiddle\_position & $-$0.662$^{***}$ \\ 
  & p = 0.000 \\ 
  authorship\_possolo\_author & 0.672$^{***}$ \\ 
  & p = 0.000 \\ 
  author\_pub\_count\_cate & 0.326$^{***}$ \\ 
  & p = 0.000 \\ 
  I(author\_pub\_count\_cate$\hat{\mkern6mu}$2) & $-$0.027$^{***}$ \\ 
  & p = 0.000 \\ 
  affiliation\_rank\_cate & $-$0.049$^{***}$ \\ 
  & p = 0.000 \\ 
  affiliation\_cateinternational & 0.081$^{***}$ \\ 
  & p = 0.000 \\ 
  num\_authors & $-$0.0

### Subset of authors on Twitter

In [43]:
# Number of observations whose author has a positive author_self_promotion_rate
# (used in this section as the "authors on Twitter" subset).
nrow(mydata[mydata$author_self_promotion_rate > 0, ])

In [44]:
# Share of all observations that fall in the positive-self-promotion-rate subset.
nrow(mydata[mydata$author_self_promotion_rate > 0, ]) / nrow(mydata)

In [45]:
# All-tweets specification refit on observations whose author has a positive
# author_self_promotion_rate; random intercept per paper (doi), subject-area terms
# appended from `keywords` (which supplies its own leading " + ").
base_str <- "self_promotion ~ 1 + gender + authorship_pos + author_pub_count_cate + I(author_pub_count_cate^2) + \
            affiliation_rank_cate + affiliation_cate + num_authors + journal_impact + author_citation_log"
equation <- as.formula(paste(base_str, " + (1|doi)", keywords))
m_on_tw <- glmer(
  formula = equation,
  data = mydata[mydata$author_self_promotion_rate > 0, ],
  family = "binomial",
  control = glmerControl(optimizer = "nloptwrap"),
  nAGQ = 0
)


In [46]:
# Coefficient table for the on-Twitter subset model.
tidy(m_on_tw)

effect,group,term,estimate,std.error,statistic,p.value
fixed,,(Intercept),1.770110506,0.02333376,75.8604937,0.0
fixed,,genderFemale,-0.06987166,0.009990694,-6.9936741,2.677792e-12
fixed,,authorship_posfirst_position,0.260435484,0.01455079,17.8983682,1.214279e-71
fixed,,authorship_posmiddle_position,-0.740984512,0.01070766,-69.2013562,0.0
fixed,,authorship_possolo_author,0.359248584,0.03243531,11.0758493,1.6431480000000002e-28
fixed,,author_pub_count_cate,-0.401610731,0.009562722,-41.9975322,0.0
fixed,,I(author_pub_count_cate^2),0.007624041,0.0008468138,9.0032083,2.19216e-19
fixed,,affiliation_rank_cate,0.014829163,0.001725573,8.593761,8.416803e-18
fixed,,affiliation_cateinternational,0.247634111,0.01024264,24.1767773,3.905197e-129
fixed,,num_authors,-0.001415606,7.723111e-05,-18.3294802,4.814843e-75


In [47]:
# LaTeX regression table for the on-Twitter subset model: coefficients with
# significance stars (cutoffs 0.05 / 0.01 / 0.001) and p-values.
# use this search string in Sublime: " \\\\ \n  &" with " &"
stargazer(
  m_on_tw,
  type = "latex",
  single.row = TRUE,
  ci = FALSE,
  report = "vc*p",
  star.cutoffs = c(0.05, 0.01, 0.001)
)


% Table created by stargazer v.5.2.2 by Marek Hlavac, Harvard University. E-mail: hlavac at fas.harvard.edu
% Date and time: Tue, May 10, 2022 - 04:13:23 PM
\begin{table}[!htbp] \centering 
  \caption{} 
  \label{} 
\begin{tabular}{@{\extracolsep{5pt}}lc} 
\\[-1.8ex]\hline 
\hline \\[-1.8ex] 
 & \multicolumn{1}{c}{\textit{Dependent variable:}} \\ 
\cline{2-2} 
\\[-1.8ex] & self\_promotion \\ 
\hline \\[-1.8ex] 
 genderFemale & $-$0.070$^{***}$ \\ 
  & p = 0.000 \\ 
  authorship\_posfirst\_position & 0.260$^{***}$ \\ 
  & p = 0.000 \\ 
  authorship\_posmiddle\_position & $-$0.741$^{***}$ \\ 
  & p = 0.000 \\ 
  authorship\_possolo\_author & 0.359$^{***}$ \\ 
  & p = 0.000 \\ 
  author\_pub\_count\_cate & $-$0.402$^{***}$ \\ 
  & p = 0.000 \\ 
  I(author\_pub\_count\_cate$\hat{\mkern6mu}$2) & 0.008$^{***}$ \\ 
  & p = 0.000 \\ 
  affiliation\_rank\_cate & 0.015$^{***}$ \\ 
  & p = 0.000 \\ 
  affiliation\_cateinternational & 0.248$^{***}$ \\ 
  & p = 0.000 \\ 
  num\_authors & $-$0.001$

In [48]:
# Predicted probability of self-promotion by gender for the on-Twitter subset model.
# weighted average for factor variable, median for non-factor variables
MEs <- ggemmeans(m_on_tw, terms = "gender", typical = "median")

Model contains polynomial or cubic / quadratic terms. Consider using `terms="gender [all]"` to get smooth plots. See also package-vignette 'Marginal Effects at Specific Values'.


In [49]:
# Show the on-Twitter subset predictions by gender.
MEs

x,predicted,std.error,conf.low,conf.high,group
Male,0.4722573,0.01231103,0.4662479,0.4782748,1
Female,0.454884,0.01423529,0.4479747,0.4618106,1


In [50]:
# Save the on-Twitter subset predictions as a CSV in the results directory.
write.csv(MEs, file = paste0(data_root, "pred_on_twitter.csv"), row.names = FALSE)

### Gender x Journal Impact (all tweets)

In [51]:
# Interaction model: `gender * journal_impact` expands to both main effects plus
# their interaction; the remaining controls match the all-tweets model.
# The random intercept and subject-area terms are appended in the next cell.
base_str <- "self_promotion ~ 1 + gender * journal_impact + authorship_pos + author_pub_count_cate + \
            I(author_pub_count_cate^2) + affiliation_rank_cate + affiliation_cate + num_authors + author_citation_log"

In [52]:
# Gender x journal impact: logistic mixed model with a random intercept per paper (doi).
# `keywords` already begins with " + ", so no extra "+" is added after (1|doi);
# the previous " + (1|doi) + " produced a stray "+ +" (unary plus) in the formula.
equation <- as.formula(paste(base_str, " + (1|doi)", keywords, sep = " "))
m_x_jif <- glmer(
  formula = equation,
  data = mydata,
  family = "binomial",
  control = glmerControl(optimizer = "nloptwrap"),
  nAGQ = 0
)


In [53]:
# Coefficient table for the gender x journal-impact model.
tidy(m_x_jif)

effect,group,term,estimate,std.error,statistic,p.value
fixed,,(Intercept),-2.74646292,0.01392736,-197.199088,0.0
fixed,,genderFemale,-0.20128656,0.00801077,-25.126992,2.522242e-139
fixed,,journal_impact,0.03889219,0.0005820612,66.818055,0.0
fixed,,authorship_posfirst_position,0.31766707,0.008773794,36.206351,4.8365330000000003e-287
fixed,,authorship_posmiddle_position,-0.67966259,0.007626157,-89.12256,0.0
fixed,,authorship_possolo_author,0.75246464,0.01973059,38.136964,0.0
fixed,,author_pub_count_cate,0.33815889,0.005520662,61.253327,0.0
fixed,,I(author_pub_count_cate^2),-0.02870487,0.0005147563,-55.764004,0.0
fixed,,affiliation_rank_cate,-0.0538786,0.001219117,-44.194763,0.0
fixed,,affiliation_cateinternational,0.02208866,0.007703461,2.867368,0.00413901


In [54]:
# Predicted probability of self-promotion for each gender across journal_impact
# values 0 to 40 in steps of 5.
MEs <- ggemmeans(
  m_x_jif,
  terms = c("journal_impact [0:40 by=5]", "gender"),
  typical = "median"
)

Model contains polynomial or cubic / quadratic terms. Consider using `terms="journal_impact [all]"` to get smooth plots. See also package-vignette 'Marginal Effects at Specific Values'.


In [55]:
# Show the predictions by journal impact (x) and gender (group).
MEs

x,predicted,std.error,conf.low,conf.high,group
0,0.08961525,0.009311993,0.08813735,0.09111545,Male
0,0.07449356,0.010470531,0.07309099,0.07592084,Female
5,0.1067973,0.008624728,0.10519547,0.10842057,Male
5,0.08493503,0.009455952,0.08350563,0.08638658,Female
10,0.1268148,0.008888105,0.1248983,0.12875638,Male
10,0.09668714,0.009784282,0.09502516,0.09837502,Female
15,0.14995438,0.010027494,0.14746637,0.15247684,Male
15,0.1098701,0.011339465,0.1077153,0.11206259,Female
20,0.1764629,0.011791634,0.17312937,0.17984665,Male
20,0.12460257,0.013710176,0.12170096,0.12756331,Female


In [56]:
# Save the gender x journal-impact predictions as a CSV in the results directory.
write.csv(MEs, file = paste0(data_root, "gender_jif.csv"), row.names = FALSE)

### Gender x Affiliation rank (all tweets)

In [57]:
# Interaction model: `gender * affiliation_rank_cate` expands to both main effects
# plus their interaction; the remaining controls match the all-tweets model.
# The random intercept and subject-area terms are appended in the next cell.
base_str <- "self_promotion ~ 1 + gender * affiliation_rank_cate + authorship_pos + author_pub_count_cate + \
            I(author_pub_count_cate^2) + affiliation_cate + num_authors + journal_impact + author_citation_log"

In [58]:
# Gender x affiliation rank: logistic mixed model with a random intercept per paper (doi).
# `keywords` already begins with " + ", so no extra "+" is added after (1|doi);
# the previous " + (1|doi) + " produced a stray "+ +" (unary plus) in the formula.
equation <- as.formula(paste(base_str, " + (1|doi)", keywords, sep = " "))
m_x_affi <- glmer(
  formula = equation,
  data = mydata,
  family = "binomial",
  control = glmerControl(optimizer = "nloptwrap"),
  nAGQ = 0
)

In [59]:
# Coefficient table for the gender x affiliation-rank model.
tidy(m_x_affi)

effect,group,term,estimate,std.error,statistic,p.value
fixed,,(Intercept),-2.73210566,0.01410275,-193.728626,0.0
fixed,,genderFemale,-0.24716614,0.01077569,-22.937386,1.969087e-116
fixed,,affiliation_rank_cate,-0.051523253,0.00140656,-36.630673,9.295323999999999e-294
fixed,,authorship_posfirst_position,0.320028889,0.008771305,36.485891,1.856459e-291
fixed,,authorship_posmiddle_position,-0.679350049,0.007624405,-89.10204,0.0
fixed,,authorship_possolo_author,0.752661036,0.01972538,38.156993,0.0
fixed,,author_pub_count_cate,0.335828471,0.005521884,60.817736,0.0
fixed,,I(author_pub_count_cate^2),-0.028435103,0.0005148863,-55.225986,0.0
fixed,,affiliation_cateinternational,0.02279556,0.007701345,2.959945,0.003076938
fixed,,num_authors,-0.001670908,9.618875e-05,-17.371136,1.364974e-67


In [60]:
# Predicted probability of self-promotion for each gender across affiliation_rank_cate.
MEs <- ggemmeans(
  m_x_affi,
  terms = c("affiliation_rank_cate", "gender"),
  typical = "median"
)

Model contains polynomial or cubic / quadratic terms. Consider using `terms="affiliation_rank_cate [all]"` to get smooth plots. See also package-vignette 'Marginal Effects at Specific Values'.


In [61]:
# Show the predictions by affiliation rank category (x) and gender (group).
MEs

x,predicted,std.error,conf.low,conf.high,group
0,0.12480105,0.010332842,0.12260577,0.12702995,Male
0,0.10020972,0.011986122,0.09811128,0.10234796,Female
1,0.11928131,0.009638121,0.11731105,0.12128012,Male
1,0.09503382,0.01091343,0.0932101,0.0968894,Female
2,0.11397391,0.009109112,0.11218338,0.11578929,Male
2,0.09009848,0.010094469,0.08848961,0.09173366,Female
3,0.10887346,0.008775837,0.10721588,0.1105535,Male
3,0.08539526,0.009594436,0.08393796,0.08687546,Female
4,0.10397449,0.008660919,0.10240362,0.10556661,Male
4,0.08091572,0.009464018,0.07954693,0.08230596,Female


In [62]:
# Save the gender x affiliation-rank predictions as a CSV in the results directory.
write.csv(MEs, file = paste0(data_root, "gender_affi.csv"), row.names = FALSE)

### Gender x Prior pubs (all tweets)

In [63]:
# Interaction model: gender is interacted with both the linear and the squared
# author_pub_count_cate terms; the remaining controls match the all-tweets model.
# The random intercept and subject-area terms are appended in the next cell.
base_str <- "self_promotion ~ 1 + gender * author_pub_count_cate + gender * I(author_pub_count_cate^2) + \
            authorship_pos + affiliation_rank_cate + affiliation_cate + num_authors + journal_impact + author_citation_log"

In [91]:
# Gender x prior publications: logistic mixed model with a random intercept per paper (doi).
# `keywords` already begins with " + ", so no extra "+" is added after (1|doi);
# the previous " + (1|doi) + " produced a stray "+ +" (unary plus) in the formula.
equation <- as.formula(paste(base_str, " + (1|doi)", keywords, sep = " "))
m_x_pub <- glmer(
  formula = equation,
  data = mydata,
  family = "binomial",
  control = glmerControl(optimizer = "nloptwrap"),
  nAGQ = 0
)

In [65]:
# Coefficient table for the gender x prior-publications model.
tidy(m_x_pub)

effect,group,term,estimate,std.error,statistic,p.value
fixed,,(Intercept),-2.710463306,0.01484662,-182.564335,0.0
fixed,,genderFemale,-0.306872737,0.01426762,-21.508331,1.301021e-102
fixed,,author_pub_count_cate,0.328320184,0.006294283,52.161649,0.0
fixed,,I(author_pub_count_cate^2),-0.027560125,0.0006180569,-44.591569,0.0
fixed,,authorship_posfirst_position,0.319629233,0.008773804,36.429947,1.429331e-290
fixed,,authorship_posmiddle_position,-0.679499336,0.007625209,-89.11222,0.0
fixed,,authorship_possolo_author,0.752411367,0.01972608,38.142973,0.0
fixed,,affiliation_rank_cate,-0.053821502,0.001218853,-44.157509,0.0
fixed,,affiliation_cateinternational,0.022591957,0.007701257,2.933542,0.003351188
fixed,,num_authors,-0.001672023,9.619259e-05,-17.382034,1.128778e-67


In [66]:
# Predicted probability of self-promotion for each gender across author_pub_count_cate.
MEs <- ggemmeans(
  m_x_pub,
  terms = c("author_pub_count_cate", "gender"),
  typical = "median"
)

Model contains polynomial or cubic / quadratic terms. Consider using `terms="author_pub_count_cate [all]"` to get smooth plots. See also package-vignette 'Marginal Effects at Specific Values'.


In [67]:
# Show the predictions by publication-count category (x) and gender (group).
MEs

x,predicted,std.error,conf.low,conf.high,group
0,0.04601856,0.01817396,0.04447985,0.04760786,Male
0,0.0342747,0.019042079,0.03306059,0.03553175,Female
1,0.06117794,0.013968753,0.05962423,0.06276944,Male
1,0.04666721,0.014394975,0.04542794,0.04793859,Female
2,0.07690229,0.011142746,0.0753662,0.07846703,Male
2,0.05973244,0.011885767,0.05843739,0.06105432,Female
3,0.09156453,0.009553775,0.09001884,0.09313405,Male
3,0.07198614,0.010807133,0.07058389,0.07341405,Female
4,0.10346717,0.008917008,0.10185717,0.10509964,Male
4,0.08182737,0.010376747,0.08031228,0.08336846,Female


In [68]:
# Save the gender x prior-publications predictions as a CSV in the results directory.
write.csv(MEs, file = paste0(data_root, "gender_pub.csv"), row.names = FALSE)

### Disciplines

In [69]:
# Number of observations with the General indicator equal to 1.
nrow(mydata[mydata$General == 1, ])

Life Sciences

In [70]:
# Life Sciences: all-tweets specification refit on rows with Life_Sciences == 1;
# random intercept per paper (doi), subject-area terms appended from `keywords`
# (which supplies its own leading " + ").
base_str <- "self_promotion ~ 1 + gender + authorship_pos + author_pub_count_cate + I(author_pub_count_cate^2) + \
            affiliation_rank_cate + affiliation_cate + num_authors + journal_impact + author_citation_log"
equation <- as.formula(paste(base_str, " + (1|doi)", keywords))
m_life <- glmer(
  formula = equation,
  data = mydata[mydata$Life_Sciences == 1, ],
  family = "binomial",
  control = glmerControl(optimizer = "nloptwrap"),
  nAGQ = 0
)


fixed-effect model matrix is rank deficient so dropping 1 column / coefficient


In [71]:
# Coefficient table for the Life Sciences model.
tidy(m_life)

effect,group,term,estimate,std.error,statistic,p.value
fixed,,(Intercept),-2.965929154,0.0319628371,-92.79305042,0.0
fixed,,genderFemale,-0.362607128,0.0119986063,-30.22077053,1.263796e-200
fixed,,authorship_posfirst_position,0.262813545,0.0167902539,15.65274388,3.181989e-55
fixed,,authorship_posmiddle_position,-0.762056902,0.0142562482,-53.45423937,0.0
fixed,,authorship_possolo_author,0.552607476,0.0506939001,10.90086725,1.141639e-27
fixed,,author_pub_count_cate,0.304634925,0.010685268,28.50980661,8.85453e-179
fixed,,I(author_pub_count_cate^2),-0.030721006,0.0009843564,-31.20923209,7.985255e-214
fixed,,affiliation_rank_cate,-0.077371888,0.0023134525,-33.44433829,3.1108629999999998e-245
fixed,,affiliation_cateinternational,0.056780396,0.0143836621,3.94756183,7.895113e-05
fixed,,num_authors,-0.002853619,0.0005443413,-5.24233464,1.585575e-07


In [72]:
# Predicted probability of self-promotion by gender for the Life Sciences model.
# weighted average for factor variable, median for non-factor variables
MEs <- ggemmeans(m_life, terms = "gender", typical = "median")

Model contains polynomial or cubic / quadratic terms. Consider using `terms="gender [all]"` to get smooth plots. See also package-vignette 'Marginal Effects at Specific Values'.


In [73]:
# Show the Life Sciences predictions by gender.
MEs

x,predicted,std.error,conf.low,conf.high,group
Male,0.07769801,0.02124144,0.07476655,0.08073438,1
Female,0.0553755,0.02249357,0.05311407,0.05772735,1


In [74]:
# Save the Life Sciences predictions as a CSV in the results directory.
write.csv(MEs, file = paste0(data_root, "pred_life.csv"), row.names = FALSE)

Social Sciences

In [75]:
# Social Sciences: all-tweets specification refit on rows with Social_Sciences == 1;
# random intercept per paper (doi), subject-area terms appended from `keywords`
# (which supplies its own leading " + ").
base_str <- "self_promotion ~ 1 + gender + authorship_pos + author_pub_count_cate + I(author_pub_count_cate^2) + \
            affiliation_rank_cate + affiliation_cate + num_authors + journal_impact + author_citation_log"
equation <- as.formula(paste(base_str, " + (1|doi)", keywords))
m_social <- glmer(
  formula = equation,
  data = mydata[mydata$Social_Sciences == 1, ],
  family = "binomial",
  control = glmerControl(optimizer = "nloptwrap"),
  nAGQ = 0
)


fixed-effect model matrix is rank deficient so dropping 1 column / coefficient


In [76]:
# Coefficient table for the Social Sciences model.
tidy(m_social)

effect,group,term,estimate,std.error,statistic,p.value
fixed,,(Intercept),-1.808001,0.0407766,-44.339186128,0.0
fixed,,genderFemale,-0.1075062,0.01782025,-6.032808536,1.611343e-09
fixed,,authorship_posfirst_position,0.4146377,0.02162616,19.172967833,6.225384e-82
fixed,,authorship_posmiddle_position,-0.2604136,0.02294956,-11.347216318,7.656146e-30
fixed,,authorship_possolo_author,0.6233673,0.03288786,18.954326284,4.067291e-80
fixed,,author_pub_count_cate,0.4000013,0.01549415,25.816274418,5.822356e-147
fixed,,I(author_pub_count_cate^2),-0.02770151,0.001590493,-17.416932869,6.138049e-68
fixed,,affiliation_rank_cate,-0.03518698,0.003311631,-10.625271836,2.2734879999999998e-26
fixed,,affiliation_cateinternational,0.2479048,0.02086184,11.883170125,1.447708e-32
fixed,,num_authors,-0.06581681,0.004259731,-15.450930326,7.436235e-54


In [77]:
# Predicted probability of self-promotion by gender for the Social Sciences model.
# weighted average for factor variable, median for non-factor variables
MEs <- ggemmeans(m_social, terms = "gender", typical = "median")

Model contains polynomial or cubic / quadratic terms. Consider using `terms="gender [all]"` to get smooth plots. See also package-vignette 'Marginal Effects at Specific Values'.


In [78]:
# Show the Social Sciences predictions by gender.
MEs

x,predicted,std.error,conf.low,conf.high,group
Male,0.2222351,0.02164826,0.2149877,0.2296553,1
Female,0.2042086,0.02260249,0.1971038,0.2115021,1


In [79]:
# Save the Social Sciences predictions as a CSV in the results directory.
write.csv(MEs, file = paste0(data_root, "pred_social.csv"), row.names = FALSE)

Physical Sciences

In [80]:
# Physical Sciences: all-tweets specification refit on rows with Physical_Sciences == 1;
# random intercept per paper (doi), subject-area terms appended from `keywords`
# (which supplies its own leading " + ").
base_str <- "self_promotion ~ 1 + gender + authorship_pos + author_pub_count_cate + I(author_pub_count_cate^2) + \
            affiliation_rank_cate + affiliation_cate + num_authors + journal_impact + author_citation_log"
equation <- as.formula(paste(base_str, " + (1|doi)", keywords))
m_phy <- glmer(
  formula = equation,
  data = mydata[mydata$Physical_Sciences == 1, ],
  family = "binomial",
  control = glmerControl(optimizer = "nloptwrap"),
  nAGQ = 0
)


fixed-effect model matrix is rank deficient so dropping 1 column / coefficient


In [81]:
# Coefficient table for the Physical Sciences model.
tidy(m_phy)

effect,group,term,estimate,std.error,statistic,p.value
fixed,,(Intercept),-3.278788426,0.0356392347,-91.999406,0.0
fixed,,genderFemale,-0.242226328,0.0152004993,-15.9354192,3.597801e-57
fixed,,authorship_posfirst_position,0.299173835,0.0191049109,15.6595253,2.8602420000000003e-55
fixed,,authorship_posmiddle_position,-0.601386921,0.0164457483,-36.5679269,9.254277e-293
fixed,,authorship_possolo_author,0.661960109,0.0487362596,13.5824972,5.08623e-42
fixed,,author_pub_count_cate,0.307732192,0.012813122,24.0169564,1.84953e-127
fixed,,I(author_pub_count_cate^2),-0.029785641,0.0011809363,-25.2220555,2.2951400000000003e-140
fixed,,affiliation_rank_cate,-0.045861103,0.0026770242,-17.1313736,8.658604e-66
fixed,,affiliation_cateinternational,-0.034047013,0.0168765965,-2.0174099,0.04365275
fixed,,num_authors,-0.001348467,8.95511e-05,-15.0580753,3.0555399999999996e-51


In [82]:
# Predicted probability of self-promotion by gender for the Physical Sciences model.
# weighted average for factor variable, median for non-factor variables
MEs <- ggemmeans(m_phy, terms = "gender", typical = "median")

Model contains polynomial or cubic / quadratic terms. Consider using `terms="gender [all]"` to get smooth plots. See also package-vignette 'Marginal Effects at Specific Values'.


In [83]:
# Show the Physical Sciences predictions by gender.
MEs

x,predicted,std.error,conf.low,conf.high,group
Male,0.07186789,0.02591246,0.06855301,0.0753301,1
Female,0.05729334,0.0285668,0.05434317,0.06039343,1


In [84]:
# Save the Physical Sciences predictions as a CSV in the results directory.
write.csv(MEs, file = paste0(data_root, "pred_phy.csv"), row.names = FALSE)

Health Sciences

In [85]:
# Health Sciences: all-tweets specification refit on rows with Health_Sciences == 1;
# random intercept per paper (doi), subject-area terms appended from `keywords`
# (which supplies its own leading " + ").
base_str <- "self_promotion ~ 1 + gender + authorship_pos + author_pub_count_cate + I(author_pub_count_cate^2) + \
            affiliation_rank_cate + affiliation_cate + num_authors + journal_impact + author_citation_log"
equation <- as.formula(paste(base_str, " + (1|doi)", keywords))
m_health <- glmer(
  formula = equation,
  data = mydata[mydata$Health_Sciences == 1, ],
  family = "binomial",
  control = glmerControl(optimizer = "nloptwrap"),
  nAGQ = 0
)


fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients


In [86]:
# Coefficient table for the Health Sciences model.
tidy(m_health)

effect,group,term,estimate,std.error,statistic,p.value
fixed,,(Intercept),-3.300210562,0.0514420771,-64.1539134,0.0
fixed,,genderFemale,-0.239625755,0.0113357675,-21.1389088,3.4905510000000004e-99
fixed,,authorship_posfirst_position,0.411643306,0.0158972523,25.8939909,7.782861e-148
fixed,,authorship_posmiddle_position,-0.657921207,0.0137467628,-47.8600828,0.0
fixed,,authorship_possolo_author,0.684666384,0.0447980556,15.2833951,9.866373e-53
fixed,,author_pub_count_cate,0.383150418,0.0094654677,40.4787623,0.0
fixed,,I(author_pub_count_cate^2),-0.025751318,0.0008892475,-28.9585491,2.190276e-184
fixed,,affiliation_rank_cate,-0.045559152,0.0021875384,-20.8266755,2.4809580000000002e-96
fixed,,affiliation_cateinternational,0.116611872,0.0139577222,8.3546492,6.562746000000001e-17
fixed,,num_authors,-0.004713943,0.000576153,-8.1817542,2.797411e-16


In [87]:
# Predicted probability of self-promotion by gender for the Health Sciences model.
# weighted average for factor variable, median for non-factor variables
MEs <- ggemmeans(m_health, terms = "gender", typical = "median")

Model contains polynomial or cubic / quadratic terms. Consider using `terms="gender [all]"` to get smooth plots. See also package-vignette 'Marginal Effects at Specific Values'.


In [88]:
# Show the Health Sciences predictions by gender.
MEs

x,predicted,std.error,conf.low,conf.high,group
Male,0.07175774,0.01597062,0.06970051,0.07387086,1
Female,0.05734456,0.01685765,0.05558443,0.05915694,1


In [89]:
# Save the Health Sciences predictions as a CSV in the results directory.
write.csv(MEs, file = paste0(data_root, "pred_health.csv"), row.names = FALSE)