In [1]:
import pandas as pd
import numpy as np

In [2]:
featuremap = {
    '(Intercept)': 'Intercept',
    'sentiment_prob_pos': 'Positive Sentiment',
    'sentiment_prob_neg': 'Negative Sentiment',
    'lexdiv_cttr': 'Lexical Diversity',
    'num_punct': '# Punctuation Marks',
    'num_sentences': '# Sentences',
    'SMOG_readability': 'Readability',
    'second_personTRUE': 'Text Uses 2nd Person Pronouns',
    'user_follower': 'Author Follower Count',
    'level_in_tree': 'Comment Level in Tree',
    'cosine_1': 'Topical Similarity to Article',
    'hours_since_article': 'Time Since Article Publication',
    'votes_pos_mean': 'Mean Upvotes in Discussion',
    'votes_neg_mean': 'Mean Downvotes in Discussion',
    'article_comments': '# Comments in Discussion',
    'factor(genre1)/diestandard': 'Genre: Women\'s Issues',
    'factor(genre1)/diskurs': 'Genre: Opinion',
    'factor(genre1)/etat': 'Genre: Media',
    'factor(genre1)/international': 'Genre: International',
    'factor(genre1)/kultur': 'Genre: Culture',
    'factor(genre1)/lifestyle': 'Genre: Lifestyle',
    'factor(genre1)/panorama': 'Genre: Panorama',
    'factor(genre1)/podcast': 'Genre: Podcast',
    'factor(genre1)/recht': 'Genre: Law',
    'factor(genre1)/sport': 'Genre: Sports',
    'factor(genre1)/video': 'Genre: Video',
    'factor(genre1)/web': 'Genre: Web',
    'factor(genre1)/wirtschaft': 'Genre: Economy',
    'factor(genre1)/wissenschaft': 'Genre: Science',
    'is_leaf_commentTRUE': 'Is Leaf Comment',
    'is_root_commentTRUE': 'Is Root Comment',
    'size_of_tree': 'Size of Comment Tree',
    'height_of_tree': 'Height of Comment Tree',
    'all_replies': '# Replies to Comment',
    'votes_neg_log': 'Comment Downvotes',
    'votes_pos_log': 'Comment Upvotes',
    'pinned_fTRUE': 'Comment Pinned'
}

In [3]:
commentfeats = ['Positive Sentiment',
                'Negative Sentiment',
                'Lexical Diversity',
                'Readability',
                'Topical Similarity to Article',
                '# Punctuation Marks',
                '# Sentences',
                'Text Uses 2nd Person Pronouns',
                'Author Follower Count',
                'Time Since Article Publication']

commentfeats_v = commentfeats + ['Is Root Comment', 'Comment Level in Tree']

articlefeats = ['Mean Upvotes in Discussion',
                'Mean Downvotes in Discussion',
                '# Comments in Discussion']

genrefeats = ['Genre: Women\'s Issues',
            'Genre: Opinion',
            'Genre: Media',
            'Genre: International',
            'Genre: Culture',
            'Genre: Lifestyle',
            'Genre: Panorama',
            'Genre: Podcast',
            'Genre: Law',
            'Genre: Sports',
            'Genre: Video',
            'Genre: Web',
            'Genre: Economy',
            'Genre: Science']

structurefeats = ['Comment Pinned',
                'Is Leaf Comment',
                'Size of Comment Tree',
                'Height of Comment Tree',
                '# Replies to Comment']

behaviourfeats = ['Comment Downvotes',
                'Comment Upvotes']

In [4]:
pincoefs = pd.read_csv('model_output/regressions/pin_coefs.csv', index_col=0)
pincoefs['name'] = pincoefs['name'].map(featuremap)
pincoefs.columns = ['Feature', 1, 0, 2]

pinpvalues = pd.read_csv('model_output/regressions/pin_pvalues.csv', index_col=0)
pinpvalues['name'] = pinpvalues['name'].map(featuremap)
pinpvalues.columns = ['Feature', 1, 0, 2]

pincoefs = pincoefs[['Feature', 1, 2]]
pinpvalues = pinpvalues[['Feature', 1, 2]]
pindf = pincoefs.merge(pinpvalues, on='Feature', suffixes=('_coef', '_pvalue'))
pindf = pindf.set_index('Feature').loc[['Intercept'] + commentfeats + articlefeats +
                               genrefeats + structurefeats[1:] +
                               behaviourfeats].reset_index()

In [5]:
# create latex table with coefficients and p-value indicated with asterisks

def format_pvalue(p):
    if p < 0.001:
        return '***'
    elif p < 0.01:
        return '**'
    elif p < 0.05:
        return '*'
    else:
        return ''
    
def format_coef(c):
    if pd.isna(c):
        return '-'
    else:
        return f'{c:.3f}'

def format_row(row, n):

    vals = []
    for model in range(1, n+1):
        vals.append(format_coef(row[f'{model}_coef']) + format_pvalue(row[f'{model}_pvalue']))

    return (row['Feature'] + ' & ' + ' & '.join(vals) + ' \\\\').replace('#', '\\#')

def create_latex_table_pin(df):
    print('\\begin{table}')
    print('\\begin{tabular}{l|cc}')
    print('Feature & Model 1 & Model 2 \\\\ \\hline')
    print('\n'.join(df.apply(lambda x: format_row(x, 2), axis=1).values), '\\hline')
    print('AIC & 55031 & 10875 \\\\') # manual hack
    print('BIC & 55335 & 11243 \\\\') # manual hack
    print('F1 & 0.0007 & 0.8645 \\\\') # manual hack
    print('\\end{tabular}')
    print('\\caption{Logistic regression output for modelling journalist preferences through Editor\'s Picks. ***: $p < 0.001$, **: $p < 0.01$, *: $p < 0.05$.}')
    print('\\label{tab:pin-regression}')
    print('\\end{table}')

def create_latex_table_upvote(df):
    print('\\begin{table*}')
    print('\\begin{tabular}{l|ccc}')
    print('Feature & Model 1 & Model 2 & Model 3\\\\ \\hline')
    print('\n'.join(df.apply(lambda x: format_row(x, 3), axis=1).values), '\\hline')
    print('Dispersion & 0.744 & 0.805 & 0.858 \\\\') # manual hack
    print('AIC & 5850080 & 5795211 & 5743462 \\\\') # manual hack
    print('BIC & 5850452 & 5795595 & 5743906 \\\\') # manual hack
    print('RMSLE & 0.9590 & 0.9330 & 0.9100 \\\\') # manual hack
    print('\\end{tabular}')
    print('\\caption{Negative binomial regression output for modelling reader preferences through comment upvotes. ***: $p < 0.001$, **: $p < 0.01$, *: $p < 0.05$.}')
    print('\\label{tab:upvote-regression}')
    print('\\end{table*}')

def create_latex_table_downvote(df):
    print('\\begin{table*}')
    print('\\begin{tabular}{l|ccc}')
    print('Feature & Model 1 & Model 2 & Model 3\\\\ \\hline')
    print('\n'.join(df.apply(lambda x: format_row(x, 3), axis=1).values), '\\hline')
    print('Dispersion & 0.220 & 0.224 & 0.290 \\\\') # manual hack
    print('AIC & 2883458 & 2876820 & 2770519 \\\\') # manual hack
    print('BIC & 2883830 & 2877204 & 2770963 \\\\') # manual hack
    print('RMSLE & 0.7815 & 0.7729 & 0.7090 \\\\') # manual hack
    print('\\end{tabular}')
    print('\\caption{Negative binomial regression output for modelling reader preferences through comment downvotes. ***: $p < 0.001$, **: $p < 0.01$, *: $p < 0.05$.}')
    print('\\label{tab:downvote-regression}')
    print('\\end{table*}')

def create_latex_table_1(df):
    print('\\begin{table}')
    print('\\resizebox{\\columnwidth}{!}{')
    print('\\begin{tabular}{l|ccc}')
    print('Feature & Editors\' Picks & Upvotes & Downvotes\\\\ \\hline')
    print('\n'.join(df.apply(lambda x: format_row(x, 3), axis=1).values), '\\hline')
    print('Dispersion & - & 0.744 & 0.220 \\\\') # manual hack
    print('AIC & 55031 & 5850080 & 2883458 \\\\') # manual hack
    print('BIC & 55335 & 5850452 & 2883830 \\\\') # manual hack
    print('F1 & 0.0007 & - & - \\\\') # manual hack
    print('RMSLE & - & 0.9590 & 0.7815 \\\\') # manual hack
    print('\\end{tabular}}')
    print('\\caption{Logistic and negative binomial regression model summaries with coefficients for a comment being selected as an Editors\' Pick or receiving Up/Downvotes. Note that coefficients in the Editors\' Picks model for the structural factors are not fit here, since only root comments are eligible to be selected by journalists. ***: $p < 0.001$, **: $p < 0.01$, *: $p < 0.05$.}')
    print('\\label{tab:pinvote-regressions}')
    print('\\end{table}')

def create_latex_table_2(df):
    print('\\begin{table}')
    print('\\resizebox{\\columnwidth}{!}{')
    print('\\begin{tabular}{l|cc}')
    print('Feature & Editors\' Picks & Upvotes \\\\ \\hline')
    print('\n'.join(df.apply(lambda x: format_row(x, 2), axis=1).values), '\\hline')
    print('Dispersion & - & 0.220 \\\\') # manual hack
    print('AIC & 10875 & 2883458 \\\\') # manual hack
    print('BIC & 11243 & 2883830 \\\\') # manual hack
    print('F1 & 0.8645 & - \\\\') # manual hack
    print('RMSLE & - & 0.9100 \\\\') # manual hack
    print('\\end{tabular}}')
    print('\\caption{Logistic and negative binomial regression model summaries with coefficients for a comment being selected as an Editors\' Pick or receiving Upvotes. Note that coefficients in the Editors\' Picks model for the structural factors are not fit here, since only root comments are eligible to be selected by journalists. ***: $p < 0.001$, **: $p < 0.01$, *: $p < 0.05$.}')
    print('\\label{tab:pinvote-regressions-2}')
    print('\\end{table}')

def create_latex_table_mega(df):
    print('\\begin{table}')
    print('\\resizebox{\\columnwidth}{!}{')
    print('\\begin{tabular}{l|cc}')
    print('Feature & \\begin{tabular}[c]{@{}c@{}}Relative Voting\\\\Preference\\end{tabular} & Comment Gap \\\\ \\hline')
    print('\n'.join(df.apply(lambda x: format_row(x, 2), axis=1).values), '\\hline')
    print('\\end{tabular}}')
    print('\\caption{Calculated coefficients for Relative Voting Preference and the Comment Gap. ***: $p < 0.001$, **: $p < 0.01$, *: $p < 0.05$.}')
    print('\\label{tab:logodds-regression}')
    print('\\end{table}')

In [6]:
create_latex_table_pin(pindf)

\begin{table}
\begin{tabular}{l|cc}
Feature & Model 1 & Model 2 \\ \hline
Intercept & -5.470*** & 26.624*** \\
Positive Sentiment & 0.137*** & 0.162*** \\
Negative Sentiment & -0.036* & -0.076* \\
Lexical Diversity & 0.548*** & 0.367*** \\
Readability & 0.134*** & 0.042 \\
Topical Similarity to Article & -0.021 & -0.029 \\
\# Punctuation Marks & -0.048* & 0.105* \\
\# Sentences & 0.083*** & -0.000 \\
Text Uses 2nd Person Pronouns & 0.217*** & 0.108 \\
Author Follower Count & 0.103*** & 0.053 \\
Time Since Article Publication & -1.191*** & -0.182*** \\
Mean Upvotes in Discussion & 0.201*** & -0.520*** \\
Mean Downvotes in Discussion & 0.069*** & -0.352*** \\
\# Comments in Discussion & -0.104*** & -0.715*** \\
Genre: Women's Issues & 0.832*** & -0.034 \\
Genre: Opinion & 0.148** & 0.006 \\
Genre: Media & 0.230*** & 0.160 \\
Genre: International & 0.195*** & 0.073 \\
Genre: Culture & 0.496*** & 0.400* \\
Genre: Lifestyle & 0.844*** & 0.488*** \\
Genre: Panorama & 0.348*** & 0.034 \\
Genr

In [7]:
upvotecoefs = pd.read_csv('model_output/regressions/votes_pos_coefs.csv', index_col=0)
upvotecoefs['name'] = upvotecoefs['name'].map(featuremap)
upvotecoefs.columns = ['Feature', 1, 2, 0, 3]

upvotepvalues = pd.read_csv('model_output/regressions/votes_pos_pvalues.csv', index_col=0)
upvotepvalues['name'] = upvotepvalues['name'].map(featuremap)
upvotepvalues.columns = ['Feature', 1, 2, 0, 3]

upvotecoefs = upvotecoefs.loc[list(range(1, 31)) + [35] + list(range(31, 35)) + [36],
                  ['Feature', 1, 2, 3]].reset_index(drop=True)
upvotepvalues = upvotepvalues.loc[list(range(1, 31)) + [35] + list(range(31, 35)) + [36],
                      ['Feature', 1, 2, 3]].reset_index(drop=True)
upvotedf = upvotecoefs.merge(upvotepvalues, on='Feature', suffixes=('_coef', '_pvalue'))
upvotedf = upvotedf.set_index('Feature').loc[['Intercept'] + commentfeats_v + articlefeats +
                               genrefeats + structurefeats +
                               behaviourfeats[:1]].reset_index()
# display(upvotecoefs)
# display(upvotepvalues)
# display(upvotedf)

# repeat, but for downvotes

downvotecoefs = pd.read_csv('model_output/regressions/votes_neg_coefs.csv', index_col=0)
downvotecoefs['name'] = downvotecoefs['name'].map(featuremap)
downvotecoefs.columns = ['Feature', 1, 2, 0, 3]

downvotepvalues = pd.read_csv('model_output/regressions/votes_neg_pvalues.csv', index_col=0)
downvotepvalues['name'] = downvotepvalues['name'].map(featuremap)
downvotepvalues.columns = ['Feature', 1, 2, 0, 3]

downvotecoefs = downvotecoefs.loc[list(range(1, 31)) + [35] + list(range(31, 35)) + [36],
                  ['Feature', 1, 2, 3]].reset_index(drop=True)
downvotepvalues = downvotepvalues.loc[list(range(1, 31)) + [35] + list(range(31, 35)) + [36],
                      ['Feature', 1, 2, 3]].reset_index(drop=True)
downvotedf = downvotecoefs.merge(downvotepvalues, on='Feature', suffixes=('_coef', '_pvalue'))
downvotedf = downvotedf.set_index('Feature').loc[['Intercept'] + commentfeats_v + articlefeats +
                               genrefeats + structurefeats +
                               behaviourfeats[1:]].reset_index()
# display(downvotecoefs)
# display(downvotepvalues)
# display(downvotedf)

In [8]:
model1df = pd.concat([pindf.set_index('Feature')[['1_coef', '1_pvalue']].dropna(),
            upvotedf.set_index('Feature')[['1_coef', '1_pvalue']].dropna(),
            downvotedf.set_index('Feature')[['1_coef', '1_pvalue']].dropna()],
            axis=1).loc[['Intercept'] + commentfeats_v+articlefeats+genrefeats].reset_index()
model1df.columns = ['Feature', '1_coef', '1_pvalue', '2_coef', '2_pvalue', '3_coef', '3_pvalue']
create_latex_table_1(model1df)

\begin{table}
\resizebox{\columnwidth}{!}{
\begin{tabular}{l|ccc}
Feature & Editors' Picks & Upvotes & Downvotes\\ \hline
Intercept & -5.470*** & 1.098*** & -0.405*** \\
Positive Sentiment & 0.137*** & -0.021*** & -0.065*** \\
Negative Sentiment & -0.036* & -0.010*** & 0.033*** \\
Lexical Diversity & 0.548*** & 0.160*** & 0.191*** \\
Readability & 0.134*** & 0.053*** & 0.076*** \\
Topical Similarity to Article & -0.021 & -0.004** & -0.046*** \\
\# Punctuation Marks & -0.048* & -0.048*** & -0.115*** \\
\# Sentences & 0.083*** & 0.028*** & 0.079*** \\
Text Uses 2nd Person Pronouns & 0.217*** & 0.070*** & 0.287*** \\
Author Follower Count & 0.103*** & 0.084*** & -0.081*** \\
Time Since Article Publication & -1.191*** & -0.245*** & -0.278*** \\
Is Root Comment & - & 0.116*** & 0.433*** \\
Comment Level in Tree & - & -0.691*** & -0.451*** \\
Mean Upvotes in Discussion & 0.201*** & 0.356*** & -0.019*** \\
Mean Downvotes in Discussion & 0.069*** & 0.076*** & 0.762*** \\
\# Comments in Discuss

In [9]:
model2df = pd.concat([pindf.set_index('Feature')[['2_coef', '2_pvalue']].dropna(),
            upvotedf.set_index('Feature')[['3_coef', '3_pvalue']].dropna()],
            axis=1).loc[['Intercept'] + commentfeats_v+articlefeats+genrefeats+structurefeats +
                               behaviourfeats].reset_index()
model2df.columns = ['Feature', '1_coef', '1_pvalue', '2_coef', '2_pvalue']
create_latex_table_2(model2df)

\begin{table}
\resizebox{\columnwidth}{!}{
\begin{tabular}{l|cc}
Feature & Editors' Picks & Upvotes \\ \hline
Intercept & 26.624*** & 1.273*** \\
Positive Sentiment & 0.162*** & -0.019*** \\
Negative Sentiment & -0.076* & -0.009*** \\
Lexical Diversity & 0.367*** & 0.103*** \\
Readability & 0.042 & 0.038*** \\
Topical Similarity to Article & -0.029 & 0.006*** \\
\# Punctuation Marks & 0.105* & -0.045*** \\
\# Sentences & -0.000 & 0.012*** \\
Text Uses 2nd Person Pronouns & 0.108 & 0.035*** \\
Author Follower Count & 0.053 & 0.078*** \\
Time Since Article Publication & -0.182*** & -0.183*** \\
Is Root Comment & - & 0.020*** \\
Comment Level in Tree & - & -0.827*** \\
Mean Upvotes in Discussion & -0.520*** & 0.382*** \\
Mean Downvotes in Discussion & -0.352*** & 0.036*** \\
\# Comments in Discussion & -0.715*** & -0.016*** \\
Genre: Women's Issues & -0.034 & 0.156*** \\
Genre: Opinion & 0.006 & 0.252*** \\
Genre: Media & 0.160 & -0.028*** \\
Genre: International & 0.073 & 0.038*** \\
Gen

In [10]:
create_latex_table_upvote(upvotedf)

\begin{table*}
\begin{tabular}{l|ccc}
Feature & Model 1 & Model 2 & Model 3\\ \hline
Intercept & 1.098*** & 1.104*** & 1.273*** \\
Positive Sentiment & -0.021*** & -0.028*** & -0.019*** \\
Negative Sentiment & -0.010*** & -0.010*** & -0.009*** \\
Lexical Diversity & 0.160*** & 0.134*** & 0.103*** \\
Readability & 0.053*** & 0.049*** & 0.038*** \\
Topical Similarity to Article & -0.004** & -0.001 & 0.006*** \\
\# Punctuation Marks & -0.048*** & -0.044*** & -0.045*** \\
\# Sentences & 0.028*** & 0.023*** & 0.012*** \\
Text Uses 2nd Person Pronouns & 0.070*** & 0.073*** & 0.035*** \\
Author Follower Count & 0.084*** & 0.078*** & 0.078*** \\
Time Since Article Publication & -0.245*** & -0.202*** & -0.183*** \\
Is Root Comment & 0.116*** & -0.011* & 0.020*** \\
Comment Level in Tree & -0.691*** & -0.698*** & -0.827*** \\
Mean Upvotes in Discussion & 0.356*** & 0.350*** & 0.382*** \\
Mean Downvotes in Discussion & 0.076*** & 0.083*** & 0.036*** \\
\# Comments in Discussion & 0.041*** & 0.017

In [11]:
create_latex_table_downvote(downvotedf)

\begin{table*}
\begin{tabular}{l|ccc}
Feature & Model 1 & Model 2 & Model 3\\ \hline
Intercept & -0.405*** & -0.398*** & 0.113*** \\
Positive Sentiment & -0.065*** & -0.070*** & -0.041*** \\
Negative Sentiment & 0.033*** & 0.033*** & 0.041*** \\
Lexical Diversity & 0.191*** & 0.185*** & 0.098*** \\
Readability & 0.076*** & 0.070*** & 0.039*** \\
Topical Similarity to Article & -0.046*** & -0.044*** & -0.027*** \\
\# Punctuation Marks & -0.115*** & -0.115*** & -0.116*** \\
\# Sentences & 0.079*** & 0.071*** & 0.039*** \\
Text Uses 2nd Person Pronouns & 0.287*** & 0.277*** & 0.231*** \\
Author Follower Count & -0.081*** & -0.083*** & -0.071*** \\
Time Since Article Publication & -0.278*** & -0.253*** & -0.184*** \\
Is Root Comment & 0.433*** & 0.357*** & 0.095*** \\
Comment Level in Tree & -0.451*** & -0.456*** & -0.662*** \\
Mean Upvotes in Discussion & -0.019*** & -0.025*** & -0.021*** \\
Mean Downvotes in Discussion & 0.762*** & 0.760*** & 0.698*** \\
\# Comments in Discussion & -0.00

In [12]:
vpn_diff1 = pd.read_csv('model_output/regressions/votes_pos_neg_diffs_1.csv', index_col=0)
vpn_diff1.rename(columns={'term': 'Feature'}, inplace=True)
vpn_diff1['Feature'] = vpn_diff1['Feature'].map(featuremap)
vpn_diff1 = vpn_diff1.set_index('Feature')[['coef_diff', 'p_value', 'ci_lower_95', 'ci_upper_95', 'ci_lower_99', 'ci_upper_99']]

pinvc_diff = pd.read_csv('model_output/regressions/pin_votes_compound_diffs.csv', index_col=0)
pinvc_diff.rename(columns={'term': 'Feature'}, inplace=True)
pinvc_diff['Feature'] = pinvc_diff['Feature'].map(featuremap)
pinvc_diff = pinvc_diff[['Feature', 'coef_diff', 'p_value', 'ci_lower_95',
                         'ci_upper_95', 'ci_lower_99', 'ci_upper_99']
                         ].iloc[1:].set_index('Feature')


In [13]:
df1 = vpn_diff1[['coef_diff', 'p_value']].rename(columns={'coef_diff': 'Log Odds',
                                                    'p_value': 'p-value'})
df2 = pinvc_diff[['coef_diff', 'p_value']].rename(columns={'coef_diff': 'Log Odds',
                                                    'p_value': 'p-value'})

df3 = df1.merge(df2, on='Feature', suffixes=('_1', '_2'), how='outer'
                ).loc[commentfeats_v+articlefeats+genrefeats].reset_index()
df3.rename(columns={'Log Odds_1': '1_coef',
                    'p-value_1': '1_pvalue',
                    'Log Odds_2': '2_coef',
                    'p-value_2': '2_pvalue'}, inplace=True)


In [14]:
create_latex_table_mega(df3)

\begin{table}
\resizebox{\columnwidth}{!}{
\begin{tabular}{l|cc}
Feature & \begin{tabular}[c]{@{}c@{}}Relative Voting\\Preference\end{tabular} & Comment Gap \\ \hline
Positive Sentiment & 0.044*** & 0.094*** \\
Negative Sentiment & -0.043*** & 0.007 \\
Lexical Diversity & -0.031*** & 0.579*** \\
Readability & -0.022*** & 0.156*** \\
Topical Similarity to Article & 0.042*** & -0.063*** \\
\# Punctuation Marks & 0.068*** & -0.116*** \\
\# Sentences & -0.051*** & 0.134*** \\
Text Uses 2nd Person Pronouns & -0.217*** & 0.434*** \\
Author Follower Count & 0.164*** & -0.062*** \\
Time Since Article Publication & 0.033*** & -1.224*** \\
Is Root Comment & -0.316*** & - \\
Comment Level in Tree & -0.240*** & - \\
Mean Upvotes in Discussion & 0.375*** & -0.174*** \\
Mean Downvotes in Discussion & -0.686*** & 0.755*** \\
\# Comments in Discussion & 0.043*** & -0.147*** \\
Genre: Women's Issues & -0.054* & 0.887*** \\
Genre: Opinion & 0.216*** & -0.068 \\
Genre: Media & 0.068*** & 0.162* \\
Genre:

In [15]:
model1coefs = pd.concat([pincoefs.set_index('Feature')[1], upvotecoefs.set_index('Feature')[1], downvotecoefs.set_index('Feature')[1]], axis=1).dropna()
model1coefs.columns = ['Editor\'s Picks', 'Upvotes', 'Downvotes']
model1coefslong = model1coefs.iloc[1:14].reset_index().melt(id_vars='Feature', value_vars=['Editor\'s Picks', 'Upvotes', 'Downvotes'])
model1pvalues = pd.concat([pinpvalues.set_index('Feature')[1], upvotepvalues.set_index('Feature')[1], downvotepvalues.set_index('Feature')[1]], axis=1).dropna()
model1pvalues.columns = ['Editor\'s Picks', 'Upvotes', 'Downvotes']
model1pvalueslong = model1pvalues.iloc[1:14].reset_index().melt(id_vars='Feature', value_vars=['Editor\'s Picks', 'Upvotes', 'Downvotes'])

In [16]:
genrecoefs = model1coefs.iloc[14:]
genrecoefs.index = genrecoefs.index.str[7:]
genrecoefs.loc['Domestic'] = 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  genrecoefs.loc['Domestic'] = 0


In [17]:
from scipy.stats import pearsonr, spearmanr

for n, model1 in enumerate(['Editor\'s Picks', 'Upvotes', 'Downvotes']):
    for m, model2 in enumerate(['Editor\'s Picks', 'Upvotes', 'Downvotes']):
        if n < m:
            p = pearsonr(genrecoefs[model1], genrecoefs[model2])
            s = spearmanr(genrecoefs[model1], genrecoefs[model2])
            print(model1, model2, p[0], p[1], s[0], s[1])

Editor's Picks Upvotes 0.45578779803187 0.08773598539655789 0.4428571428571427 0.0982940655468714
Editor's Picks Downvotes 0.3421365842303935 0.2119475543224776 0.48214285714285704 0.06874953772064366
Upvotes Downvotes 0.7575030528554413 0.0010717444648504378 0.8035714285714284 0.00030726503181250915


In [18]:
genreranks = pd.concat([genrecoefs.sort_values('Editor\'s Picks', ascending=False)['Editor\'s Picks'].reset_index(),
 genrecoefs.sort_values('Upvotes', ascending=False)['Upvotes'].reset_index(),
    genrecoefs.sort_values('Downvotes', ascending=False)['Downvotes'].reset_index()], axis=1).round(3)

In [19]:
pre = r"""\begin{table*}[]
\centering
\begin{tabular}{l|ll|ll|ll}
     & \multicolumn{2}{c|}{Editor's Picks} & \multicolumn{2}{c|}{Upvotes}    & \multicolumn{2}{c}{Downvotes}   \\
Rank & Genre               & Coefficient   & Genre             & Coefficient & Genre             & Coefficient \\ \hline"""
vals = ' \\\\ \n'.join(genreranks.astype(str).apply(lambda x: str(x.name + 1) + ' & ' + ' & '.join(x), axis=1).values)
post = r"""\end{tabular}
\caption{Regression coefficients for article genre in rank order in the first models for Editor\'s Picks, Upvotes, and Downvotes.}
\label{tab:genre-ranks}
\end{table*}"""
total = pre + '\n' + vals + '\n' + post
total = total.replace('Domestic', r'\textit{Domestic (Baseline)}').replace(' 0.0 ', r' \textit{0} ')

print(total)

\begin{table*}[]
\centering
\begin{tabular}{l|ll|ll|ll}
     & \multicolumn{2}{c|}{Editor's Picks} & \multicolumn{2}{c|}{Upvotes}    & \multicolumn{2}{c}{Downvotes}   \\
Rank & Genre               & Coefficient   & Genre             & Coefficient & Genre             & Coefficient \\ \hline
1 & Science & 1.316 & Opinion & 0.292 & Women's Issues & 0.276 \\ 
2 & Lifestyle & 0.844 & Lifestyle & 0.27 & Law & 0.205 \\ 
3 & Women's Issues & 0.832 & Law & 0.242 & Lifestyle & 0.154 \\ 
4 & Video & 0.671 & Women's Issues & 0.222 & Video & 0.124 \\ 
5 & Web & 0.625 & Video & 0.22 & Sports & 0.114 \\ 
6 & Sports & 0.546 & Science & 0.192 & Panorama & 0.11 \\ 
7 & Culture & 0.496 & Panorama & 0.183 & Web & 0.085 \\ 
8 & Podcast & 0.447 & Sports & 0.175 & Opinion & 0.076 \\ 
9 & Panorama & 0.348 & Web & 0.145 & Science & 0.053 \\ 
10 & Law & 0.241 & Culture & 0.118 & Economy & 0.038 \\ 
11 & Media & 0.23 & International & 0.08 & International & 0.023 \\ 
12 & International & 0.195 & Podcast & 0.054 