In [None]:
import pandas as pd
from transformers import pipeline

# Pin the checkpoint explicitly. Without a `model` argument the pipeline falls
# back to a library-chosen default (and emits a warning saying so), which means
# the labels/scores below could silently change with a transformers upgrade.
sent_model = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)

  from .autonotebook import tqdm as notebook_tqdm


## Load data

In [None]:
# Read the dataset from a CSV located relative to the working directory,
# then preview the first rows.
DATA_PATH = "maxdiff_dummy_data.csv"
df = pd.read_csv(DATA_PATH)
df.head(n=5)

## Compute weighted score

In [None]:
# Element-wise product of the `maxdiff_mean` and `sample_size` columns,
# stored as a new `weighted_score` column.
weighted = df['maxdiff_mean'].mul(df['sample_size'])
df['weighted_score'] = weighted
df['weighted_score'].head(n=5)

## Sentiment analysis

In [None]:
# Score each text exactly once and reuse the result for both columns. Calling
# the model separately for the label and for the score runs inference twice
# per row, which doubles the cost of the slowest step in this notebook.
predictions = [sent_model(text)[0] for text in df['text']]

# The first result for each text carries a 'label' and a 'score' entry.
df['sentiment'] = [pred['label'] for pred in predictions]
df['sentiment_score'] = [pred['score'] for pred in predictions]
df['sentiment'].head()

In [None]:
# Preview the first five sentiment scores.
df['sentiment_score'].head(n=5)

## Add simple NLP features

In [None]:
# Character count of each text.
df["length"] = df["text"].str.len()
# Whitespace-delimited token count. `.str.len()` works on the lists produced
# by `.str.split()` and propagates missing values as NaN, whereas
# `.apply(len)` raises TypeError when it reaches a missing (float NaN) entry.
df["word_count"] = df["text"].str.split().str.len()
df["length"].head()

In [None]:
# Preview the first five word counts.
df["word_count"].head(n=5)

## Explore which attributes relate to higher persuasion

In [None]:
# Only a cell's final expression is rendered automatically, so the per-sentiment
# means must be displayed explicitly or the result is computed and thrown away.
# `display` is provided by the IPython kernel without an import.
display(df.groupby("sentiment")["maxdiff_mean"].mean())

# Pairwise correlations between the score and the two length features.
df[['maxdiff_mean', 'length', 'word_count']].corr()

## Top and bottom performers

In [None]:
# Sort by `maxdiff_mean` in each direction and keep the first five rows of each.
rank_col = "maxdiff_mean"
shown_cols = ['text', 'maxdiff_mean', 'sentiment']

top_texts = df.sort_values(by=rank_col, ascending=False).iloc[:5]
bottom_texts = df.sort_values(by=rank_col).iloc[:5]

top_texts.loc[:, shown_cols]

In [None]:
# Show the text, score and sentiment label for the five lowest-scoring rows.
bottom_texts.loc[:, ['text', 'maxdiff_mean', 'sentiment']]