# Experiments in Audio Sentiment

In [None]:
import pandas as pd
import torch

from os import listdir, path

from transformers import pipeline

## Parse `xls` files

In [None]:
# Directory that is listed for the spreadsheet files to parse
XLS_INPUT_DIR = "./data/xls"

In [None]:
# Spreadsheet file names in XLS_INPUT_DIR, sorted so the parse order is stable.
# Match on the ".xls" extension (with the dot) so that names that merely end
# in the letters "xls" are not picked up.
xls_files = sorted(f for f in listdir(XLS_INPUT_DIR) if f.endswith(".xls"))
xls_files

In [None]:
# Columns retained from every spreadsheet; everything else is dropped
keep_cols = ["Filename", "Description", "Category", "SubCategory", "Library"]

# Per-spreadsheet frames are collected and concatenated once after the loop,
# instead of re-copying a growing DataFrame on every iteration
frames = []
# Positions (in the combined table) of the rows around each spreadsheet boundary
transition_idxs = []
# Position in the combined table where the current spreadsheet's rows start
row_offset = 0

for xf in xls_files:
  _df = pd.read_excel(path.join(XLS_INPUT_DIR, xf))
  drop_cols = [c for c in _df.columns if c not in keep_cols]
  frames.append(_df.drop(columns=drop_cols))

  n_rows = len(_df)

  # positions of the first 2 rows of this xls (fewer if the sheet is shorter)
  n_head = min(2, n_rows)
  transition_idxs.extend(range(row_offset, row_offset + n_head))

  # positions of the last 2 rows of this xls, never reaching outside this
  # sheet and never repeating a row already listed as one of the first 2
  tail_start = max(n_rows - 2, n_head)
  transition_idxs.extend(range(row_offset + tail_start, row_offset + n_rows))

  row_offset += n_rows

if frames:
  descriptions_df = pd.concat(frames, ignore_index=True)
else:
  # No spreadsheets found: keep the expected columns so the checks below still run
  descriptions_df = pd.DataFrame(columns=keep_cols)

# Check length
display(f"Total files: {len(descriptions_df)}")

# Check if Categories make sense
display(descriptions_df.iloc[transition_idxs].drop(columns=["Filename", "Description"]))

## Save `csv` file

In [None]:
# Write the combined table to disk; index=False leaves the row index out of the file
descriptions_df.to_csv("descriptions.csv", index=False)

## Read `csv` file

In [None]:
# Path of the CSV file that is loaded into descriptions_df
CSV_FILE_PATH = "./descriptions.csv"

In [None]:
# Load the table from CSV_FILE_PATH (replacing any descriptions_df already in
# memory) and preview its first rows
descriptions_df = pd.read_csv(CSV_FILE_PATH)
descriptions_df.head()

## Test the model on the very first filename and description

In [None]:
# Description and Filename of the first row, in that order
phrases = descriptions_df.iloc[0][["Description", "Filename"]].tolist()
phrases

## Some model examples

In [None]:
# Candidate model ids to try with the pipeline. Only one assignment should be
# active at a time; the others stay commented out so it is obvious which model
# is actually used.
# MODEL_NAME = "cardiffnlp/twitter-roberta-large-emotion-latest"
# MODEL_NAME = "SamLowe/roberta-base-go_emotions"
# MODEL_NAME = "bhadresh-savani/distilbert-base-uncased-emotion"
# MODEL_NAME = "cardiffnlp/twitter-roberta-base-emotion-multilabel-latest"
MODEL_NAME = "joeddav/distilbert-base-uncased-go-emotions-student"

### Setup

Create the pipeline and a helper function to process its results.

In [None]:
def sort_by_score(scores):
  """Return a new list with the entries of `scores` ordered by descending "score".

  Each entry must support `entry["score"]`. The input is not modified, and
  entries with equal scores keep their original relative order.
  """
  ranked = list(scores)
  ranked.sort(key=lambda entry: entry["score"], reverse=True)
  return ranked

# Classification pipeline for MODEL_NAME, created with bfloat16 as the torch
# dtype and asked to return the score of every label rather than only the best.
# NOTE(review): newer transformers releases appear to document `top_k=None` as
# the replacement for `return_all_scores=True` — confirm against the installed
# version (and the shape `pipe(text)[0]` relies on) before switching.
pipe = pipeline(
  task="sentiment-analysis",
  model=MODEL_NAME,
  return_all_scores=True,
  torch_dtype=torch.bfloat16,
)

### Run on Filename and Description

In [None]:
# Filename and Description
# Score every entry of `phrases` on its own and show its 3 highest-scoring labels
for p in phrases:
  print(p)
  # [0] selects the result for the single input text; it is then handed to
  # sort_by_score, which reads each entry's "score"
  scores = pipe(p)[0]
  display(sort_by_score(scores)[:3])

# Combined Filename and Description
# The entries of `phrases` are joined with ", " and scored as one text
p = ", ".join(phrases)
print(p)
scores = pipe(p)[0]
display(sort_by_score(scores)[:3])