In [2]:
import pandas as pd
import regex as re

# Gutenberg dataset

### https://huggingface.co/datasets/google-research-datasets/poem_sentiment

In [3]:
# Read the three Gutenberg poem-sentiment splits, indexing each one by its `id` column.
test, train, valid = (
    pd.read_parquet(split_path, engine='pyarrow').set_index('id')
    for split_path in (
        'data/gutenberg/test-00000-of-00001.parquet',
        'data/gutenberg/train-00000-of-00001.parquet',
        'data/gutenberg/validation-00000-of-00001.parquet',
    )
)

# Stack the splits in test -> train -> validation order into one frame.
# NOTE(review): each split keeps its own `id` values as the index, so the
# combined index probably contains duplicates — confirm before label-based lookups.
all_df = pd.concat([test, train, valid])

In [4]:
# Standardise the text column name to "TEXT". Reassign the result rather than
# using inplace=True, so the cell does not mutate a frame behind other cells' backs.
all_df = all_df.rename(columns={"verse_text": "TEXT"})

In [7]:
# `label` is the sentiment class assigned to each verse:
#   0 = negative
#   1 = positive
#   2 = no impact
# NOTE(review): the dataset card appears to define a fourth class (3 = mixed) — confirm and list it here.

# Show the combined frame to inspect it.
all_df

Unnamed: 0_level_0,TEXT,label
id,Unnamed: 1_level_1,Unnamed: 2_level_1
0,"my canoe to make more steady,",2
1,and be glad in the summer morning when the kin...,1
2,and when they reached the strait symplegades,2
3,she sought for flowers,2
4,"if they are hungry, paradise",2
...,...,...
100,"said my companion, 'i will show you soon",2
101,but god said,2
102,but if thou do thy best,2
103,"so generous to me. farewell, friend, since friend",1


# English PCD


In [19]:
# Load the English PCD verses, using the CSV's first column as the row index.
pcd_df = pd.read_csv(
    filepath_or_buffer='data/english_PCD/merged_data.csv',
    index_col=0,
)

In [23]:
# Standardise the text column name to "TEXT". Reassign the result rather than
# using inplace=True, so the cell does not mutate a frame behind other cells' backs.
pcd_df = pcd_df.rename(columns={"Verse": "TEXT"})

In [24]:
# Show the English PCD frame to inspect its columns.
pcd_df

Unnamed: 0,TEXT,Meter,char_count
0,ah why this boding start this sudden pain,iambic,6
1,that wings my pulse and shoots from vein to vein,iambic,6
2,what mean regardless of yon midnight bell,iambic,6
3,these earthborn visions saddening o'er my cell,iambic,6
4,what strange disorder prompts these thoughts t...,iambic,6
...,...,...,...
202272,were mellow music matchd with him,iambic,6
202274,o life as futile then as frail,iambic,6
202275,o for thy voice to soothe and bless,iambic,6
202276,what hope of answer or redress,iambic,6


# PoKi (Poems by Kids) dataset

### https://github.com/whipson/PoKi-Poems-by-Kids/tree/master

In [113]:
# Load the PoKi poems; pandas assigns a default integer index.
poki_df = pd.read_csv(filepath_or_buffer='data/poki/poki.csv')

In [114]:
# Standardise the text column name to "TEXT". Reassign the result rather than
# using inplace=True, so the cell does not mutate a frame behind other cells' backs.
poki_df = poki_df.rename(columns={'text': 'TEXT'})

In [117]:
# Show the PoKi frame to inspect its columns.
poki_df

Unnamed: 0,id,title,author,grade,TEXT,char
0,104987,I Love The Zoo,,1,"roses are red, violets are blue. i love the...",62
1,67185,The scary forest.,,1,the forest is really haunted. i believe it to...,87
2,103555,A Hike At School,1st grade-wh,1,i took a hike at school today and this is wha...,324
3,112483,Computer,a,1,you can do what you want you can play a...,106
4,74516,Angel,aab,1,angel oh angle you spin like a top angel oh an...,164
...,...,...,...,...,...,...
61503,116016,Arrival,zane,12,i hate when i arrive to things so late whethe...,566
61504,31414,To all the Poets,zechariah,12,your works of poetry enspire within my heart a...,440
61505,31412,The summer meadows,zechariah,12,in the summer walking through the polder i bre...,341
61506,73996,Computer,zoe,12,my computer's very neat it's got cool colours ...,66


In [165]:
# nltk is not imported in the visible import cell, so import it here; without
# this the cell raises NameError on a fresh kernel (Restart & Run All).
# NOTE(review): sent_tokenize also needs NLTK's punkt tokenizer data to be installed.
import nltk

# Spot check: split one poem (the row at position 100) into sentences.
nltk.sent_tokenize(poki_df['TEXT'].iloc[100])

['almost every afternoon,  i eat brownies with a moon.',
 'every evening right at six,  i eat brownies stacked on bricks.']

# PERC: Poem Emotion Recognition Corpus

### https://data.mendeley.com/datasets/n9vbc8g9cx/1

In [123]:
# Load the PERC poems; pandas assigns a default integer index.
perc_df = pd.read_csv(filepath_or_buffer='data/perc/PERC_mendelly.csv')

In [124]:
# Standardise the text column name to "TEXT". Reassign the result rather than
# using inplace=True, so the cell does not mutate a frame behind other cells' backs.
perc_df = perc_df.rename(columns={'Poem': 'TEXT'})

In [125]:
# Show the PERC frame to inspect its columns.
perc_df

Unnamed: 0,TEXT,Emotion
0,A Tree\nA tree beside the sandy river-beach \n...,sad
1,"Sri Krishna\n\nO immense Light and thou, O spi...",love
2,"Who\n\n\nIn the blue of the sky, in the green ...",peace
3,Revelation\n\n\nSomeone leaping from the rocks...,sad
4,The Silver Call\n\n\nThere is a godhead of unr...,joy
...,...,...
711,Daughter Taken By Mothers Lies\n\nHave you any...,sad
712,Involuntary Acceptance\n\nEven though\nWe’re f...,sad
713,Victim Of Poverty\n\nPoverty stricken youth ju...,sad
714,Rain\n\nI sit and watch\nas the rain falls \nf...,sad


# Poetry Foundation

### https://www.kaggle.com/datasets/tgdivy/poetry-foundation-poems

In [9]:
# Load the Poetry Foundation poems, using the CSV's first column as the row index.
poetry_foundation_df = pd.read_csv(
    filepath_or_buffer='data/poetry_foundation/PoetryFoundationData.csv',
    index_col=0,
)

In [10]:
# Standardise the text column name to "TEXT". Reassign the result rather than
# using inplace=True, so the cell does not mutate a frame behind other cells' backs.
poetry_foundation_df = poetry_foundation_df.rename(columns={'Poem': 'TEXT'})

In [11]:
# Show the Poetry Foundation frame to inspect its columns.
# NOTE(review): the displayed index restarts (..., 13, 14, 15, 0), so it looks
# non-unique — consider reset_index(drop=True) before relying on index labels.
poetry_foundation_df

Unnamed: 0,Title,TEXT,Poet,Tags
0,\r\n\r\n Objects Used to Pr...,"\r\n\r\nDog bone, stapler,\r\n\r\ncribbage boa...",Michelle Menting,
1,\r\n\r\n The New Church\r\n...,\r\n\r\nThe old cupola glinted above the cloud...,Lucia Cherciu,
2,\r\n\r\n Look for Me\r\n\r\...,\r\n\r\nLook for me under the hood\r\n\r\nof t...,Ted Kooser,
3,\r\n\r\n Wild Life\r\n\r\n ...,"\r\n\r\nBehind the silo, the Mother Rabbit\r\n...",Grace Cavalieri,
4,\r\n\r\n Umbrella\r\n\r\n ...,\r\n\r\nWhen I push your button\r\n\r\nyou fly...,Connie Wanek,
...,...,...,...,...
13,\r\n\r\n 1-800-FEAR\r\n\r\n...,\r\n\r\nWe'd like to talk with you about...,Jody Gladding,"Living,Social Commentaries,Popular Culture"
14,\r\n\r\n The Death of Atahu...,\r\n\r\n\r\n\r\n,William Jay Smith,
15,\r\n\r\n Poet's Wish\r\n\r\...,\r\n\r\n\r\n\r\n,William Jay Smith,
0,\r\n\r\n 0\r\n\r\n ...,\r\n\r\n Philosophic\r\n\r\nin its co...,Hailey Leithauser,"Arts & Sciences,Philosophy"
