# Sonnet or Shanty?

In [1]:
import requests

# Sonnets

Start with Shakespeare's sonnets, scraped from the Project Gutenberg HTML edition.

In [3]:
# Download the Project Gutenberg HTML page that holds the sonnets.
# A timeout is passed explicitly: without one, requests waits indefinitely
# on a stalled connection and the notebook would hang on "Run All".
req = requests.get(
    "https://www.gutenberg.org/files/1041/1041-h/1041-h.htm",
    timeout=30,
)
# Raise on a 4xx/5xx response instead of silently parsing an error page.
req.raise_for_status()

In [4]:
from bs4 import BeautifulSoup

# Name the parser explicitly. With no parser argument BeautifulSoup emits a
# warning and uses whichever parser happens to be installed, so the parsed
# tree could differ from one environment to the next.
soup = BeautifulSoup(req.text, "html.parser")

In [6]:
# Collect every <p class="poem"> element from the parsed page; the count
# shown below is expected to be one per sonnet.
poem_selector = "p.poem"
poem_tags = soup.select(poem_selector)
len(poem_tags)

154

In [10]:
# Reduce each matched tag to its plain text (markup removed, whitespace
# kept exactly as get_text() returns it).
sonnets = []
for poem_tag in poem_tags:
    sonnets.append(poem_tag.get_text())
len(sonnets)

154

# Sea shanties

In [11]:
# Download the wiki page that lists the sea shanties.
# A timeout is passed explicitly: without one, requests waits indefinitely
# on a stalled connection and the notebook would hang on "Run All".
shanty_req = requests.get(
    "https://assassinscreed.fandom.com/wiki/Sea_shanties",
    timeout=30,
)
# Raise on a 4xx/5xx response instead of silently parsing an error page.
shanty_req.raise_for_status()

In [13]:
# Name the parser explicitly. With no parser argument BeautifulSoup emits a
# warning and uses whichever parser happens to be installed, so the parsed
# tree could differ from one environment to the next.
shanty_soup = BeautifulSoup(shanty_req.text, "html.parser")

In [15]:
# Collect every <div class="tabbertab"> element from the wiki page; each
# one is treated as a single shanty in the cells that follow.
shanty_selector = "div.tabbertab"
shanty_tabs = shanty_soup.select(shanty_selector)
len(shanty_tabs)

52

In [18]:
# Reduce each tab to its plain text (markup removed, whitespace kept
# exactly as get_text() returns it).
shanties = []
for shanty_tab in shanty_tabs:
    shanties.append(shanty_tab.get_text())
len(shanties)

52

# Combine them!

In [20]:
import pandas as pd

In [21]:
# One row per sonnet: the "text_type" column carries the same "sonnet"
# label on every row, and "text" holds the scraped poem text.
sonnet_columns = {
    "text_type": ["sonnet"] * len(sonnets),
    "text": sonnets,
}
df_sonnets = pd.DataFrame(sonnet_columns)

df_sonnets.head()

Unnamed: 0,text_type,text
0,sonnet,\r\n From fairest creatures we desire increas...
1,sonnet,\r\n When forty winters shall besiege thy bro...
2,sonnet,\r\n Look in thy glass and tell the face thou...
3,sonnet,"\r\n Unthrifty loveliness, why dost thou spen..."
4,sonnet,"\r\n Those hours, that with gentle work did f..."


In [22]:
# One row per shanty. Build the text column first, then put the constant
# "shanty" label in front so the column order is (text_type, text).
df_shanties = pd.DataFrame({"text": shanties})
df_shanties.insert(0, "text_type", "shanty")

df_shanties.head()

Unnamed: 0,text_type,text
0,shanty,\n\nOld Billy Riley was a dancing master.\nOld...
1,shanty,"\n\nMe bonnie bunch o’roses O!\nCome down, ye ..."
2,shanty,"\n\nWell, our anchor’s on board and our rags a..."
3,shanty,"\n\n(Chorus)\nHelp me, Bob, I'm bully in the a..."
4,shanty,"\n\nO, my name was Captain Kidd,\nas I sailed,..."


In [23]:
# Stack the two labelled frames row-wise (the default axis) and renumber
# the rows 0..n-1 so the combined index has no duplicates.
labelled_frames = [df_sonnets, df_shanties]
df = pd.concat(labelled_frames, ignore_index=True)
print(df.shape)
df.head()

(206, 2)


Unnamed: 0,text_type,text
0,sonnet,\r\n From fairest creatures we desire increas...
1,sonnet,\r\n When forty winters shall besiege thy bro...
2,sonnet,\r\n Look in thy glass and tell the face thou...
3,sonnet,"\r\n Unthrifty loveliness, why dost thou spen..."
4,sonnet,"\r\n Those hours, that with gentle work did f..."


In [24]:
# Peek at the last five rows to confirm the shanties follow the sonnets.
df.tail(5)

Unnamed: 0,text_type,text
201,shanty,"\n\nThe worst old ship that ever did sail,\nSa..."
202,shanty,"\n\nOh, where am I to go, M'Johnnies, oh where..."
203,shanty,"\n\nWhiskey is the life of man,\nWhiskey, John..."
204,shanty,\n\nAs we were a-fishing off Happisburgh light...
205,shanty,"\n\nWe'll heave him up an away we'll go\n'Way,..."


In [25]:
# Persist the combined dataset without the row index. pandas infers gzip
# compression from the ".gz" suffix of the file name.
output_path = "sonnet_or_shanty.csv.gz"
df.to_csv(output_path, index=False)