# Create Taylor Swift Embeddings Data

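Use this Colab notebook to analyze trends in Taylor Swift's song lyrics using [Phoenix OSS](https://github.com/Arize-ai/phoenix). Download the Kaggle dataset [here](https://www.kaggle.com/datasets/PromptCloudHQ/taylor-swift-song-lyrics-from-all-the-albums?select=taylor_swift_lyrics.csv).

**Note:** the code cells below load `highlight_summaries.csv` (video highlight summaries) rather than the Kaggle lyrics file, so this introduction appears to be out of date.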

In [1]:
!pip install arize-phoenix



In [2]:
import pandas as pd
import phoenix as px

In [3]:
# Read the tab-separated highlight summaries file (ISO-8859-1 encoded)
# into a DataFrame and display it.
df = pd.read_csv(
    "highlight_summaries.csv",
    delimiter="\t",
    encoding="ISO-8859-1",
)
df

Unnamed: 0,videoID,title,youtube,highlight,highlight_summary,start,end
0,652b8baa43e8c47e4eb4838b,How is Russia benefitting from the Hamas Israe...,obQ35TTzo9M,The intense conflict between Gaza and Israel i...,The video provides a glimpse into the intense ...,0,30
1,652b8baa43e8c47e4eb4838b,How is Russia benefitting from the Hamas Israe...,obQ35TTzo9M,The ongoing conflict between Israel and Gaza i...,The video depicts the ongoing conflict between...,30,45
2,652b8baa43e8c47e4eb4838b,How is Russia benefitting from the Hamas Israe...,obQ35TTzo9M,"Various images and text are shown, including m...",A video featuring various images and text on d...,45,59
3,652b8ba543e8c47e4eb4838a,"Israel Palestine Conflict, Part 1 | War #short...",g0FmYHsQWZ8,"Facts about Israel and Palestine Conflict, Part 1","The conflict began in the early 1900s, when Je...",0,15
4,652b8ba543e8c47e4eb4838a,"Israel Palestine Conflict, Part 1 | War #short...",g0FmYHsQWZ8,Ongoing violence and turmoil in Gaza City,The video provides a glimpse into the ongoing ...,15,30
...,...,...,...,...,...,...,...
327,652b407543e8c47e4eb481dc,Israel-Hamas war: CCTV catches two women caugh...,cWxC3I_GdP8,The CCTV footage captures an intense altercati...,But things take an even darker turn when armed...,30,60
328,652b407543e8c47e4eb481dc,Israel-Hamas war: CCTV catches two women caugh...,cWxC3I_GdP8,A chaotic roadside altercation unfolds as poli...,"In a tense climax, two vehicles are caught in ...",75,105
329,652b407643e8c47e4eb481dd,Moment Israeli border police eliminate Hamas t...,vHaHzr5M7gk,A person is being chased while holding an item...,A person is being chased while holding an item...,0,15
330,652b407643e8c47e4eb481dd,Moment Israeli border police eliminate Hamas t...,vHaHzr5M7gk,A thrilling and chaotic car chase is taking pl...,A thrilling and chaotic car chase is taking pl...,15,30


In [4]:
!pip install arize["AutoEmbeddings"]

zsh:1: no matches found: arize[AutoEmbeddings]


In [5]:
from arize.pandas.embeddings import EmbeddingGenerator, UseCases

# Replace the current index with a fresh 0..n-1 RangeIndex (the old index is
# dropped, not kept as a column), then display the frame.
df = df.reset_index(drop=True)
df

Unnamed: 0,videoID,title,youtube,highlight,highlight_summary,start,end
0,652b8baa43e8c47e4eb4838b,How is Russia benefitting from the Hamas Israe...,obQ35TTzo9M,The intense conflict between Gaza and Israel i...,The video provides a glimpse into the intense ...,0,30
1,652b8baa43e8c47e4eb4838b,How is Russia benefitting from the Hamas Israe...,obQ35TTzo9M,The ongoing conflict between Israel and Gaza i...,The video depicts the ongoing conflict between...,30,45
2,652b8baa43e8c47e4eb4838b,How is Russia benefitting from the Hamas Israe...,obQ35TTzo9M,"Various images and text are shown, including m...",A video featuring various images and text on d...,45,59
3,652b8ba543e8c47e4eb4838a,"Israel Palestine Conflict, Part 1 | War #short...",g0FmYHsQWZ8,"Facts about Israel and Palestine Conflict, Part 1","The conflict began in the early 1900s, when Je...",0,15
4,652b8ba543e8c47e4eb4838a,"Israel Palestine Conflict, Part 1 | War #short...",g0FmYHsQWZ8,Ongoing violence and turmoil in Gaza City,The video provides a glimpse into the ongoing ...,15,30
...,...,...,...,...,...,...,...
327,652b407543e8c47e4eb481dc,Israel-Hamas war: CCTV catches two women caugh...,cWxC3I_GdP8,The CCTV footage captures an intense altercati...,But things take an even darker turn when armed...,30,60
328,652b407543e8c47e4eb481dc,Israel-Hamas war: CCTV catches two women caugh...,cWxC3I_GdP8,A chaotic roadside altercation unfolds as poli...,"In a tense climax, two vehicles are caught in ...",75,105
329,652b407643e8c47e4eb481dd,Moment Israeli border police eliminate Hamas t...,vHaHzr5M7gk,A person is being chased while holding an item...,A person is being chased while holding an item...,0,15
330,652b407643e8c47e4eb481dd,Moment Israeli border police eliminate Hamas t...,vHaHzr5M7gk,A thrilling and chaotic car chase is taking pl...,A thrilling and chaotic car chase is taking pl...,15,30


In [11]:
# `os` is needed for os.getenv below; it is not imported anywhere else in the
# notebook, so import it here to keep the cell runnable on a fresh kernel.
import os

import openai
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment, then
# hand the OpenAI key to the client. The key itself is never written here.
load_dotenv()
openai.api_key = os.getenv('OPENAI_API_KEY')

In [12]:
from openai.embeddings_utils import get_embeddings

# Request one embedding per `highlight` text from the
# "text-embedding-ada-002" engine; `matrix` holds the returned vectors.
matrix = get_embeddings(
    df["highlight"].to_list(),
    engine="text-embedding-ada-002",
)

In [13]:
from sklearn.decomposition import PCA

# Project the embedding vectors in `matrix` onto their first three principal
# components. random_state is fixed so that, when scikit-learn selects its
# randomized SVD solver, re-running the cell yields the same projection.
pca = PCA(n_components=3, random_state=0)
vis_dims = pca.fit_transform(matrix)

# Store each 3-component row as a plain Python list in a new column.
df["embed_vis"] = vis_dims.tolist()

In [23]:
# Alternative embedding path, flagged "not run" by the author: build an Arize
# EmbeddingGenerator for the NLP summarization use case with the
# "distilbert-base-uncased" model instead of calling get_embeddings.
generator = EmbeddingGenerator.from_use_case(
    use_case=UseCases.NLP.SUMMARIZATION,
    model_name="distilbert-base-uncased",
    tokenizer_max_length=512,
    batch_size=100,
)

# Embed the `highlight` text column and store the vectors in `h_vector`.
# NOTE(review): the Phoenix schema in this notebook reads `embed_vis`, not
# `h_vector` — confirm whether this column is still needed.
df["h_vector"] = generator.generate_embeddings(text_col=df["highlight"])
# Optional: embed the summaries as well.
# df["hs_vector"] = generator.generate_embeddings(text_col=df["highlight_summary"])

[38;21m  arize.utils.logging | INFO | Downloading pre-trained model 'distilbert-base-uncased'[0m
[38;21m  arize.utils.logging | INFO | Downloading tokenizer for 'distilbert-base-uncased'[0m
[38;21m  arize.utils.logging | INFO | Generating embedding vectors[0m


Map:   0%|          | 0/332 [00:00<?, ? examples/s]

In [14]:
# Display the frame to check the newly added `embed_vis` column.
df

Unnamed: 0,videoID,title,youtube,highlight,highlight_summary,start,end,embed_vis
0,652b8baa43e8c47e4eb4838b,How is Russia benefitting from the Hamas Israe...,obQ35TTzo9M,The intense conflict between Gaza and Israel i...,The video provides a glimpse into the intense ...,0,30,"[0.2066162929375035, -0.06064619974547461, 0.0..."
1,652b8baa43e8c47e4eb4838b,How is Russia benefitting from the Hamas Israe...,obQ35TTzo9M,The ongoing conflict between Israel and Gaza i...,The video depicts the ongoing conflict between...,30,45,"[0.1622347432308445, -0.14033492643750115, -0...."
2,652b8baa43e8c47e4eb4838b,How is Russia benefitting from the Hamas Israe...,obQ35TTzo9M,"Various images and text are shown, including m...",A video featuring various images and text on d...,45,59,"[0.10334962456228959, 0.06363324422569137, -0...."
3,652b8ba543e8c47e4eb4838a,"Israel Palestine Conflict, Part 1 | War #short...",g0FmYHsQWZ8,"Facts about Israel and Palestine Conflict, Part 1","The conflict began in the early 1900s, when Je...",0,15,"[0.18779103868569746, 0.07254543112761713, -0...."
4,652b8ba543e8c47e4eb4838a,"Israel Palestine Conflict, Part 1 | War #short...",g0FmYHsQWZ8,Ongoing violence and turmoil in Gaza City,The video provides a glimpse into the ongoing ...,15,30,"[0.1656252191767053, -0.1230691051667599, 0.00..."
...,...,...,...,...,...,...,...,...
327,652b407543e8c47e4eb481dc,Israel-Hamas war: CCTV catches two women caugh...,cWxC3I_GdP8,The CCTV footage captures an intense altercati...,But things take an even darker turn when armed...,30,60,"[-0.14632934499876585, -0.016836753582328022, ..."
328,652b407543e8c47e4eb481dc,Israel-Hamas war: CCTV catches two women caugh...,cWxC3I_GdP8,A chaotic roadside altercation unfolds as poli...,"In a tense climax, two vehicles are caught in ...",75,105,"[-0.1612800287913977, -0.013845835045432853, 0..."
329,652b407643e8c47e4eb481dd,Moment Israeli border police eliminate Hamas t...,vHaHzr5M7gk,A person is being chased while holding an item...,A person is being chased while holding an item...,0,15,"[-0.11573976608621504, 0.0871093711064868, 0.1..."
330,652b407643e8c47e4eb481dd,Moment Israeli border police eliminate Hamas t...,vHaHzr5M7gk,A thrilling and chaotic car chase is taking pl...,A thrilling and chaotic car chase is taking pl...,15,30,"[-0.19113611980770712, -0.05394640165128034, 0..."


In [15]:
# Describe the DataFrame columns to Phoenix: four plain feature columns plus
# one embedding feature ("videos_embedding") whose vectors live in `embed_vis`
# and whose raw text is the `highlight` column.
schema = px.Schema(
    feature_column_names=["title", "youtube", "start", "end"],
    embedding_feature_column_names={
        "videos_embedding": px.EmbeddingColumnNames(
            raw_data_column_name="highlight",
            vector_column_name="embed_vis",
        )
    },
)

In [16]:
# Wrap the DataFrame and schema in a Phoenix Dataset and start the app.
# NOTE(review): the recorded output shows the server binding 127.0.0.1:6060
# and failing with "address already in use" — make sure no other process is
# holding that port before running this cell.
px.launch_app(px.Dataset(df, schema))

converting items in column `embed_vis` to numpy.ndarray, because they have the following type: list
ERROR:    [Errno 48] error while attempting to bind on address ('127.0.0.1', 6060): address already in use
Traceback (most recent call last):
  File "/Users/sasha/miniconda3/envs/smol/lib/python3.11/site-packages/uvicorn/server.py", line 160, in startup
    server = await loop.create_server(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/sasha/miniconda3/envs/smol/lib/python3.11/asyncio/base_events.py", line 1525, in create_server
    raise OSError(err.errno, 'error while attempting '
OSError: [Errno 48] error while attempting to bind on address ('127.0.0.1', 6060): address already in use

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/sasha/miniconda3/envs/smol/lib/python3.11/threading.py", line 1038, in _bootstrap_inner
    self.run()
  File "/Users/sasha/miniconda3/envs/smol/lib/python3.11/threading.py", li

In [12]:
# Open a view onto the currently active Phoenix session.
px.active_session().view()

📺 Opening a view to the Phoenix app. The app is running at http://127.0.0.1:6060/


In [21]:
from pathlib import Path

# Load the parquet file from the current user's Downloads folder instead of a
# hard-coded, user-specific absolute path.
# NOTE: this rebinds `df`, replacing the highlights frame loaded earlier.
df = pd.read_parquet(Path.home() / "Downloads" / "2023-10-14_23-01-09.parquet")


In [22]:
# Display the frame loaded from the parquet file.
df

Unnamed: 0,videoID,title,summary,summary_vector,prediction_id,timestamp
0,652b44ad43e8c47e4eb48235,Israel-Hamas war: Israeli soldiers gear up to ...,An intense missile conflict unfolds in the ski...,"[-0.08562961220741272, -0.0525701567530632, -0...",75bb4f32-bc6e-4843-84f7-36d0fa039b20,2023-10-15 06:00:46.066000+00:00
1,652b443543e8c47e4eb4822f,Israel Air Force Rains Fire On Hamas Naval HQ ...,This video shows a city being struck by a disa...,"[0.10667204856872559, -0.297080397605896, -0.2...",802f380b-27c3-45c6-bbc9-c8b8304660e5,2023-10-15 06:00:46.066000+00:00
2,652b434143e8c47e4eb4820d,Tracking updates on Day 5 of Israel-Hamas War ...,The video depicts the current situation in Isr...,"[0.09777112305164337, -0.2457359880208969, -0....",490af43d-c600-490b-b217-cb2a2b153b72,2023-10-15 06:00:46.066000+00:00
3,652b433643e8c47e4eb4820b,Israel-Hamas War: Gaza Invasion Soon? | Vantag...,The video shows the ongoing escalation of the ...,"[-0.08089763671159744, -0.28464192152023315, -...",eaccc8cd-5dea-4ea3-87d3-347107b29ec3,2023-10-15 06:00:46.066000+00:00
4,652b42ac43e8c47e4eb481f3,Israel-Hamas Conflict | More than 1500 lives l...,Intense airstrikes continue to devastate the G...,"[-0.0032671913504600525, -0.15896207094192505,...",d45f3865-e51a-49f7-9290-b0e44f489ed9,2023-10-15 06:00:46.066000+00:00
5,652b40fe43e8c47e4eb481ed,Israel-Hamas Conflict | More than 1500 lives l...,The video shows the devastating impact of Isra...,"[0.005196314305067062, -0.2106756716966629, -0...",bbf8d244-1da6-48f2-9c2b-e666b21a7421,2023-10-15 06:00:46.066000+00:00
6,652b409843e8c47e4eb481e2,Israel goes on war footing after major Hamas a...,"In this video, the intensity of the airstrikes...","[0.0033128485083580017, -0.22791719436645508, ...",30a88b10-7266-41ee-88cf-ea330421aba0,2023-10-15 06:00:46.066000+00:00


In [None]:
# TODO: call GPT to produce a summary of the summaries

