-
Notifications
You must be signed in to change notification settings - Fork 0
/
summarization_spotify_releases_chatGPT.py
168 lines (127 loc) · 5.32 KB
/
summarization_spotify_releases_chatGPT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import os
import json
import time
from typing import Dict, List, Any
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
# Shared Spotify client used by every fetch helper in this module.
# SpotifyClientCredentials() is built without arguments, so the client id and
# secret must be supplied from outside this file (spotipy reads them from the
# SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET environment variables).
sp = spotipy.Spotify(
    client_credentials_manager=SpotifyClientCredentials(),
)
def get_new_releases(limit: int = 50, offset: int = 0) -> List[Dict[str, Any]]:
    """
    Return one page of Spotify's "new releases" album listing.

    Args:
        limit (int, optional): Maximum number of albums to request. Defaults to 50.
        offset (int, optional): Index of the first album to return. Defaults to 0.

    Returns:
        List[Dict[str, Any]]: The album objects found under ``albums.items``
        in the API response.
    """
    # The response nests the page under "albums"; only its item list is needed.
    response = sp.new_releases(limit=limit, offset=offset)
    return response["albums"]["items"]
def get_album_tracks(album_id: str) -> List[Dict[str, Any]]:
    """
    Fetch every track of a specific album.

    The album-tracks endpoint is paginated, so a single request only returns
    the first page. This function follows the ``next`` link of each page until
    the listing is exhausted, so long albums are not silently truncated.

    Args:
        album_id (str): The Spotify ID of the album.

    Returns:
        List[Dict[str, Any]]: A list of dictionaries containing track information,
        in the order Spotify returns them.
    """
    page = sp.album_tracks(album_id)
    tracks = list(page["items"])

    # A truthy "next" value means more pages exist; sp.next() fetches the
    # following page from that link.
    while page.get("next"):
        page = sp.next(page)
        tracks.extend(page["items"])

    return tracks
def save_data_to_file(data: List[Dict[str, Any]], file_path: str) -> None:
    """
    Serialize album/track records to a JSON file.

    Args:
        data (List[Dict[str, Any]]): Album and track dictionaries to persist.
        file_path (str): Destination path; an existing file is overwritten.
    """
    # ensure_ascii=False keeps non-ASCII names readable in the UTF-8 output.
    serialized = json.dumps(data, ensure_ascii=False, indent=4)
    with open(file_path, "w", encoding="utf-8") as out_file:
        out_file.write(serialized)
def load_data_from_file(file_path: str) -> List[Dict[str, Any]]:
    """
    Read back album/track records from a JSON file.

    Args:
        file_path (str): Path of the UTF-8 encoded JSON file to read.

    Returns:
        List[Dict[str, Any]]: The decoded JSON content (album and track dictionaries).
    """
    with open(file_path, "r", encoding="utf-8") as in_file:
        raw_text = in_file.read()
    return json.loads(raw_text)
def download_latest_albums_data(
    total_albums: int = 20,
    file_path: str = "albums_and_tracks.json",
) -> None:
    """
    Download the latest albums and their tracks from Spotify and save them to a JSON file.

    Albums are requested page by page until ``total_albums`` have been
    collected or Spotify has no more new releases to offer. Each page request
    is capped at the number of albums still missing, so the saved file never
    holds more than ``total_albums`` entries.

    Args:
        total_albums (int, optional): Number of albums to collect. Defaults to 20.
        file_path (str, optional): Path of the JSON file to write.
            Defaults to "albums_and_tracks.json".
    """
    max_page_size = 50  # page size used per request (Spotify's documented maximum)
    offset = 0
    all_albums: List[Dict[str, Any]] = []

    while len(all_albums) < total_albums:
        # Never ask for more albums than are still missing.
        page_size = min(max_page_size, total_albums - len(all_albums))
        new_releases = get_new_releases(page_size, offset)
        if not new_releases:
            # No more releases available: stop rather than loop forever.
            break

        for album in new_releases:
            tracks = get_album_tracks(album["id"])
            all_albums.append(
                {
                    "album_name": album["name"],
                    # Only the first credited artist is recorded.
                    "artist_name": album["artists"][0]["name"],
                    "album_type": album["album_type"],
                    "release_date": album["release_date"],
                    "tracks": [
                        {
                            "track_name": track["name"],
                            "duration_ms": track["duration_ms"],
                        }
                        for track in tracks
                    ],
                }
            )

        # Advance by what was actually received, not by what was requested.
        offset += len(new_releases)
        print(f"Downloaded {len(all_albums)}/{total_albums}")

        if len(all_albums) < total_albums:
            time.sleep(1)  # Add a delay to avoid hitting the rate limit

    save_data_to_file(all_albums, file_path)
def preprocess_docs(data: List[Dict[str, Any]], chunk_size: int = 3500) -> List[Document]:
    """
    Convert the JSON data to a list of Document objects.

    The data is serialized to one indented JSON string, which is then cut
    into consecutive fixed-size character windows. The cuts are purely
    positional, so a chunk may start or end in the middle of a JSON value.

    Args:
        data (List[Dict[str, Any]]): List of dictionaries containing album and track information.
        chunk_size (int, optional): Maximum number of characters per Document.
            Defaults to 3500.

    Returns:
        List[Document]: Document objects whose ``page_content`` holds successive
        segments of the JSON string, each at most ``chunk_size`` characters long.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
    """
    if chunk_size <= 0:
        # A zero step would raise an opaque range() error; a negative step
        # would silently yield no documents at all.
        raise ValueError("chunk_size must be a positive integer")

    json_string = json.dumps(data, ensure_ascii=False, indent=4)
    return [
        Document(page_content=json_string[start : start + chunk_size])
        for start in range(0, len(json_string), chunk_size)
    ]
def get_summary(docs: List[Document]) -> Dict[str, Any]:
    """
    Generate a summary using the JSON data provided in the list of Document objects.

    A "map_reduce" summarize chain is used: every document is first summarized
    with the short-summary prompt, then the partial summaries are combined
    with the article prompt.

    Args:
        docs (List[Document]): A list of Document objects containing the JSON data as strings.

    Returns:
        Dict[str, Any]: The chain's output mapping, returned as-is. Because the
        chain is built with ``return_intermediate_steps=True`` this is a
        mapping rather than a plain string; it is expected to hold the final
        text under "output_text" and the per-document summaries under
        "intermediate_steps" (NOTE(review): confirm the key names against the
        installed LangChain version).
    """
    llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")

    # Prompt applied to each individual chunk (map step).
    map_template = """Write a short summary about the latest songs in Spotify based on the JSON data below: \n\n{text}."""
    # Prompt applied to the collected chunk summaries (combine step).
    combine_template = """Write an article about the latest music released in Spotify (below) and adress the change in music trends using the style of Rick Beato. : \n\n{text}"""

    map_prompt = PromptTemplate(template=map_template, input_variables=["text"])
    combine_prompt = PromptTemplate(template=combine_template, input_variables=["text"])

    chain = load_summarize_chain(
        llm,
        chain_type="map_reduce",
        return_intermediate_steps=True,
        map_prompt=map_prompt,
        combine_prompt=combine_prompt,
        verbose=True,
    )
    # return_only_outputs=True drops the input documents from the result.
    return chain({"input_documents": docs}, return_only_outputs=True)
if __name__ == "__main__":
    # Step 1: refresh the on-disk snapshot of Spotify's new releases.
    download_latest_albums_data()

    # Step 2: reload the snapshot and cut it into documents for the LLM.
    data = load_data_from_file("albums_and_tracks.json")
    docs = preprocess_docs(data)

    # Step 3: run the summarization chain and show its result.
    summary = get_summary(docs)
    print(summary)