# Buffy the Vampire Slayer Spotify Podcasts

### connect to the Spotify API via spotipy and save the data as .json files

In [98]:
import requests

from dotenv import load_dotenv
import os
# Load settings through python-dotenv before the environment is read below.
load_dotenv()

# Spotify API credentials; os.getenv yields None for a variable that is not set.
client_id, client_secret = (
    os.getenv(key) for key in ("client_id", "client_secret")
)

In [99]:
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials

# Spotify API credentials are the strings read from the environment in the
# dotenv cell above. They must be passed through unchanged: wrapping a value
# in braces, as in `{client_id}`, builds a one-element set, not a string.
if not client_id or not client_secret:
    raise RuntimeError(
        "Spotify credentials are missing: set client_id and client_secret "
        "in the environment or in the .env file."
    )

# The run recorded for this cell ended in a ReadTimeout with "read timeout=5",
# so give slow responses more time to arrive.
REQUEST_TIMEOUT_SECONDS = 30

# authenticate and authorize application
auth_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(auth_manager=auth_manager, requests_timeout=REQUEST_TIMEOUT_SECONDS)

# search parameters shared by the episode and the show search
SEARCH_QUERY = 'buffy the vampire slayer'
MARKET = 'US'
PAGE_SIZE = 50      # items requested per call
MAX_OFFSET = 950    # last offset requested (inclusive)
# NOTE(review): with PAGE_SIZE items per call, stepping the offset by 1 makes
# consecutive responses overlap by PAGE_SIZE - 1 items. Stepping by PAGE_SIZE
# would cover the same offsets in 20 calls per search type instead of 951, but
# it changes the length of the saved lists, and a later exploration cell
# indexes spotify_episodes[950]. Change both together.
OFFSET_STEP = 1


def fetch_search_pages(search_type):
    """Return one raw search response per offset for the given search type.

    Args:
        search_type: Value passed as the search `type`, e.g. 'episode' or 'show'.

    Returns:
        A list with one response per offset in
        range(0, MAX_OFFSET + 1, OFFSET_STEP), in increasing offset order.
    """
    return [
        sp.search(
            q=SEARCH_QUERY,
            type=search_type,
            market=MARKET,
            limit=PAGE_SIZE,
            offset=offset,
        )
        for offset in range(0, MAX_OFFSET + 1, OFFSET_STEP)
    ]


def save_json(data, path):
    """Write `data` to `path` as JSON and print a confirmation message."""
    with open(path, 'w') as json_file:
        json.dump(data, json_file)
    print(f"Data saved to {path} successfully.")


# specify file path for json file
episodes_path = 'spotify_episodes.json'
shows_path = 'spotify_shows.json'

# Fetch and save one dataset at a time, so a failure while fetching shows
# does not throw away the episodes that were already downloaded.
episodes_data = fetch_search_pages('episode')
save_json(episodes_data, episodes_path)

shows_data = fetch_search_pages('show')
save_json(shows_data, shows_path)

ReadTimeout: HTTPSConnectionPool(host='api.spotify.com', port=443): Read timed out. (read timeout=5)

### load the files (spotify_episodes and spotify_shows)

In [97]:
import datetime
import pandas as pd
import csv

def _load_json(path):
    """Return the parsed contents of the JSON file at `path`."""
    with open(path, 'r') as handle:
        return json.load(handle)


# Both files are produced by the fetch cell earlier in this notebook.
spotify_episodes = _load_json('spotify_episodes.json')
spotify_shows = _load_json('spotify_shows.json')

### explore the data

In [89]:
# Number of saved search responses for each search type.
print(len(spotify_episodes))
print(len(spotify_shows))
print("---")
# Number of episode items inside the first response.
print(len(spotify_episodes[0]['episodes']['items']))
print("---")
# Number of top-level keys in the last response. Indexing with -1 instead of a
# literal position keeps this cell working when the number of saved responses
# differs from the run recorded below.
print(len(spotify_episodes[-1]))
# First episode item of the first response, to see which fields are available.
print(spotify_episodes[0]['episodes']['items'][0])
print("---")

951
951
---
50
---
1
{'audio_preview_url': 'https://podz-content.spotifycdn.com/audio/clips/1Uw7sB7s3q1T6pcAdxAm65/clip_139362_196461.mp3', 'description': 'This week on the very first episode of Buffering the Vampire Slayer, Jenny and Kristin watch and discuss Buffy the Vampire Slayer Season 1 Episode 1: Welcome to the Hellmouth. If you like the show, feel free to leave us a review on iTunes - that would be so rad of you! Give us a shout on twitter: Jenny Owen Youngs (@jennyowenyoungs). Kristin Russo (@kristinnoeline). Buffering the Vampire Slayer (@bufferingcast). You can also join us on facebook.com/bufferingcast. Logo: Kristine Thune, kristinethune.com Special thanks to Mike Tuccillo, mix consultant on our theme music.', 'duration_ms': 2773916, 'explicit': True, 'external_urls': {'spotify': 'https://open.spotify.com/episode/4V5hScweWn1JkMXu6Xro8z'}, 'href': 'https://api.spotify.com/v1/episodes/4V5hScweWn1JkMXu6Xro8z', 'html_description': '<p>This week on the very first episode of Bu

### check the episodes

In [43]:
# Episode titles containing any of these substrings are left out.
excluded_terms = ('Harry Potter', 'Spider-Man', 'Ant-Man', 'Mandalorian')

# Unique titles across every saved response; the set drops repeated titles.
episodes = {
    entry['name']
    for page in spotify_episodes
    for entry in page['episodes']['items']
    if not any(term in entry['name'] for term in excluded_terms)
}
print(episodes)

{'0.09: Season 2 Wrap-Up', 'Ep 11: Escuela Dura', 'Buffy S2 at 25: Passion', 'Bonus Episode - Buffy The Vampire Slayer Movie w/ Johny Walsh', 'Killed by Death | 30', '080 - Buffy The Vampire Slayer', 'Acting a Lil Different', 'Lie To Me | 19', 'Season 5 Wrap-up', 'The Harvest | 2', 'Nightmares | 10', 'Wondery Presents: The Rewatcher: Buffy The Vampire Slayer', 'Possessive Boyfriends', 'Wondery Presents: The ReWatcher: Buffy The Vampire Slayer', 'Prophecy Girl | 12', 'The Dark Age | 20', 'School Hard (S2.03)', 'Season 3 Wrap-up', 'Buffy S1 at 25: ”Angel”', 'Ep 93: Bringing Down the House', 'Bad Eggs | 24', 'On Buffy: Where Do We Go From Here?', 'Enemies (S3.17)', 'Mommy Issues', 'Never Kill a Boy on the First Date | 5', 'Ep 13: Ghost Willow Saves Halloween', 'Innocence | 26', '0.05: Happy 20th Anniversary, Buffy!', 'Still Pretty', 'Becoming (Part 2) | 34', 'S5E18: “Intervention”', 'Am I Mean!?', 'Phases | 27', 'Episode 375: Special Episode: Introducing The Rewatcher: Buffy the Vampire S

## viz 1: episode release dates

### analyze release dates for podcast episodes + turn the data into a dataframe

In [76]:
# Episode titles containing any of these substrings are skipped.
excluded_terms = ('Harry Potter', 'Spider-Man', 'Ant-Man', 'Mandalorian')

dates = []
years = []
months = []
days = []
# Mirrors `dates` as a set so the "already recorded?" check does not rescan
# the whole list for every item.
seen_dates = set()

for episode in spotify_episodes:
    for item in episode['episodes']['items']:
        name = item['name']
        if any(term in name for term in excluded_terms):
            continue

        # Parse and re-format so every stored date has the same YYYY-MM-DD form.
        date_obj = datetime.datetime.strptime(item['release_date'], "%Y-%m-%d")
        date_str = date_obj.strftime("%Y-%m-%d")

        # NOTE(review): rows are de-duplicated by release date, so different
        # episodes released on the same day are recorded once. Confirm that
        # counting distinct dates (not distinct episodes) is intended.
        if date_str in seen_dates:
            continue
        seen_dates.add(date_str)

        dates.append(date_str)
        years.append(date_obj.year)
        months.append(date_obj.month)
        days.append(date_obj.day)

# One row per distinct release date, in first-seen order.
data = {'Date': dates, 'Year': years, 'Month': months, 'Day': days}
df = pd.DataFrame(data)

df

Unnamed: 0,Date,Year,Month,Day
0,2016-09-14,2016,9,14
1,2023-06-12,2023,6,12
2,2023-06-19,2023,6,19
3,2023-05-29,2023,5,29
4,2023-06-05,2023,6,5
...,...,...,...,...
120,2017-07-19,2017,7,19
121,2021-09-23,2021,9,23
122,2021-05-25,2021,5,25
123,2019-06-20,2019,6,20


### group the dates by year and export it as a .csv file

In [85]:
# Count the rows of df that fall in each year.
year_groups = df.groupby('Year')
df_year = year_groups.size()
df_year

Year
2013     1
2016     5
2017     7
2018     4
2019     4
2020    11
2021    16
2022    27
2023    50
dtype: int64

In [87]:
# Per-date table: written without the row index.
df.to_csv(path_or_buf='episode_dates.csv', index=False)
# Per-year counts: the index (Year) is kept as the first column.
df_year.to_csv(path_or_buf='episode_dates_grouped.csv')

## viz 2: podcast shows

### create a csv with data of each show

In [91]:
import csv

# Lower-cased names of the shows already written, used to skip repeats.
podcast = set()
field_names = ['Podcast Name', 'Description', 'Total Episodes', 'Explicit', 'URL', 'Image']

# Open the CSV file in write mode
with open('podcasts.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)

    writer.writerow(field_names)

    for show in spotify_shows:
        for total in show['shows']['items']:
            name = total['name'].lower()

            # Keep only shows whose name mentions "buffy", once per name
            # (compared case-insensitively).
            if 'buffy' not in name or name in podcast:
                continue
            podcast.add(name)

            # Keep only the part of the URL before the first space, if any.
            url = total['external_urls']['spotify'].split(' ')[0]
            explicit_status = "Yes" if total['explicit'] else "No"

            # Use the first listed image, or an empty cell when there is none.
            images = total['images']
            first_image_url = images[0]['url'] if images else ""

            writer.writerow([
                total['name'],
                total['description'],
                total['total_episodes'],
                explicit_status,
                url,
                first_image_url,
            ])

print("CSV file created successfully.")


CSV file created successfully.
