In [24]:
import os

from googleapiclient.discovery import build

# The YouTube Data API key is read from the environment so the credential is
# never stored in the notebook. Export YOUTUBE_API_KEY before running; a
# missing variable raises KeyError here instead of failing later inside the API.
# NOTE(review): the key that used to be hardcoded on this line should be
# treated as leaked and rotated.
api_key = os.environ["YOUTUBE_API_KEY"]

# Playlists whose videos are fetched and transcribed by the cells below.
playlist_ids = [
    'PLyT5NcCVqdX_IpNzjf5izT0jwJJf01jOU',
    'PLqpGk7JGu_zyv3EM2OEFBeydZG7B3Ya_A',
    'PLvr_yz_u6YtzOYDAxwrQ3G_GAxatPqYsF',
    'PLr4TMm2srxnwrq5Zq2BcKPV2S5tJqlRSq',
]

# Shared YouTube Data API v3 client used by get_video_details_from_playlist.
youtube = build('youtube', 'v3', developerKey=api_key)

In [25]:
#  retrieve video IDs and titles from the playlist
def get_video_details_from_playlist(playlist_id):
    """Collect ``(video_id, title)`` pairs for every item in a playlist.

    Pages through ``youtube.playlistItems().list`` (50 items per request) and
    keeps following ``list_next`` until it stops returning a request.

    Args:
        playlist_id: ID of the playlist to enumerate.

    Returns:
        A list of ``(video_id, title)`` tuples in the order the API returns them.
    """
    collected = []
    page_request = youtube.playlistItems().list(
        part='contentDetails,snippet',
        playlistId=playlist_id,
        maxResults=50,
    )
    while page_request:
        page = page_request.execute()
        collected.extend(
            (entry['contentDetails']['videoId'], entry['snippet']['title'])
            for entry in page['items']
        )
        # list_next yields the request for the following page, or a falsy value at the end.
        page_request = youtube.playlistItems().list_next(page_request, page)
    return collected

# retrieve transcripts and add video titles
def get_transcripts_with_titles(video_details):
    """Fetch the transcript of each video and pair it with the video title.

    Args:
        video_details: Iterable of ``(video_id, title)`` pairs.

    Returns:
        A dict mapping video ID to ``{'title': ..., 'text': ...}``, where
        ``text`` is every transcript entry's text joined with single spaces.
        Videos whose transcript cannot be fetched are reported with ``print``
        and left out of the result.
    """
    # NOTE(review): YouTubeTranscriptApi is not imported in the cells visible
    # here -- confirm it is imported earlier in the notebook.
    collected = {}
    for vid, video_title in video_details:
        try:
            segments = YouTubeTranscriptApi.get_transcript(vid)
            joined = " ".join(segment['text'] for segment in segments)
            collected[vid] = dict(title=video_title, text=joined)
        except Exception as err:
            # Best effort: one failing video must not abort the whole playlist.
            print(f"Could not retrieve transcript for video ID {vid}: {err}")
    return collected

# Merge the transcripts of every configured playlist into one mapping keyed by video ID.
all_transcripts = {}
for playlist_id in playlist_ids:
    print(f"Processing playlist: {playlist_id}")
    video_details = get_video_details_from_playlist(playlist_id)
    transcripts_with_titles = get_transcripts_with_titles(video_details)
    # A video present in several playlists keeps the entry fetched last.
    all_transcripts = {**all_transcripts, **transcripts_with_titles}


Processing playlist: PLyT5NcCVqdX_IpNzjf5izT0jwJJf01jOU
Processing playlist: PLqpGk7JGu_zyv3EM2OEFBeydZG7B3Ya_A
Could not retrieve transcript for video ID XdWmiJVSYhQ: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=XdWmiJVSYhQ! This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!
Processing playlist: PLvr_yz_u6YtzOYDAxwrQ3G_GAxatPqYsF
Processing playlist: PLr4TMm2srxnwrq5Zq2BcKPV2S5tJqlRSq


In [34]:
# Save transcripts with titles to a file, one record per video, in the
# "Title / Video ID / Transcript Text" layout that the parsing cells read back.
# open(..., 'w') does not create parent directories, so make sure data/ exists
# first; otherwise a fresh checkout fails with FileNotFoundError.
os.makedirs('data', exist_ok=True)
with open('data/raw_transcripts_boss.txt', 'w', encoding='utf-8') as file:
    for video_id, data in all_transcripts.items():
        title = data['title']
        text = data['text']
        file.write(f"Title: {title}\nVideo ID: {video_id}\nTranscript Text:\n{text}\n\n")

In [35]:
# Read the saved boss transcripts back as a single string for parsing.
file_path = 'data/raw_transcripts_boss.txt'

with open(file_path, mode='r', encoding='utf-8') as file:
    content = file.read()


In [36]:
import re

# Split the content into one section per record. The "Title: " marker is
# anchored to the start of a line (re.M) so the same text appearing inside a
# video title or transcript does not start a spurious record.
sections = re.split(r"^Title: ", content, flags=re.M)[1:]  # The first split part is empty, skip it.

In [37]:
# Turn each raw section into a {"Title", "Video ID", "Transcript"} record.
parsed_data = []
for section in sections:
    title_match = re.search(r"(.*?)\n", section)  # first line of the section
    video_id_match = re.search(r"Video ID: (.*?)\n", section)
    transcript_match = re.search(r"Transcript Text:\n(.*)", section, re.S)  # re.S: span newlines

    # Sections missing any of the three fields are dropped.
    if not (title_match and video_id_match and transcript_match):
        continue

    record = {
        label: match.group(1).strip()
        for label, match in (
            ("Title", title_match),
            ("Video ID", video_id_match),
            ("Transcript", transcript_match),
        )
    }
    parsed_data.append(record)


In [39]:
# Sanity check: how many records were parsed from the raw transcript file.
print(len(parsed_data))

327


In [56]:
import openai
import os
# Take the OpenAI key from the environment; os.environ[...] raises KeyError if OPENAI_API_KEY is not set.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [41]:
def clean_transcript_for_rag(title, video_id, transcript, model="gpt-4o-mini"):
    """Turn a raw boss-fight transcript into a structured guide via the OpenAI chat API.

    Args:
        title: Video title, interpolated into the prompt.
        video_id: Accepted for call-site compatibility; not used in the prompt.
        transcript: Full transcript text, interpolated into the prompt.
        model: Chat model name passed to the API. Defaults to "gpt-4o-mini".

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or an empty string when the API returns no text content.
    """
    # Item 4 previously announced "the following headings" without listing any;
    # the headings below mirror tasks 1-3 so the model gets a concrete layout.
    prompt = f"""
    This is a transcript of a boss fight guide for Elden Ring. Your task is to extract structured information about the boss fight while ensuring the boss’s name is explicitly referenced throughout the output.

    Input:
    - Video Title: {title}
    - Transcript: {transcript}

    Task:
    1. Summarize the key strategy for defeating the boss, explicitly mentioning the boss name throughout.
    2. List the recommended equipment, abilities, or techniques, ensuring they are tied to the boss name wherever relevant.
    3. Provide specific tips for melee and ranged builds, explicitly mentioning the boss name in the tips.
    4. Structure the output with the following headings, ensuring the boss name appears in each section:
       - Boss Strategy
       - Recommended Equipment and Abilities
       - Melee Build Tips
       - Ranged Build Tips
    """

    response = openai.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": prompt
            }
        ]
    )

    # message.content can be None (e.g. a refusal); avoid calling .strip() on None.
    content = response.choices[0].message.content
    return (content or "").strip()

# Example usage: run the prompt on the first parsed entry and show the result.
first_guide = parsed_data[0]
structured_guide = clean_transcript_for_rag(
    title=first_guide["Title"],
    video_id=first_guide["Video ID"],
    transcript=first_guide["Transcript"],
)

print(structured_guide)

# Boss Strategy for Defeating Rennala, Queen of the Full Moon

To defeat Rennala, Queen of the Full Moon, the key strategy involves breaking the aura of her students in phase one. You need to hit the student with a glowing aura once to reveal Rennala. Repeat this process for three students to drop Rennala's health significantly. Avoid killing all the students before breaking the aura, as it could lead to more chaotic positioning in the arena. Once you've reduced half of Rennala's health, prepare for the second phase by buffing your weapon and attacking aggressively to finish her off in the same manner.

## Recommended Equipment and Abilities for Defeating Rennala, Queen of the Full Moon

1. **Spirit Ashes**: Summoning wolf spirit ashes is highly recommended when facing Rennala. This allows for consistent staggering against her.
2. **Bleed Weapons**: Upgrading a melee weapon that can inflict bleed damage is beneficial for defeating Rennala, Queen of the Full Moon.
3. **Holy Damage**: If

In [42]:
# Structure every parsed boss guide and write the results to one text file,
# each guide followed by an 80-character "=" separator line.
output_txt_path = "data/structured_guides_cleaned.txt"

with open(output_txt_path, "w", encoding="utf-8") as output_file:
    for guide in parsed_data:
        structured_guide = clean_transcript_for_rag(
            guide["Title"], guide["Video ID"], guide["Transcript"]
        )
        output_file.write(
            f"Title: {guide['Title']}\n{structured_guide}\n\n{'=' * 80}\n\n"
        )

In [None]:
### Equipment Guide ###

In [45]:
# Fetch titles and transcripts for the single equipment playlist.
# (The name `equiment_transcripts` is kept as spelled because the save cell reads it.)
equiment_transcripts = {}
playlist_id = 'PLyT5NcCVqdX_V_f4l1Fpl18hHXiuhwOCf'
print(f"Processing playlist: {playlist_id}")
video_details = get_video_details_from_playlist(playlist_id)
transcripts_with_titles = get_transcripts_with_titles(video_details)
for video_id, entry in transcripts_with_titles.items():
    equiment_transcripts[video_id] = entry

Processing playlist: PLyT5NcCVqdX_V_f4l1Fpl18hHXiuhwOCf


In [47]:
# Save the equipment transcripts in the same "Title / Video ID / Transcript Text"
# layout used for the boss transcripts.
# open(..., 'w') does not create parent directories, so make sure data/ exists
# first; otherwise a fresh checkout fails with FileNotFoundError.
os.makedirs('data', exist_ok=True)
with open('data/raw_transcripts_equipment.txt', 'w', encoding='utf-8') as file:
    for video_id, data in equiment_transcripts.items():
        title = data['title']
        text = data['text']
        file.write(f"Title: {title}\nVideo ID: {video_id}\nTranscript Text:\n{text}\n\n")

In [48]:
# Load the raw equipment transcripts and parse them back into records.
file_path = 'data/raw_transcripts_equipment.txt'
with open(file_path, 'r', encoding='utf-8') as file:
    content = file.read()

# Split into one section per record. The "Title: " marker is anchored to the
# start of a line (re.M) so the same text inside a video title or transcript
# does not start a spurious record. The part before the first marker is empty.
sections = re.split(r"^Title: ", content, flags=re.M)[1:]

parsed_data_equipment = []
for section in sections:
    title_match = re.search(r"(.*?)\n", section)  # first line of the section
    video_id_match = re.search(r"Video ID: (.*?)\n", section)
    transcript_match = re.search(r"Transcript Text:\n(.*)", section, re.S)  # re.S: span newlines

    # Keep only sections where all three fields were found.
    if title_match and video_id_match and transcript_match:
        parsed_data_equipment.append({
            "Title": title_match.group(1).strip(),
            "Video ID": video_id_match.group(1).strip(),
            "Transcript": transcript_match.group(1).strip()
        })

In [51]:
# Function to clean the equipment transcripts using the designed prompt
def clean_equipment_transcript(title, video_id, transcript, model="gpt-4o-mini"):
    """Turn a raw equipment-guide transcript into structured notes via the OpenAI chat API.

    Args:
        title: Video title, interpolated into the prompt.
        video_id: Accepted for call-site compatibility; not used in the prompt.
        transcript: Full transcript text, interpolated into the prompt.
        model: Chat model name passed to the API. Defaults to "gpt-4o-mini".

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or an empty string when the API returns no text content.
    """
    # Item 4 previously announced "the following headings" without listing any;
    # the headings below mirror tasks 1-3 so the model gets a concrete layout.
    prompt = f"""
    This is a transcript from an Elden Ring equipment guide video. Your task is to extract and structure information about the equipment discussed, including specific weapons, their strengths and weaknesses, and the ideal use cases.

    Input:
    - Video Title: {title}
    - Transcript: {transcript}

    Task:
    1. Identify each piece of equipment (e.g., weapons, armor, talismans) mentioned in the transcript and summarize its key characteristics.
    2. Highlight the strengths and weaknesses of the equipment in concise bullet points.
    3. Recommend specific builds or playstyles where this equipment would excel, mentioning compatibility with attributes or other equipment.
    4. Structure the output under the following headings (repeat them for each piece of equipment):
       - Characteristics
       - Strengths
       - Weaknesses
       - Recommended Builds
    """

    response = openai.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": prompt
            }
        ]
    )

    # message.content can be None (e.g. a refusal); avoid calling .strip() on None.
    content = response.choices[0].message.content
    return (content or "").strip()

# Example usage: run the prompt on the first parsed equipment entry and show the result.
first_guide = parsed_data_equipment[0]
structured_guide = clean_equipment_transcript(
    title=first_guide["Title"],
    video_id=first_guide["Video ID"],
    transcript=first_guide["Transcript"],
)

print(structured_guide)


# Elden Ring Greatsword Equipment Guide

## 1. Alabaster Lord Sword
- **Characteristics:**
  - Lightest greatsword (8 weight)
  - Deals physical and magic damage.
  - Requires points in Strength, Dexterity, and Intelligence.
  
- **Strengths:**
  - Highest physical damage among magic/cold infused greatswords.
  - Bonus damage to gravity-type enemies.
  - Excellent strength scaling.
  - Can acquire two without player drops.

- **Weaknesses:**
  - Shorter than most greatswords.
  - Second lowest magic damage among magic/cold infused greatswords.
  - Poor dexterity and intelligence scaling.
  - Try to power stance with two copies for lowered equip load.

- **Recommended Builds:**
  - Primarily suited for pure Strength builds.
  - Use alongside a greatshield for block-counter strategies or dual-wielding for added versatility.

## 2. Ordovis's Greatsword
- **Characteristics:**
  - Weighs 12.
  - Deals physical and holy damage.
  - Requires Strength, Dexterity, and Faith.

- **Strengths:**
 

In [53]:
# Structure every parsed equipment guide and write the results to one text
# file, each guide followed by an 80-character "=" separator line.
output_txt_path = "data/structured_equipments_guides_cleaned.txt"

with open(output_txt_path, "w", encoding="utf-8") as output_file:
    for guide in parsed_data_equipment:
        structured_guide = clean_equipment_transcript(
            guide["Title"], guide["Video ID"], guide["Transcript"]
        )
        output_file.write(
            f"Title: {guide['Title']}\n{structured_guide}\n\n{'=' * 80}\n\n"
        )