In [None]:
#Save the transcripts from the youtube URL's in interviews.csv into player_transcripts.csv

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter

# Load the interviews CSV file into the 'interviews' DataFrame
interviews = pd.read_csv('interviews.csv')

# Initialize the TextFormatter
formatter = TextFormatter()

# Create an empty list to store results
transcripts = []

# Function to extract the video ID from the URL
def extract_video_id(url):
    video_id = url.split('v=')[-1]
    return video_id.split('&')[0]  # Handles case with additional parameters in URL

# Loop through the rows of the 'interviews' dataframe
for _, row in interviews.iterrows():
    video_url = row['url']
    player_name = row['playerName']
    video_id = extract_video_id(video_url)

    try:
        # Fetch the transcript for the video
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        formatted_transcript = formatter.format_transcript(transcript)
        
        # Store player name, URL, and transcript
        transcripts.append({
            'playerName': player_name,
            'url': video_url,
            'transcript': formatted_transcript
        })

    except Exception as e:
        # Handle any errors (e.g., transcript not available)
        transcripts.append({
            'playerName': player_name,
            'url': video_url,
            'transcript': f"Error fetching transcript: {e}"
        })

# Convert the results into a DataFrame
transcript_df = pd.DataFrame(transcripts)

# Save the results to a new CSV file
transcript_df.to_csv('player_transcripts.csv', index=False)

print("Transcripts fetched and saved successfully.")


In [13]:
#Testing out measuring confidence scores for several players (Patrik Laine, Olli Juolevi, Matthew Tkachuk)

import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
import statistics

nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

responses = [
    "I’ve always been a good playmaker, and even though many focus on my goal scoring, setting up plays is just as important.",
    "Yes, it felt like a good step forward. There’s much more I can offer, and I’m ready to take that next step next year.",
    "I don’t know yet; I haven’t really decided. We’ll see next week how I’m feeling.",
    "I don’t think so. They’ve expressed that they want me here and I feel the same. We just need to work out the terms and money.",
    "We're not there yet, but with young talents like our rookies, I’m very excited about our future."
]

responses2 = [
    "Oh yeah, of course. I'm specific and you're playing like playoffs.",
    "It's a little bit different, but I think it's better.",
    "It was great, like those nine thousand fans every night pretty much and a great team.",
    "I think my hockey IQ and how smart I am. I think it's pretty hard to teach that.",
    "I can always work on getting more muscle, practicing my shooting, or improving other areas.",
    "Yeah, at least Jota, so it's nice.",
    "It was great. My dream as a kid was to play there, and now I won a championship there.",
    "It was great. London is probably the best place to play junior hockey.",
    "It was tough at first, especially with the language, but the team was friendly and made it easier.",
    "Victor Mete helped me a lot. He drove me to practice and was with me pretty much all the time.",
    "It was a long season, almost 10 months with one team for one goal, and it all came down to overtime in one game.",
    "I actually danced because it was my teammate’s birthday, so we had a birthday dance.",
    "It looks good. The northern cities are different from Helsinki, but it's a great city."
]

responses3 = [
    "Just my all-around defensive game, and Coach Dale Hunter put me in situations to succeed. He had me out there at the end of games, and whether it was for three or four minutes protecting a lead, he had trust in me. It really gained a lot of confidence in myself.",
    "No, you get a certain amount of teams that want to interview you, and they're obviously all interested. Who knows where it's going to go from now, but hopefully my year would speak for itself. I hope I did well in my interviews, and I thought we kept the mood light in all of them and had some fun.",
    "It doesn't give you a long time to relax, but that's the way I like it. I'm a kid that's always energetic and wants to keep doing stuff. I can't wait to work out in 10 days and start my summer program.",
    "There are a lot of similarities and a lot of differences, but I don't think about it as pressure. I've had pressure my entire life, whether it's because of the name or because of me putting pressure on myself to be the best player I can be. Pressure is something that doesn't affect me.",
    "I hope so. That's a dream—you want to play in the NHL and play right away. But if that's going to happen, I'm going to need the biggest summer ever. I'm going to be really dedicated this summer to get more explosive, improve my first three steps, and get faster and stronger. The NHL is a fast league, and I need to get faster if I want to play in it."
]


def get_confidence_scores(responses, playerName):
    print(playerName)
    scores = []
    
    for response in responses:
        score = sia.polarity_scores(response)
        confidence_score = score['compound']
        scores.append(confidence_score)
        print(f"Response: {response}\nConfidence Score: {confidence_score}\n")
    
    # Calculate and print the median score
    median_score = statistics.median(scores)
    print(f"Median Confidence Score: {median_score}")

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\jesse\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [4]:
#Get confidence scores for several players and measure the correlation with NHL stats (GP, G, A, Pts)

import pandas as pd
import numpy as np
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
import statistics

nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

# Example responses for players
responses = [
    "I’ve always been a good playmaker, and even though many focus on my goal scoring, setting up plays is just as important.",
    "Yes, it felt like a good step forward. There’s much more I can offer, and I’m ready to take that next step next year.",
    "I don’t know yet; I haven’t really decided. We’ll see next week how I’m feeling.",
    "I don’t think so. They’ve expressed that they want me here and I feel the same. We just need to work out the terms and money.",
    "We're not there yet, but with young talents like our rookies, I’m very excited about our future."
]

responses2 = [
    "Oh yeah, of course. I'm specific and you're playing like playoffs.",
    "It's a little bit different, but I think it's better.",
    "It was great, like those nine thousand fans every night pretty much and a great team.",
    "I think my hockey IQ and how smart I am. I think it's pretty hard to teach that.",
    "I can always work on getting more muscle, practicing my shooting, or improving other areas.",
    "Yeah, at least Jota, so it's nice.",
    "It was great. My dream as a kid was to play there, and now I won a championship there.",
    "It was great. London is probably the best place to play junior hockey.",
    "It was tough at first, especially with the language, but the team was friendly and made it easier.",
    "Victor Mete helped me a lot. He drove me to practice and was with me pretty much all the time.",
    "It was a long season, almost 10 months with one team for one goal, and it all came down to overtime in one game.",
    "I actually danced because it was my teammate’s birthday, so we had a birthday dance.",
    "It looks good. The northern cities are different from Helsinki, but it's a great city."
]

responses3 = [
    "Just my all-around defensive game, and Coach Dale Hunter put me in situations to succeed. He had me out there at the end of games, and whether it was for three or four minutes protecting a lead, he had trust in me. It really gained a lot of confidence in myself.",
    "No, you get a certain amount of teams that want to interview you, and they're obviously all interested. Who knows where it's going to go from now, but hopefully my year would speak for itself. I hope I did well in my interviews, and I thought we kept the mood light in all of them and had some fun.",
    "It doesn't give you a long time to relax, but that's the way I like it. I'm a kid that's always energetic and wants to keep doing stuff. I can't wait to work out in 10 days and start my summer program.",
    "There are a lot of similarities and a lot of differences, but I don't think about it as pressure. I've had pressure my entire life, whether it's because of the name or because of me putting pressure on myself to be the best player I can be. Pressure is something that doesn't affect me.",
    "I hope so. That's a dream—you want to play in the NHL and play right away. But if that's going to happen, I'm going to need the biggest summer ever. I'm going to be really dedicated this summer to get more explosive, improve my first three steps, and get faster and stronger. The NHL is a fast league, and I need to get faster if I want to play in it."
]

responses4 = [
    "It's good, it was pretty tough, they worked pretty good, but you know, it's important time now to get ready for the season.",
    "Yeah, me, everybody wants to set a good first impression, so maybe you know, you give a little extra when there would be a do in a way in terms of like maybe staying on after extra and trying to show that you know you want to be here and you want to have a role here.",
    "They wanted me to get used to the program. They wanted me to learn some things and, you know, we went over what I should be getting better at. I think it's about your attitude when you get sent down. Obviously, I'm not excited about it, but you have to find a way to get up and not waste a week down.",
    "I didn't play, but it was obviously nice to learn their systems and stuff and just see how they operate. I think that's what I want, to get prepared for the future.",
    "The three and threes are pretty tough, and I've done it in the Junior League, but it looks like it's on a new level here. I'm looking forward to seeing how it is. I'll probably have a better answer after a few games.",
    "Yeah, that's exciting. You don't want it to be uneven, you want everybody to be competitive, and I think that helps everybody get prepared for their ultimate goal to play in the NHL.",
    "They want me to find my offensive game at a pro level, contribute from the back end, and be a factor offensively. They want me to get used to a pro game, getting used to what I've always done but faster and with bigger guys.",
    "I'm a good skater who can join or lead the rush, positionally sound, a puck-moving defenseman who can operate from the blue line and make plays."
]
responses5 = [
    "No regrets at all. I was so happy with my decision to go to the BCHL and pursue the NCAA route. I think I developed so much as a player and a person, and that was huge for me.",
    "Penticton was unbelievable to me these past two years, and now I get to pursue North Dakota and hopefully make it back-to-back National Championships. I'm really excited.",
    "It's pretty special looking back at players like Duncan Keith, Bo Bennett, and Joe Hanson. The BCHL has a lot of star NHL players who played in that league, and it's a special league to be a part of.",
    "The big reason for my success was just the team I had around me. We had a solid four-line team and played a defensive style my first year, and this year we were high-strong offensively with great players like Dante, Scott Conway, and Nick Jones.",
    "It was huge. Wearing the C for Team Canada and holding the trophy up was something I’d always dreamed about. To do that was something special, and winning the World Junior A Challenge was a memorable feeling.",
    "It was cool. The Leafs are such a prestigious team, and meeting those big-name guys was pretty surreal. It’s something I’ll always remember, and I never dreamed that all of this could happen.",
    "I'm really excited about it and looking forward to Friday."
]

responses6 = [
    "Offseason was great. I had a good time back home, worked out a lot, skated, enjoyed the weather, and tried to improve. Season starts soon, and I'm excited.",
    "There's nothing special, just work hard like in the summer, get back confidence, and start fresh.",
    "I feel good. It's a new season. No one should think about last year. I will do my best and see how it goes. I'm in a good spot and in good shape.",
    "Of course, playing with great players like Zach Hyman and Leon Draisaitl is a great opportunity. I’ll try to fit in and contribute where I can.",
    "Last season started really well. In the first 20 games, I was almost at a point-per-game pace. Then I had a small injury, but the team played really well. Now, I start again and try to be my best.",
    "I just need to skate well, help my teammates, and take my chances when they come. Hard shot, strong play in the corners—those are my focus.",
    "I don’t know, maybe my English improved because I try to be better every summer as a player, and maybe the English came along with it!",
    "Yes, it is time. It’s time to establish myself in the NHL and prove what I can do.",
    "No excuses. I was in good shape, and I did everything I could. Sometimes things just don’t go your way, but I worked hard.",
    "I don’t even remember exactly, but after Christmas, I focused on playing well in all areas of the ice, even if I wasn’t scoring as much.",
    "I want to be strong on the forecheck, hold onto the puck, and be strong around the net. Those are my areas to improve.",
    "Yes, I want to get back to skating with the puck and playing with more confidence.",
    "Hard to say, but I try to just be myself in Edmonton. The fans are great, and I appreciate them.",
    "Mikko was a good guy and a good friend, but there are still other Finnish guys here. Nothing changes too much.",
    "Confidence is a big thing in the NHL. You need it every day, every game, every play. I try to stay tough and play with confidence.",
    "Thanks." 
]



# Create a function to calculate the confidence scores
def get_confidence_scores(responses, playerName):
    print(playerName)
    scores = []
    
    for response in responses:
        score = sia.polarity_scores(response)
        confidence_score = score['compound']
        scores.append(confidence_score)
    
    # Calculate and print the median score
    median_score = statistics.median(scores)
    print(f"Median Confidence Score for {playerName}: {median_score}")
    
    return median_score

# Calculate the confidence scores for each player
median_laine = get_confidence_scores(responses, "Patrik Laine")
median_juolevi = get_confidence_scores(responses2, "Olli Juolevi")
median_tkachuk = get_confidence_scores(responses3, "Matthew Tkachuk")
median_johansen = get_confidence_scores(responses4, "Lucas Johansen")
median_jost = get_confidence_scores(responses5, "Tyson Jost")
median_puljujarvi = get_confidence_scores(responses6, "Jesse Puljujarvi")

conf = {
    'Player': ['Patrik Laine', 'Olli Juolevi', 'Matthew Tkachuk', 'Lucas Johansen', 'Tyson Jost', 'Jesse Puljujarvi'],
    'Confidence': [median_laine, median_juolevi, median_tkachuk, median_johansen, median_jost, median_puljujarvi]
}
df_confidence = pd.DataFrame(conf)

df = pd.read_csv('drafted_players.csv')

df = df.merge(df_confidence, on='Player')

# Calculate the correlation matrix excluding the other stats and only showing the Confidence_Score's correlation
correlation = df[['Confidence', 'GP', 'G', 'A', 'Pts']].corr()

# Print only the relevant part of the correlation matrix for Confidence_Score
print("\nCorrelation for All Players Together (Confidence Score Only):")
print(correlation[['Confidence']].dropna())
df.head()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\jesse\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Patrik Laine
Median Confidence Score for Patrik Laine: 0.6908
Olli Juolevi
Median Confidence Score for Olli Juolevi: 0.6705
Matthew Tkachuk
Median Confidence Score for Matthew Tkachuk: 0.9152
Lucas Johansen
Median Confidence Score for Lucas Johansen: 0.7866
Tyson Jost
Median Confidence Score for Tyson Jost: 0.8481
Jesse Puljujarvi
Median Confidence Score for Jesse Puljujarvi: 0.84055

Correlation for All Players Together (Confidence Score Only):
            Confidence
Confidence    1.000000
GP            0.483129
G             0.204104
A             0.467671
Pts           0.358013


Unnamed: 0,Round,Num.,Drafted By,Player,Pos,Drafted From,GP,G,A,Pts,PIM,Last Season,Confidence
0,1.0,2.0,Winnipeg,Patrik Laine,L,Tappara Tampere [SM-liiga],463.0,199.0,181.0,380.0,179.0,2023-24,0.6908
1,1.0,4.0,Edmonton,Jesse Puljujarvi,R,Karpat [SM-liiga],334.0,51.0,63.0,114.0,108.0,2022-23,0.84055
2,1.0,5.0,Vancouver,Olli Juolevi,D,London Knights [OHL],41.0,2.0,1.0,3.0,6.0,2021-22,0.6705
3,1.0,6.0,Calgary,Matthew Tkachuk,L,London Knights [OHL],511.0,192.0,299.0,491.0,548.0,2023-24,0.9152
4,1.0,10.0,Colorado,Tyson Jost,C,Penticton Vees [BCHL],413.0,54.0,80.0,134.0,154.0,2022-23,0.8481


In [15]:
# Separating Answers from the transcripts for all of player_transcripts.csv and printing to "playerResponses.csv"

import pandas as pd
import re
import spacy

# Load English NLP model
nlp = spacy.load("en_core_web_sm")

# Load the CSV file
df = pd.read_csv("player_transcripts.csv")

# Define question indicators
question_words = {"who", "what", "when", "where", "why", "how", "does", "did", "is", "are", "can", "could", "would", "should", "do", "was", "you", "your"}

def clean_text(text):
    """
    Clean and preprocess the text to make it more coherent.
    - Remove unwanted spaces.
    - Combine short fragmented sentences.
    - Fix common punctuation issues.
    """
    # Fix multiple spaces and clean up text
    text = re.sub(r'\s+', ' ', text)
    text = text.strip()

    # Ensure proper punctuation (add space after periods if missing)
    text = re.sub(r'([a-zA-Z0-9])([.,;!?])', r'\1 \2', text)
    return text

def extract_answers(transcript):
    # Process text with spaCy NLP
    doc = nlp(transcript)

    answers = []
    last_was_question = False
    current_answer = []

    for sent in doc.sents:
        words = sent.text.strip().lower().split()

        # Identify questions more accurately
        if words and (words[0] in question_words or "you" in words[:4] or "your" in words[:4]):
            if current_answer:
                # Append the previous accumulated answer and reset
                answers.append(" ".join(current_answer))
                current_answer = []
            last_was_question = True  # Mark as a question
        elif words and last_was_question:
            # If it's an answer after a question, store it as part of the answer
            current_answer.append(sent.text.strip())
            last_was_question = False  # Reset flag
        elif words and not last_was_question:
            # Continue collecting sentences for non-question responses
            current_answer.append(sent.text.strip())

    # If there’s any accumulated answer left, add it
    if current_answer:
        answers.append(" ".join(current_answer))

    # Clean and combine all answers to make them coherent
    return clean_text(" ".join(answers))

# Apply function to extract and clean answers
df["Answers"] = df["transcript"].apply(extract_answers)

# Keep only playerName and Answers columns
df_responses = df[["playerName", "Answers"]]

# Save the results to a new CSV file
df_responses.to_csv("playerResponses.csv", index=False)

print("Processing complete! Player responses saved in playerResponses.csv")


Processing complete! Player responses saved in playerResponses.csv


In [9]:
transcript_df.head()

Unnamed: 0,playerName,url,transcript
0,Olli Juolevi,https://www.youtube.com/watch?v=rTdZL494Oj8,how crazy this time been for you after\ncoming...
1,Tyson Jost,https://www.youtube.com/watch?v=KvX2IFou3R0,the decision to stay and play in the BC\nHocke...
2,Michael McLeod,https://www.youtube.com/watch?v=Ro15gFjuueI,mikee you're really filling out I guess\nthat'...
3,Max Jones,https://www.youtube.com/watch?v=FBBSAUrtMJA,talk about that you know that third\nperiod an...
4,Riley Tufte,https://www.youtube.com/watch?v=UssE9J4Rpd8,back-to-back for the UMD men's hockey\nteam th...


In [5]:
# Working on training a model for taking confidence from prospects and predicting stats at the NHL level

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

features = ['Confidence', 'GP', 'G', 'A']
target = 'Pts'

X = df[features]
y = df[target]

# Split the data into a training set and a test set (80/20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate the model on the test set
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error on test set:", mse)

# ---- Predicting for Undrafted Players ----
# Load undrafted players data from a CSV.
# This CSV should contain at least the columns: Player, Confidence, GP, G, A
undrafted_df = pd.read_csv('undrafted_players.csv')

# Predict career points (Pts) for each undrafted player
undrafted_df['PredictedPts'] = model.predict(undrafted_df[features])

# Sort players from highest to lowest predicted career points
undrafted_df = undrafted_df.sort_values(by='PredictedPts', ascending=False)

# Display the ordering of players (you can adjust the columns as needed)
print("\nPredicted ordering for undrafted players (highest career points to lowest):")
print(undrafted_df[['Player', 'PredictedPts']])


Mean Squared Error on test set: 0.014022521365026188


FileNotFoundError: [Errno 2] No such file or directory: 'undrafted_players.csv'