<!-- ### Plotly -->

In [2]:
import plotly.graph_objects as go
import plotly.io as pio

# HorizonAnalytics theme: dark navy canvas, bold white Montserrat text.
# The colours and the font spec are shared by every text element, so they are
# defined once here and reused below.
_HA_BACKGROUND = '#0d1b2a'
_HA_FOREGROUND = '#ffffff'
_HA_GRID_COLOR = 'rgba(255, 255, 255, 0.2)'  # Softer grid lines for contrast
_HA_FONT_FAMILY = 'Montserrat, sans-serif'


def _ha_font(size):
    """Return the theme's bold white font spec at the given size."""
    return dict(
        size=size,
        family=_HA_FONT_FAMILY,
        color=_HA_FOREGROUND,
        weight="bold",
    )


def _ha_axis(anchor):
    """Return the axis spec shared by x and y; only the anchor differs."""
    return dict(
        anchor=anchor,
        showgrid=True,
        gridcolor=_HA_GRID_COLOR,
        tickfont=_ha_font(36),
        title=dict(text='', font=_ha_font(48)),
        linecolor=_HA_FOREGROUND,  # White axis lines for contrast
        linewidth=2,
    )


HorizonAnalytics = go.layout.Template(
    layout=go.Layout(
        paper_bgcolor=_HA_BACKGROUND,
        plot_bgcolor=_HA_BACKGROUND,
        height=800,
        width=800 * 1.618,
        xaxis=_ha_axis('y'),
        yaxis=_ha_axis('x'),
        font=_ha_font(36),  # Default for all text not styled explicitly
        # Colorway applied to traces in order
        colorway=[
            "#FFFF00", "#33D7FF", "#A463FF", "#FFD700",
            "#ff4081", "#ffc107", "#00c4a0", "#a0aec0",
        ],
        title=dict(
            text='',
            font=_ha_font(64),
            x=0.5,   # Center title
            y=0.97,  # Push title higher
        ),
    ),
    # Default styling for scatter/line traces: thicker lines
    data=dict(scatter=[go.Scatter(line=dict(width=5))]),
)

# Register the template and make it the default for every new figure
pio.templates['HorizonAnalytics'] = HorizonAnalytics
pio.templates.default = 'HorizonAnalytics'

<!-- ## Top X Shooters -->

In [57]:
# from selenium import webdriver
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.chrome.options import Options
# from webdriver_manager.chrome import ChromeDriverManager
# from bs4 import BeautifulSoup
# import pandas as pd
# import time

# # Setup Selenium
# options = Options()
# options.headless = True  # Run in headless mode (no GUI)

# # Initialize WebDriver
# driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# # Load the page
# url = "https://www.basketball-reference.com/leaders/fg3_career.html"
# driver.get(url)

# # Wait for JavaScript to render content
# time.sleep(5)  # Adjust if necessary

# # Get page source after JavaScript executes
# soup = BeautifulSoup(driver.page_source, "html.parser")
# driver.quit()

# # Find the correct table
# table = soup.find("table", {"id": "tot"})

# if not table:
#     print("Table not found. The site may be blocking automated access.")
# else:
#     rows = table.find_all("tr", {"data-row": True})  # Extract data rows
#     print(f"Total rows found: {len(rows)}")  # Debugging output

#     ranks, names, num_threes_career, links = [], [], [], []

#     for row in rows[:36]:  # First 36 rows = top 35 players plus one repeated header row (skipped below)
#         columns = row.find_all("td")

#         if len(columns) < 3:
#             print("Skipping invalid row:", row)  # Debugging output
#             continue  # Skip rows with missing data

#         rank = columns[0].text.strip().replace(".", "")
#         name_tag = columns[1].find("a")  # Extract the <a> tag inside the name column
#         name = name_tag.text.strip() if name_tag else columns[1].text.strip().replace("*", "")  # Get player name
#         link = "https://www.basketball-reference.com" + name_tag["href"] if name_tag else ""  # Get full link

#         threes = columns[2].text.strip()

#         ranks.append(rank)
#         names.append(name)
#         num_threes_career.append(threes)
#         links.append(link)

#     # Create DataFrame with lowercase column names
#     df = pd.DataFrame({
#         "rank": ranks, 
#         "name": names, 
#         "num_threes_career": num_threes_career, 
#         "link": links
#     })

#     # Save as CSV
#     csv_filename = "d_top_shooters.csv"
#     df.to_csv(csv_filename, index=False, encoding="utf-8")

#     print(f"Data saved to {csv_filename}")

Total rows found: 258
Skipping invalid row: <tr class="thead" data-row="30">
<th>Rank</th>
<th>Player</th>
<th>3P</th>
</tr>
Data saved to d_top_shooters.csv


<!-- ## Per Season stats -->

<!-- ### f: extract_shooting_stats -->

In [55]:
# import requests
# from bs4 import BeautifulSoup
# import pandas as pd
# from IPython.core.display import display, HTML

# def extract_shooting_stats(link):
#     """
#     Connects to a Basketball Reference player page and extracts 'season', 'age', '3P', and '3PA' from the 'Totals' table.
    
#     Parameters:
#         link (str): The URL of the player's profile page.
        
#     Returns:
#         DataFrame with 'season' (as a counter), 'age', '3P', and '3PA' columns.
#     """
#     headers = {
#         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
#     }
    
#     # Fetch the page
#     response = requests.get(link, headers=headers)
    
#     if response.status_code != 200:
#         print(f"Failed to fetch page: {response.status_code}")
#         return pd.DataFrame()
    
#     # Parse the HTML
#     soup = BeautifulSoup(response.content, "html.parser")

#     # Find the "Totals" heading
#     totals_heading = soup.find("h2", string="Totals")
    
#     if not totals_heading:
#         print("Totals heading not found on the page.")
#         return pd.DataFrame()

#     # Find the table immediately after the "Totals" heading
#     totals_table = totals_heading.find_next("table")

#     if not totals_table:
#         print("Totals table not found.")
#         return pd.DataFrame()

#     # Extract table rows
#     rows = totals_table.find("tbody").find_all("tr")

#     # Lists to store extracted data
#     seasons, ages, threes, three_attempts = [], [], [], []

#     # Initialize season counter
#     season_counter = 1

#     for row in rows:
#         columns = row.find_all("td")

#         if len(columns) < 12:  # Ensure we have enough columns
#             continue  

#         season = season_counter  # 'season' as a counter starting at 1
#         age = columns[0].text.strip()  # 'age'
#         three_pointers = columns[10].text.strip() if len(columns) > 10 else "0"  # '3p'
#         three_attempts_value = columns[11].text.strip() if len(columns) > 11 else "0"  # '3pa'

#         seasons.append(season)
#         ages.append(age)
#         threes.append(three_pointers)
#         three_attempts.append(three_attempts_value)

#         season_counter += 1  # Increment season counter

#     # Create DataFrame with lowercase column names
#     f_three_pointers = pd.DataFrame({
#         "season": seasons, 
#         "age": ages, 
#         "3p": threes,
#         "3pa": three_attempts
#     })

#     # Convert numeric columns to integers
#     f_three_pointers["3p"] = pd.to_numeric(f_three_pointers["3p"], errors="coerce").fillna(0).astype(int)
#     f_three_pointers["3pa"] = pd.to_numeric(f_three_pointers["3pa"], errors="coerce").fillna(0).astype(int)
#     f_three_pointers["age"] = pd.to_numeric(f_three_pointers["age"], errors="coerce").fillna(0).astype(int)

#     # Display as a scrollable table in Jupyter Notebook
#     display(HTML(f_three_pointers.to_html(notebook=True, escape=False)))
    
#     return f_three_pointers

# # Example Usage
# player_link = "https://www.basketball-reference.com/players/c/curryst01.html"  # Stephen Curry's profile
# f_three_pointers = extract_shooting_stats(player_link)


Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython.display



Unnamed: 0,season,age,3p,3pa
0,1,21,166,380
1,2,22,151,342
2,3,23,55,121
3,4,24,272,600
4,5,25,261,615
5,6,26,286,646
6,7,27,402,886
7,8,28,324,789
8,9,29,212,501
9,10,30,354,810


<!-- ## f_top_shooters -->

In [58]:
# import pandas as pd
# import requests
# from bs4 import BeautifulSoup
# from IPython.core.display import display, HTML

# # Ensure extract_shooting_stats is already defined

# def extract_f_top_shooters(file_path="d_top_shooters.csv"):
#     """
#     Reads player links from d_top_shooters.csv, selects the top 30 players by rank, 
#     and runs extract_shooting_stats on each link.
    
#     Saves the combined results into f_top_shooters.csv.
    
#     Parameters:
#         file_path (str): Path to d_top_shooters.csv.
        
#     Returns:
#         DataFrame with columns ['name', 'rank', 'season', 'age', '3p', '3pa']
#     """
#     # Load player links
#     df_shooters = pd.read_csv(file_path)

#     # Ensure required columns exist
#     if "rank" not in df_shooters.columns or "name" not in df_shooters.columns or "link" not in df_shooters.columns:
#         print("Error: The file must contain 'rank', 'name', and 'link' columns.")
#         return pd.DataFrame()

#     # Process only the top 30 based on rank
#     df_top_30 = df_shooters.nsmallest(30, "rank")

#     all_data = []  # List to store results

#     # Iterate through each player
#     for index, row in df_top_30.iterrows():
#         name, rank, link = row["name"], row["rank"], row["link"]
#         print(f"Processing: {name} (Rank: {rank}) - {index+1}/{len(df_top_30)}")  # Progress tracking

#         # Extract stats
#         player_df = extract_shooting_stats(link)

#         if player_df.empty:
#             print(f"Skipping {name} (No data found)")
#             continue  # Skip if no data

#         # Add player name and rank as first two columns
#         player_df.insert(0, "name", name)
#         player_df.insert(1, "rank", rank)

#         # Store result
#         all_data.append(player_df)

#     # Combine all player data
#     if not all_data:
#         print("No data extracted for any player.")
#         return pd.DataFrame()

#     f_top_shooters_df = pd.concat(all_data, ignore_index=True)

#     # Save the DataFrame
#     f_top_shooters_df.to_csv("f_top_shooters.csv", index=False, encoding="utf-8")

#     # Display as a scrollable table in Jupyter Notebook
#     display(HTML(f_top_shooters_df.to_html(notebook=True, escape=False)))

#     print("Data saved as f_top_shooters.csv")
    
#     return f_top_shooters_df

# # Run the function
# f_top_shooters = extract_f_top_shooters()


Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython.display



Processing: Stephen Curry (Rank: 1) - 1/30


Unnamed: 0,season,age,3p,3pa
0,1,21,166,380
1,2,22,151,342
2,3,23,55,121
3,4,24,272,600
4,5,25,261,615
5,6,26,286,646
6,7,27,402,886
7,8,28,324,789
8,9,29,212,501
9,10,30,354,810


Processing: James Harden (Rank: 2) - 2/30


Unnamed: 0,season,age,3p,3pa
0,1,20,93,248
1,2,21,113,324
2,3,22,114,292
3,4,23,179,486
4,5,24,177,483
5,6,25,208,555
6,7,26,236,657
7,8,27,262,756
8,9,28,265,722
9,10,29,378,1028


Processing: Ray Allen (Rank: 3) - 3/30


Unnamed: 0,season,age,3p,3pa
0,1,21,117,298
1,2,22,134,368
2,3,23,74,208
3,4,24,172,407
4,5,25,202,467
5,6,26,229,528
6,7,27,201,533
7,8,27,123,311
8,9,27,78,222
9,10,28,148,378


Processing: Damian Lillard (Rank: 4) - 4/30


Unnamed: 0,season,age,3p,3pa
0,1,22,185,503
1,2,23,218,554
2,3,24,196,572
3,4,25,229,610
4,5,26,214,579
5,6,27,227,629
6,7,28,237,643
7,8,29,270,674
8,9,30,275,704
9,10,31,92,284


Processing: Klay Thompson (Rank: 5) - 5/30


Unnamed: 0,season,age,3p,3pa
0,1,21,111,268
1,2,22,211,526
2,3,23,223,535
3,4,24,239,545
4,5,25,276,650
5,6,26,268,647
6,7,27,229,521
7,8,28,241,599
8,9,31,114,296
9,10,32,301,731


Processing: Reggie Miller (Rank: 6) - 6/30


Unnamed: 0,season,age,3p,3pa
0,1,22,61,172
1,2,23,98,244
2,3,24,150,362
3,4,25,112,322
4,5,26,129,341
5,6,27,167,419
6,7,28,123,292
7,8,29,195,470
8,9,30,168,410
9,10,31,229,536


Processing: LeBron James (Rank: 7) - 7/30


Unnamed: 0,season,age,3p,3pa
0,1,19,63,217
1,2,20,108,308
2,3,21,127,379
3,4,22,99,310
4,5,23,113,359
5,6,24,132,384
6,7,25,129,387
7,8,26,92,279
8,9,27,54,149
9,10,28,103,254


Processing: Kyle Korver (Rank: 8) - 8/30


Unnamed: 0,season,age,3p,3pa
0,1,22,81,207
1,2,23,226,558
2,3,24,184,438
3,4,25,132,307
4,5,26,111,296
5,6,26,38,108
6,7,26,73,188
7,8,27,103,267
8,9,28,59,110
9,10,29,120,289


Processing: Paul George (Rank: 9) - 9/30


Unnamed: 0,season,age,3p,3pa
0,1,20,41,138
1,2,21,90,234
2,3,22,170,469
3,4,23,182,500
4,5,24,9,22
5,6,25,210,566
6,7,26,195,496
7,8,27,244,609
8,9,28,292,757
9,10,29,157,381


Processing: Vince Carter (Rank: 10) - 10/30


Unnamed: 0,season,age,3p,3pa
0,1,22,19,66
1,2,23,95,236
2,3,24,162,397
3,4,25,121,313
4,5,26,45,131
5,6,27,93,243
6,7,28,127,313
7,8,28,19,59
8,9,28,108,254
9,10,29,125,367


Processing: Jason Terry (Rank: 11) - 11/30


Unnamed: 0,season,age,3p,3pa
0,1,22,46,157
1,2,23,124,314
2,3,24,172,444
3,4,25,160,431
4,5,26,146,421
5,6,27,103,245
6,7,28,171,416
7,8,29,162,370
8,9,30,136,363
9,10,31,167,456


Processing: Jamal Crawford (Rank: 12) - 12/30


Unnamed: 0,season,age,3p,3pa
0,1,20,41,117
1,2,21,26,58
2,3,22,86,242
3,4,23,165,521
4,5,24,185,512
5,6,25,101,293
6,7,26,103,322
7,8,27,176,494
8,9,28,142,394
9,10,28,35,77


Processing: Kyle Lowry (Rank: 13) - 13/30


Unnamed: 0,season,age,3p,3pa
0,1,20,3,8
1,2,21,36,140
2,3,22,25,98
3,4,22,17,69
4,5,22,8,29
5,6,23,37,136
6,7,24,129,343
7,8,25,79,211
8,9,26,101,279
9,10,27,190,500


Processing: Kevin Durant (Rank: 14) - 14/30


Unnamed: 0,season,age,3p,3pa
0,1,19,59,205
1,2,20,97,230
2,3,21,128,351
3,4,22,145,414
4,5,23,133,344
5,6,24,139,334
6,7,25,192,491
7,8,26,64,159
8,9,27,186,481
9,10,28,117,312


Processing: Paul Pierce (Rank: 15) - 15/30


Unnamed: 0,season,age,3p,3pa
0,1,21,84,204
1,2,22,96,280
2,3,23,147,384
3,4,24,210,520
4,5,25,118,391
5,6,26,115,384
6,7,27,108,292
7,8,28,111,314
8,9,29,107,275
9,10,30,143,365


Processing: Buddy Hield (Rank: 16) - 16/30


Unnamed: 0,season,age,3p,3pa
0,1,24,148,379
1,2,24,89,241
2,3,24,59,138
3,4,25,176,408
4,5,26,278,651
5,6,27,271,688
6,7,28,282,721
7,8,29,262,716
8,9,29,182,495
9,10,29,80,221


Processing: Eric Gordon (Rank: 17) - 17/30


Unnamed: 0,season,age,3p,3pa
0,1,20,131,337
1,2,21,119,321
2,3,22,106,291
3,4,23,10,40
4,5,24,56,173
5,6,25,101,258
6,7,26,141,315
7,8,27,113,294
8,9,28,246,661
9,10,29,218,608


Processing: Jason Kidd (Rank: 18) - 18/30


Unnamed: 0,season,age,3p,3pa
0,1,21,70,257
1,2,22,133,396
2,3,23,61,165
3,4,23,21,65
4,5,23,40,100
5,6,24,73,233
6,7,25,45,123
7,8,26,56,166
8,9,27,69,232
9,10,28,117,364


Processing: Dirk Nowitzki (Rank: 19) - 19/30


Unnamed: 0,season,age,3p,3pa
0,1,20,14,68
1,2,21,116,306
2,3,22,151,390
3,4,23,139,350
4,5,24,148,390
5,6,25,99,290
6,7,26,91,228
7,8,27,110,271
8,9,28,72,173
9,10,29,79,220


Processing: Joe Johnson (Rank: 20) - 20/30


Unnamed: 0,season,age,3p,3pa
0,1,20,38,130
1,2,20,24,88
2,3,20,14,42
3,4,21,75,205
4,5,22,83,272
5,6,23,177,370
6,7,24,128,360
7,8,25,119,312
8,9,26,169,444
9,10,27,149,414


Processing: CJ McCollum (Rank: 21) - 21/30


Unnamed: 0,season,age,3p,3pa
0,1,22,30,80
1,2,23,55,139
2,3,24,197,472
3,4,25,185,439
4,5,26,189,476
5,6,27,167,445
6,7,28,194,512
7,8,29,169,420
8,9,30,182,469
9,10,30,111,289


Processing: JJ Redick (Rank: 22) - 22/30


Unnamed: 0,season,age,3p,3pa
0,1,22,38,98
1,2,23,17,43
2,3,24,67,179
3,4,25,111,274
4,5,26,87,219
5,6,27,112,268
6,7,28,165,451
7,8,28,117,300
8,9,28,48,151
9,10,29,73,185


Processing: J.R. Smith (Rank: 23) - 23/30


Unnamed: 0,season,age,3p,3pa
0,1,19,81,281
1,2,20,52,140
2,3,21,149,382
3,4,22,157,390
4,5,23,180,453
5,6,24,158,467
6,7,25,124,318
7,8,26,67,193
8,9,27,155,436
9,10,28,189,480


Processing: Mike Conley (Rank: 24) - 24/30


Unnamed: 0,season,age,3p,3pa
0,1,20,30,91
1,2,21,88,217
2,3,22,82,212
3,4,23,80,217
4,5,24,60,159
5,6,25,106,293
6,7,26,105,291
7,8,27,107,277
8,9,28,78,215
9,10,29,171,419


Processing: Kyrie Irving (Rank: 25) - 25/30


Unnamed: 0,season,age,3p,3pa
0,1,19,73,183
1,2,20,109,279
2,3,21,123,344
3,4,22,157,378
4,5,23,84,262
5,6,24,177,441
6,7,25,166,407
7,8,26,174,434
8,9,27,56,142
9,10,28,152,378


Processing: Wesley Matthews (Rank: 26) - 26/30


Unnamed: 0,season,age,3p,3pa
0,1,23,63,165
1,2,24,154,378
2,3,25,129,337
3,4,26,169,425
4,5,27,201,511
5,6,28,173,445
6,7,29,189,525
7,8,30,174,479
8,9,31,153,402
9,10,32,150,403


Processing: Chris Paul (Rank: 27) - 27/30


Unnamed: 0,season,age,3p,3pa
0,1,20,50,177
1,2,21,50,143
2,3,22,92,249
3,4,23,64,176
4,5,24,52,127
5,6,25,71,183
6,7,26,79,213
7,8,27,76,232
8,9,28,78,212
9,10,29,139,349


Processing: Chauncey Billups (Rank: 28) - 28/30


Unnamed: 0,season,age,3p,3pa
0,1,21,107,325
1,2,21,64,189
2,3,21,43,136
3,4,22,85,235
4,5,23,7,41
5,6,24,73,194
6,7,25,124,315
7,8,26,149,380
8,9,27,130,335
9,10,28,165,387


Processing: Kobe Bryant (Rank: 29) - 29/30


Unnamed: 0,season,age,3p,3pa
0,1,18,51,136
1,2,19,75,220
2,3,20,27,101
3,4,21,46,144
4,5,22,61,200
5,6,23,33,132
6,7,24,124,324
7,8,25,71,217
8,9,26,131,387
9,10,27,180,518


Processing: Tim Hardaway Jr. (Rank: 30) - 30/30


Unnamed: 0,season,age,3p,3pa
0,1,21,130,358
1,2,22,121,354
2,3,23,48,142
3,4,24,149,417
4,5,25,130,410
5,6,26,162,477
6,7,26,117,337
7,8,26,45,140
8,9,27,204,513
9,10,28,207,529


Unnamed: 0,name,rank,season,age,3p,3pa
0,Stephen Curry,1,1,21,166,380
1,Stephen Curry,1,2,22,151,342
2,Stephen Curry,1,3,23,55,121
3,Stephen Curry,1,4,24,272,600
4,Stephen Curry,1,5,25,261,615
5,Stephen Curry,1,6,26,286,646
6,Stephen Curry,1,7,27,402,886
7,Stephen Curry,1,8,28,324,789
8,Stephen Curry,1,9,29,212,501
9,Stephen Curry,1,10,30,354,810


Data saved as f_top_shooters.csv


In [60]:
# import pandas as pd
# from IPython.core.display import display, HTML

# # Load f_top_shooters.csv
# f_top_shooters = pd.read_csv("f_top_shooters.csv")

# # Ensure required columns exist
# required_cols = {"name", "rank", "season", "age", "3p", "3pa"}
# if not required_cols.issubset(f_top_shooters.columns):
#     print(f"Error: The file must contain {required_cols} columns.")
# else:
#     # Sort by rank, name, and age before deduplication
#     f_top_shooters.sort_values(by=["rank", "name", "age"], ascending=[True, True, True], inplace=True)

#     # Remove duplicates, keeping only the first row per (name, age) pair
#     f_top_shooters = f_top_shooters.drop_duplicates(subset=["name", "age"], keep="first")

#     # Recalculate season numbers to maintain sequential order
#     f_top_shooters["season"] = f_top_shooters.groupby("name").cumcount() + 1

#     # Recalculate cumulative sums per player
#     f_top_shooters["3p_cum"] = f_top_shooters.groupby("name")["3p"].cumsum()
#     f_top_shooters["3pa_cum"] = f_top_shooters.groupby("name")["3pa"].cumsum()

#     # Save the cleaned DataFrame
#     f_top_shooters.to_csv("f_top_shooters.csv", index=False, encoding="utf-8")

#     # Display as a scrollable table in Jupyter Notebook
#     display(HTML(f_top_shooters.to_html(notebook=True, escape=False)))

#     print("Updated f_top_shooters.csv with deduplicated players and correct cumulative 3P and 3PA.")


Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython.display



Unnamed: 0,name,rank,season,age,3p,3pa,3p_cum,3pa_cum
0,Stephen Curry,1,1,21,166,380,166,380
1,Stephen Curry,1,2,22,151,342,317,722
2,Stephen Curry,1,3,23,55,121,372,843
3,Stephen Curry,1,4,24,272,600,644,1443
4,Stephen Curry,1,5,25,261,615,905,2058
5,Stephen Curry,1,6,26,286,646,1191,2704
6,Stephen Curry,1,7,27,402,886,1593,3590
7,Stephen Curry,1,8,28,324,789,1917,4379
8,Stephen Curry,1,9,29,212,501,2129,4880
9,Stephen Curry,1,10,30,354,810,2483,5690


Updated f_top_shooters.csv with deduplicated players and correct cumulative 3P and 3PA.


In [3]:
import os

import pandas as pd
import plotly.express as px
from IPython.display import display

# Function to generate and display the final cumulative 3P trend chart
def display_final_cumulative_3p_chart(file_path="f_top_shooters.csv", height=600, width=800):
    """
    Plot every player's cumulative made three-pointers against season number.

    Reads the CSV at ``file_path``, draws one line per player (x = ``season``,
    y = ``3p_cum``) with the ``HorizonAnalytics`` template and displays the
    figure inline. Nothing is written to disk.

    Parameters:
        file_path (str): Path to f_top_shooters.csv. Must contain the columns
            ``name``, ``rank``, ``season`` and ``3p_cum``.
        height (int): Chart height in pixels.
        width (int): Chart width in pixels.

    Returns:
        None. Prints an error and returns early when the file is missing,
        lacks a required column, or contains no rows.
    """
    if not os.path.exists(file_path):
        print(f"Error: {file_path} not found.")
        return

    f_top_shooters = pd.read_csv(file_path)

    required_cols = {"name", "rank", "season", "3p_cum"}
    if not required_cols.issubset(f_top_shooters.columns):
        print(f"Error: The file must contain {required_cols} columns.")
        return

    # A header-only file would otherwise fail further down with a KeyError on "3p_cum".
    if f_top_shooters.empty:
        print(f"Error: {file_path} contains no rows.")
        return

    # Unique players ordered from the highest rank number down to rank 1;
    # this fixes the order in which the traces are drawn.
    players = f_top_shooters.sort_values("rank", ascending=False)["name"].unique()

    # Collect each player's rows in that order and concatenate once
    # (instead of growing a DataFrame inside the loop).
    final_chart_data = pd.concat(
        [f_top_shooters[f_top_shooters["name"] == player_name] for player_name in players],
        ignore_index=True,
    )

    # Upper y-limit: the largest cumulative total plus 100 of headroom
    global_max = final_chart_data["3p_cum"].max() + 100

    fig = px.line(
        final_chart_data,
        x="season",
        y="3p_cum",
        color="name",
        template="HorizonAnalytics",
    )

    fig.update_layout(
        xaxis=dict(
            title=None,
            showticklabels=True,
            ticklabelposition="outside",
            tickmode="linear",
            dtick=1,  # Ensure only whole numbers appear on x-axis
        ),
        yaxis=dict(
            range=[0, global_max],
            title=None,
            showticklabels=True,
            ticklabelposition="outside",
        ),
        margin=dict(l=180, r=40, t=40, b=140),  # Adjust margins for clarity
        showlegend=False,
        height=height,
        width=width,
        # Transparent backgrounds override the template's solid colour
        plot_bgcolor="rgba(0, 0, 0, 0)",
        paper_bgcolor="rgba(0, 0, 0, 0)",
    )

    display(fig)

# Run the function to display the last frame
# The explicit height/width are passed through to fig.update_layout inside the function.
display_final_cumulative_3p_chart(file_path="f_top_shooters.csv", height=1080, width=1280)

  from IPython.core.display import display, HTML


Charts saved in: /Users/arya/Documents/Adobe/Premiere Pro/Horizon Analytics/2025-03 three_pointers/trend_chart


<!-- extract text: name, num threes, record
extract photos:  players -->

In [33]:
import pandas as pd
import plotly.express as px
import os
from IPython.core.display import display

# Function to generate the final cumulative 3P trend chart
def display_final_cumulative_3p_chart(file_path="f_top_shooters.csv", height=600, width=800, line_width=2,
                                      highlight_player="Stephen Curry", highlight_color="yellow",
                                      base_color="lightgrey"):
    """
    Plot cumulative made three-pointers per player, every line starting at (0, 0).

    Reads the CSV at ``file_path``, prepends a synthetic "season 0" point with
    ``3p_cum = 0`` for each player, and draws one line per player. One player is
    drawn in ``highlight_color``; all others use ``base_color``. The figure is
    displayed inline and not saved to disk.

    Note: an earlier cell of this notebook defines a function with the same
    name; running this cell replaces it.

    Parameters:
        file_path (str): Path to f_top_shooters.csv. Must contain the columns
            ``name``, ``rank``, ``season`` and ``3p_cum``.
        height (int): Chart height in pixels.
        width (int): Chart width in pixels.
        line_width (int): Thickness of the lines in the plot.
        highlight_player (str): Name (as in the ``name`` column) of the player
            whose line is emphasised.
        highlight_color (str): Line colour for ``highlight_player``.
        base_color (str): Line colour for every other player.

    Returns:
        None (displays the figure). Prints an error and returns early when the
        file is missing, lacks a required column, or contains no rows.
    """
    if not os.path.exists(file_path):
        print(f"Error: {file_path} not found.")
        return

    f_top_shooters = pd.read_csv(file_path)

    required_cols = {"name", "rank", "season", "3p_cum"}
    if not required_cols.issubset(f_top_shooters.columns):
        print(f"Error: The file must contain {required_cols} columns.")
        return

    # Rows without a player name cannot be assigned to a line.
    f_top_shooters = f_top_shooters.dropna(subset=["name"])

    # A header-only file would otherwise fail in sort_values with a KeyError.
    if f_top_shooters.empty:
        print(f"Error: {file_path} contains no rows.")
        return

    # One synthetic row per player (season 0, 3p_cum 0), carrying the rank of
    # the player's first row. Columns not set here are left as NaN by concat.
    season_zero = (
        f_top_shooters
        .drop_duplicates(subset="name", keep="first")[["name", "rank"]]
        .assign(**{"season": 0, "3p_cum": 0})
    )

    # Highest rank number first so rank 1 is drawn last (on top); seasons ascending.
    chart_data = (
        pd.concat([season_zero, f_top_shooters], ignore_index=True)
        .sort_values(["rank", "season"], ascending=[False, True])
    )

    # Start from an empty themed figure and add one trace per player.
    fig = px.line(template="HorizonAnalytics")
    for player_name, player_data in chart_data.groupby("name", sort=False):
        line_color = highlight_color if player_name == highlight_player else base_color

        # px.line builds a single trace for this player; reuse it in the shared figure.
        trace = px.line(
            player_data,
            x="season",
            y="3p_cum",
            color_discrete_sequence=[line_color],
        ).data[0]
        trace.line.width = line_width
        fig.add_trace(trace)

    fig.update_layout(
        xaxis=dict(
            title=None,
            showticklabels=True,
            ticklabelposition="outside",
            tickmode="linear",
            dtick=1,  # Ensure only whole numbers appear on x-axis
        ),
        yaxis=dict(
            title=None,
            showticklabels=True,
            ticklabelposition="outside",
        ),
        margin=dict(l=180, r=40, t=40, b=140),  # Increase margins to prevent axis label cutoff
        showlegend=False,
        height=height,
        width=width,
        # Transparent backgrounds override the template's solid colour
        plot_bgcolor='rgba(0, 0, 0, 0)',
        paper_bgcolor='rgba(0, 0, 0, 0)',
    )

    # Imported here from the public module: the IPython.core.display path used
    # by this cell's import line is deprecated since IPython 7.14.
    from IPython.display import display

    display(fig)

# Run the function to display the last frame
# This calls the definition from this cell, which replaces the same-named function defined in an earlier cell.
display_final_cumulative_3p_chart(file_path="f_top_shooters.csv", height=1080, width=1920, line_width=12)


Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython.display



<!-- Photos -->

In [6]:
import os
import requests
import pandas as pd

# Define save location
# NOTE(review): absolute, machine-specific path; the cell only runs unchanged where
# this directory can be created. Consider a configurable base directory.
output_dir = "/Users/arya/Documents/Adobe/Premiere Pro/Horizon Analytics/2025-03 three_pointers/player_images"
# exist_ok=True keeps the cell re-runnable when the folder already exists
os.makedirs(output_dir, exist_ok=True)

# Manually provided NBA stats player IDs for the top 30 shooters, listed in
# rank order (position 0 = rank 1). Only the ID differs between the links.
_NBA_PLAYER_IDS = (
    201939, 201935, 951, 203081, 202691,
    397, 2544, 2594, 202331, 1713,
    1891, 2037, 200768, 201142, 1718,
    1627741, 201569, 467, 1717, 2207,
    203468, 200755, 2747, 201144, 202681,
    202083, 101108, 1497, 977, 203501,
)

# Full stats-page URL for each player, in the same order as the IDs above
nba_stats_links = [
    f"https://www.nba.com/stats/player/{nba_player_id}/traditional"
    for nba_player_id in _NBA_PLAYER_IDS
]

# Read d_top_shooters.csv and get top 30 players (to match names to links)
file_path = "d_top_shooters.csv"
df_shooters = pd.read_csv(file_path)
df_top_30 = df_shooters.nsmallest(30, "rank")

# Links are paired with players purely by position further down, so the two
# lists must have the same length.
# Raise instead of calling exit(): exit() is an interactive-shell helper, and in
# a Jupyter kernel it asks the kernel to shut down rather than just stopping
# this cell. SystemExit stops the cell (and still ends a plain script run).
if len(df_top_30) != len(nba_stats_links):
    raise SystemExit("Error: Mismatch between top 30 players and provided NBA links.")

# Function to download an image
def download_image(url, path, retries=3, timeout=10):
    """
    Download ``url`` to ``path``, retrying on HTTP or network failure.

    The body is streamed into ``<path>.part`` and moved to ``path`` only after
    the download has completed, so a failed attempt never leaves a truncated
    file at ``path``.

    Parameters:
        url (str): Image URL. A falsy value is skipped without any request.
        path (str): Destination file path; replaced if it already exists.
        retries (int): Maximum number of attempts.
        timeout (float): Seconds to wait on the server per attempt.

    Returns:
        bool: True if the file was saved, False if the URL was empty or every
        attempt failed.
    """
    if not url:
        return False

    headers = {"User-Agent": "Mozilla/5.0"}
    partial_path = f"{path}.part"

    for attempt in range(1, retries + 1):
        print(f"Downloading (attempt {attempt}) {url} -> {path}")
        try:
            # `with` releases the streamed connection on every exit path,
            # including non-200 replies whose body is never read.
            with requests.get(url, headers=headers, allow_redirects=True,
                              stream=True, timeout=timeout) as response:
                if response.status_code == 200:
                    with open(partial_path, "wb") as file:
                        for chunk in response.iter_content(1024):
                            file.write(chunk)
                    os.replace(partial_path, path)
                    print(f"✅ Saved: {path}")
                    return True
                print(f"❌ Failed (attempt {attempt}) with status code: {response.status_code}")
        except requests.RequestException as error:
            # Connection errors and timeouts count as a failed attempt
            # instead of aborting the whole download loop.
            print(f"❌ Failed (attempt {attempt}) with error: {error}")
            if os.path.exists(partial_path):
                os.remove(partial_path)

    print(f"🚨 Skipping: {url} after {retries} failed attempts")
    return False

# Process each player. Links and players are paired by position, so
# nba_stats_links must be in the same order as df_top_30.
for nba_link, row in zip(nba_stats_links, df_top_30.itertuples()):
    # The player ID is the second-to-last URL segment: .../player/<id>/traditional
    player_id = nba_link.split("/")[-2]

    # Construct the headshot URL
    image_url = f"https://cdn.nba.com/headshots/nba/latest/1040x760/{player_id}.png"

    # Save as {rank}.jpg (e.g., 1.jpg, 2.jpg, ..., 30.jpg).
    # NOTE(review): the URL requests a .png, so the saved bytes are presumably PNG
    # despite the .jpg name; the name is kept because the frame-building cell
    # looks the files up as {rank}.jpg.
    # A dedicated variable is used so the CSV path in `file_path` is not overwritten.
    image_path = os.path.join(output_dir, f"{row.rank}.jpg")
    download_image(image_url, image_path)

print("All available images downloaded successfully!")

In [5]:
import os
import shutil
import pandas as pd
from PIL import Image

# Directories
base_dir = "/Users/arya/Documents/Adobe/Premiere Pro/Horizon Analytics/2025-03 three_pointers"
source_dir = os.path.join(base_dir, "player_images")  # Where headshots are stored
output_dir = os.path.join(base_dir, "player_frames")  # Where duplicated frames will be stored

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Load data
f_top_shooters_path = "f_top_shooters.csv"
d_top_shooters_path = "d_top_shooters.csv"

# Fail the cell with an exception that names the missing file(s).
# exit() would stop the kernel/interpreter instead of just this cell.
missing_csv_paths = [
    csv_path
    for csv_path in (f_top_shooters_path, d_top_shooters_path)
    if not os.path.exists(csv_path)
]
if missing_csv_paths:
    raise FileNotFoundError(
        f"Error: Missing required CSV files: {', '.join(missing_csv_paths)}"
    )

df_shooters = pd.read_csv(d_top_shooters_path)  # Original rankings
df_top_shooters = pd.read_csv(f_top_shooters_path)  # Expanded season data

# Reverse the row order so the last row of the CSV becomes frame 1.
# NOTE(review): this yields "rank 30 first" only if the CSV is stored in
# ascending rank order - confirm against the file.
df_top_shooters = df_top_shooters.iloc[::-1].reset_index(drop=True)

# Create a mapping: {name -> rank}
name_to_rank = df_shooters.set_index("name")["rank"].to_dict()

# One output frame per row of f_top_shooters, numbered from 1 in the reversed order
for index, row in enumerate(df_top_shooters.itertuples(), start=1):
    name = row.name
    rank = name_to_rank.get(name)

    if rank is None:
        # The frame number is still consumed, so later frames keep their position
        print(f"❌ No rank found for {name}, skipping.")
        continue

    # Special case for Jason Kidd (rank 18, uses .png)
    if rank == 18:
        source_image = os.path.join(source_dir, "18.png")
    else:
        source_image = os.path.join(source_dir, f"{rank}.jpg")

    # Destination image path (000001.png, 000002.png, ...)
    dest_image = os.path.join(output_dir, f"{index:06d}.png")

    # Convert and save as PNG
    if os.path.exists(source_image):
        try:
            # The context manager closes the source file handle after each
            # frame instead of leaving one open handle per iteration.
            with Image.open(source_image) as img:
                img.save(dest_image, "PNG")  # Convert to PNG format
            print(f"✅ Converted & saved: {dest_image}")
        except Exception as e:
            # Best effort: report the bad image and continue with the next frame
            print(f"❌ Error converting {source_image}: {e}")
    else:
        print(f"❌ Missing headshot for {name} ({source_image})")

print("✅ All player frames saved as PNG successfully!")

## Name Frames

In [22]:
from PIL import Image, ImageDraw, ImageFont
import pandas as pd
import os

# Project folder for this video, and the sub-folder that receives the name frames
base_dir = "/Users/arya/Documents/Adobe/Premiere Pro/Horizon Analytics/2025-03 three_pointers"
output_dir = os.path.join(base_dir, "name_frames")

# Create the folder up front; exist_ok=True keeps re-running this cell harmless
os.makedirs(name=output_dir, exist_ok=True)

# Function to generate name frames for each row in f_top_shooters.csv
def generate_player_name_frames(file_path="f_top_shooters.csv", height=720, width=1280,
                                output_dir=output_dir, font_size=51, font_type="ExtraBold",
                                font_color=(255, 255, 255), font_outline_width=3,
                                font_outline_color=(0, 0, 0)):
    """
    Render one transparent PNG per row of the CSV showing 'RANK. NAME'.

    Labels are built as '<rank>. <NAME IN UPPER CASE>' (e.g. '30. TIM HARDAWAY JR.').
    Rows are sorted by rank ascending and the resulting list is reversed, so the
    highest rank number is rendered first. Frames are written to `output_dir`
    as 000001.png, 000002.png, ...

    Parameters:
        file_path (str): Path to the CSV; it must contain 'name' and 'rank' columns.
        height (int): Image height in pixels.
        width (int): Image width in pixels.
        output_dir (str): Folder to save output frames (created if missing).
        font_size (int): Size passed to ImageFont.truetype.
        font_type (str): Montserrat weight used in the font file name
            (e.g. "Regular", "Bold", "ExtraBold").
        font_color (tuple): RGB color of the text (default: white).
        font_outline_width (int): Outline thickness in pixels; 0 disables it.
        font_outline_color (tuple): RGB color of the outline (default: black).

    Returns:
        None. Prints an error and returns early when the CSV is missing or
        lacks the required columns.
    """
    # Load player data
    if not os.path.exists(file_path):
        print(f"Error: {file_path} not found.")
        return

    df_top_shooters = pd.read_csv(file_path)

    # Ensure required columns exist
    if "name" not in df_top_shooters.columns or "rank" not in df_top_shooters.columns:
        print("Error: f_top_shooters.csv must contain 'name' and 'rank' columns.")
        return

    # Make the function usable with any output_dir, not only one created beforehand
    os.makedirs(output_dir, exist_ok=True)

    # Sort by rank ascending and keep every row (one frame per row) ...
    df_top_shooters = df_top_shooters.sort_values("rank", ascending=True)

    # ... then reverse the whole list so the highest rank number comes first
    player_labels = [
        f"{rank}. {name.upper()}"
        for rank, name in zip(df_top_shooters["rank"], df_top_shooters["name"])
    ][::-1]

    # Montserrat is expected in a sibling folder of the working directory
    font_path = os.path.join("..", "Montserrat", f"Montserrat-{font_type}.ttf")

    # Try to load the selected font, fall back to the default if missing
    try:
        font = ImageFont.truetype(font_path, font_size)
    except IOError:
        print(f"Montserrat font '{font_type}' not found. Using default font.")
        font = ImageFont.load_default()

    # Consecutive rows of the same player share a label, so the image is only
    # re-rendered when the label changes and otherwise saved again as-is.
    rendered_label = None
    img = None

    for frame_index, player_label in enumerate(player_labels, start=1):
        if player_label != rendered_label:
            # Empty image with a fully transparent background
            img = Image.new("RGBA", (width, height), color=(0, 0, 0, 0))
            draw = ImageDraw.Draw(img)

            # textbbox reports where the ink lands relative to the drawing
            # origin; its left/top are usually non-zero, so they must be
            # subtracted for the ink itself to end up centered.
            left, top, right, bottom = draw.textbbox((0, 0), player_label, font=font)
            text_x = (width - (right - left)) // 2 - left
            text_y = (height - (bottom - top)) // 2 - top

            # Outline: stamp the text at every offset in a square around the
            # final position, skipping the center
            if font_outline_width > 0:
                for dx in range(-font_outline_width, font_outline_width + 1):
                    for dy in range(-font_outline_width, font_outline_width + 1):
                        if dx != 0 or dy != 0:
                            draw.text((text_x + dx, text_y + dy), player_label,
                                      font=font, fill=font_outline_color)

            # Main text on top of the outline
            draw.text((text_x, text_y), player_label, font=font, fill=font_color)
            rendered_label = player_label

        # Six-digit, 1-based frame number (000001.png, 000002.png, ...).
        # A separate name keeps the `file_path` argument intact.
        file_name = f"{frame_index:06d}.png"
        frame_path = os.path.join(output_dir, file_name)

        # Save the image
        img.save(frame_path, "PNG")

        print(f"✅ Saved frame for: {player_label} → {file_name}")

    print(f"✅ Name frames saved in: {output_dir}")

# Render the name frames with Montserrat ExtraBold: white text with a black
# outline on a 1280x720 canvas. output_dir is left at the function's default.
name_frame_options = dict(
    file_path="f_top_shooters.csv",
    height=720,
    width=1280,
    font_size=51,
    font_type="ExtraBold",
    font_color=(255, 255, 255),
    font_outline_width=3,
    font_outline_color=(0, 0, 0),
)
generate_player_name_frames(**name_frame_options)

✅ Saved frame for: 30. TIM HARDAWAY JR. → 000001.png
✅ Saved frame for: 30. TIM HARDAWAY JR. → 000002.png
✅ Saved frame for: 30. TIM HARDAWAY JR. → 000003.png
✅ Saved frame for: 30. TIM HARDAWAY JR. → 000004.png
✅ Saved frame for: 30. TIM HARDAWAY JR. → 000005.png
✅ Saved frame for: 30. TIM HARDAWAY JR. → 000006.png
✅ Saved frame for: 30. TIM HARDAWAY JR. → 000007.png
✅ Saved frame for: 30. TIM HARDAWAY JR. → 000008.png
✅ Saved frame for: 30. TIM HARDAWAY JR. → 000009.png
✅ Saved frame for: 30. TIM HARDAWAY JR. → 000010.png
✅ Saved frame for: 30. TIM HARDAWAY JR. → 000011.png
✅ Saved frame for: 30. TIM HARDAWAY JR. → 000012.png
✅ Saved frame for: 29. KOBE BRYANT → 000013.png
✅ Saved frame for: 29. KOBE BRYANT → 000014.png
✅ Saved frame for: 29. KOBE BRYANT → 000015.png
✅ Saved frame for: 29. KOBE BRYANT → 000016.png
✅ Saved frame for: 29. KOBE BRYANT → 000017.png
✅ Saved frame for: 29. KOBE BRYANT → 000018.png
✅ Saved frame for: 29. KOBE BRYANT → 000019.png
✅ Saved frame for: 29. KOBE 

## Text Frames (Season, Total 3PM, Record)

In [21]:
from PIL import Image, ImageDraw, ImageFont
import pandas as pd
import os

# Function to generate and save individual frames displaying Season, Total 3PM, and Record
def generate_text_frames(file_path="f_top_shooters.csv", height=720, width=1280,
                         output_dir="/Users/arya/Documents/Adobe/Premiere Pro/Horizon Analytics/2025-03 three_pointers/text_frames",
                         font_size=51, font_type="ExtraBold",
                         font_color=(255, 255, 255), font_outline_width=3,
                         font_outline_color=(0, 0, 0), line_spacing_factor=2.0):
    """
    Render one transparent PNG per CSV row with three lines of text:
    - Season
    - Total 3PM (the row's `3p_cum` value)
    - Record (highest Total 3PM seen up to and including that frame)

    Rows are processed by rank descending, then season ascending, and frames
    are saved to `output_dir` as 000001.png, 000002.png, ...

    Parameters:
        file_path (str): CSV with 'season', '3p_cum' and 'rank' columns.
        height (int): Image height in pixels.
        width (int): Image width in pixels.
        output_dir (str): Folder to save output frames (created if missing).
        font_size (int): Size passed to ImageFont.truetype.
        font_type (str): Montserrat weight used in the font file name.
        font_color (tuple): RGB color of the text.
        font_outline_width (int): Outline thickness in pixels.
        font_outline_color (tuple): RGB color of the outline.
        line_spacing_factor (float): Line pitch as a multiple of font_size.

    Returns:
        None. Prints an error and returns early when a required column is missing.
    """
    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Load data; '3p_cum' gets a clearer name
    df_top_shooters = pd.read_csv(file_path).rename(columns={"3p_cum": "three_point_cum"})

    # 'rank' is required too: the sort below uses it, and without this check a
    # CSV lacking it would fail with a KeyError after passing validation.
    required_cols = {"season", "three_point_cum", "rank"}
    if not required_cols.issubset(df_top_shooters.columns):
        print(f"Error: The file must contain {required_cols} columns.")
        return

    # Load Montserrat font, falling back to Pillow's default if the file is missing
    font_path = os.path.join("..", "Montserrat", f"Montserrat-{font_type}.ttf")
    try:
        font = ImageFont.truetype(font_path, font_size)
    except IOError:
        print(f"Montserrat font {font_type} not found. Using default font.")
        font = ImageFont.load_default()

    # Highest rank number first; seasons in ascending order within each rank
    df_top_shooters = df_top_shooters.sort_values(["rank", "season"], ascending=[False, True])

    # Vertical distance between consecutive text lines (same for every frame)
    line_height = font_size * line_spacing_factor

    # Text block starts at the top-left corner with some padding
    x_position = 40
    top_padding = 40

    record_3p = 0  # Highest Total 3PM seen so far across all processed rows

    frame_rows = zip(df_top_shooters["season"], df_top_shooters["three_point_cum"])
    for frame_index, (season, total_3pm) in enumerate(frame_rows, start=1):
        # Update record if the new value is the highest seen so far
        record_3p = max(record_3p, total_3pm)

        # Empty image with a fully transparent background
        img = Image.new("RGBA", (width, height), color=(0, 0, 0, 0))
        draw = ImageDraw.Draw(img)

        text_lines = [
            f"Season: {season}",
            f"Total 3PM: {total_3pm}",
            f"Record: {record_3p}"
        ]

        y_position = top_padding
        for text in text_lines:
            # Outline: stamp the text at every offset in a square around the
            # final position, skipping the center
            for dx in range(-font_outline_width, font_outline_width + 1):
                for dy in range(-font_outline_width, font_outline_width + 1):
                    if dx != 0 or dy != 0:
                        draw.text((x_position + dx, y_position + dy), text,
                                  font=font, fill=font_outline_color)

            # Main text on top of the outline
            draw.text((x_position, y_position), text, font=font, fill=font_color)

            # Move down for the next line
            y_position += line_height

        # Six-digit, 1-based frame number. A separate name keeps the
        # `file_path` argument intact.
        file_name = f"{frame_index:06d}.png"
        frame_path = os.path.join(output_dir, file_name)

        # Save the image
        img.save(frame_path, "PNG")
        print(f"✅ Saved frame for Season {season} → {file_name}")

    print(f"✅ Text frames saved in: {output_dir}")

# Run with every default: reads f_top_shooters.csv from the working directory
# and writes the frames into the function's default text_frames folder.
generate_text_frames()

✅ Saved frame for Season 1 → 000001.png
✅ Saved frame for Season 2 → 000002.png
✅ Saved frame for Season 3 → 000003.png
✅ Saved frame for Season 4 → 000004.png
✅ Saved frame for Season 5 → 000005.png
✅ Saved frame for Season 6 → 000006.png
✅ Saved frame for Season 7 → 000007.png
✅ Saved frame for Season 8 → 000008.png
✅ Saved frame for Season 9 → 000009.png
✅ Saved frame for Season 10 → 000010.png
✅ Saved frame for Season 11 → 000011.png
✅ Saved frame for Season 12 → 000012.png
✅ Saved frame for Season 1 → 000013.png
✅ Saved frame for Season 2 → 000014.png
✅ Saved frame for Season 3 → 000015.png
✅ Saved frame for Season 4 → 000016.png
✅ Saved frame for Season 5 → 000017.png
✅ Saved frame for Season 6 → 000018.png
✅ Saved frame for Season 7 → 000019.png
✅ Saved frame for Season 8 → 000020.png
✅ Saved frame for Season 9 → 000021.png
✅ Saved frame for Season 10 → 000022.png
✅ Saved frame for Season 11 → 000023.png
✅ Saved frame for Season 12 → 000024.png
✅ Saved frame for Season 13 → 0000