In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import tkinter as tk
from tkinter import ttk
import tempfile
from PIL import Image, ImageTk
import os
import plotly.io as pio

In [2]:
# Lift the column-display limit so wide frames are shown in full,
# then load the player workbook used throughout the analysis.
pd.options.display.max_columns = None
data = pd.read_excel('nba_player_data.xlsx')

# Data cleaning & analysis

In [3]:
# Drop the rank and efficiency columns, which this analysis does not use.
# Rebinding (instead of inplace=True) plus errors='ignore' keeps the cell
# re-runnable once the columns are already gone.
data = data.drop(columns=['RANK', 'EFF'], errors='ignore')
# The first four characters of 'Year' are parsed as the season's starting year.
data['season_start_year'] = data['Year'].str[:4].astype(int)

# 'NOP' and 'NOH' are the same team (renamed in 2013), so merge them into 'NO'.
# The result is assigned back to the column: calling replace(inplace=True) on
# data['TEAM'] is a chained in-place update, which warns in pandas 2.x and does
# not modify `data` at all once Copy-on-Write is enabled.
data['TEAM'] = data['TEAM'].replace(to_replace=['NOP', 'NOH'], value='NO')

# Normalise the URL-encoded label 'Regular%20Season' to 'Regular Season'.
data['Season_type'] = data['Season_type'].replace('Regular%20Season', 'Regular Season')

# Split the data by season type.
rs_df = data[data['Season_type'] == 'Regular Season']
playoffs_df = data[data['Season_type'] == 'Playoffs']

# Stat columns that are aggregated in the analysis below.
total_cols = ['MIN', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA',
              'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS']

# 1. How player stats are correlated with each other

In [4]:
# Sum every column in total_cols for each (player, season) pair.
season_totals = data.groupby(['PLAYER', 'PLAYER_ID', 'Year'])[total_cols].sum().reset_index()

# Keep only player-seasons with at least 50 total minutes. Filtering first
# (on a copy) means the rates below are never computed for rows that are
# discarded anyway; MIN itself is not rescaled, so the kept rows are the same.
data_per_min = season_totals[season_totals['MIN'] >= 50].copy()

# Convert the totals into per-minute rates. The columns are selected by name
# rather than by position so the step does not depend on the column order.
rate_cols = [col for col in total_cols if col != 'MIN']
data_per_min[rate_cols] = data_per_min[rate_cols].div(data_per_min['MIN'], axis=0)

# Derived ("advanced") stats built from the per-minute rates.
data_per_min['FG%'] = data_per_min['FGM'] / data_per_min['FGA']        # field goal percentage
data_per_min['3PT%'] = data_per_min['FG3M'] / data_per_min['FG3A']     # 3-point field goal percentage
data_per_min['FT%'] = data_per_min['FTM'] / data_per_min['FTA']        # free throw percentage
data_per_min['FG3A%'] = data_per_min['FG3A'] / data_per_min['FGA']     # share of attempts that are 3-pointers
data_per_min['PTS/FGA'] = data_per_min['PTS'] / data_per_min['FGA']    # points per field goal attempt
data_per_min['FG3M/FGM'] = data_per_min['FG3M'] / data_per_min['FGM']  # share of made shots that are 3-pointers
data_per_min['FTA/FGA'] = data_per_min['FTA'] / data_per_min['FGA']    # free throw attempts per field goal attempt
# True-shooting percentage; this notebook weights free throw attempts by 0.475.
data_per_min['TRU%'] = 0.5 * data_per_min['PTS'] / (data_per_min['FGA'] + 0.475 * data_per_min['FTA'])
data_per_min['AST_TOV'] = data_per_min['AST'] / data_per_min['TOV']    # assist-to-turnover ratio

# A zero denominator with a non-zero numerator (e.g. assists but no turnovers)
# yields +/-inf, which can turn that column's correlations into NaN.
# Treat such ratios as missing so corr() simply skips them.
data_per_min = data_per_min.replace([float('inf'), float('-inf')], float('nan'))

# The player id is an identifier, not a stat, so keep it out of the correlation matrix.
data_per_min = data_per_min.drop(columns='PLAYER_ID')

# Heatmap of the pairwise correlations between the numeric columns.
fig_1 = px.imshow(data_per_min.corr(numeric_only=True))

# 2. How are minutes played distributed?

In [5]:
# Percentage distribution of the MIN column, one histogram per season type.
# update_layout returns the figure, so the axis title is set in the same expression.
fig_2_1 = px.histogram(x=rs_df['MIN'], histnorm='percent').update_layout(
    xaxis_title='Total Minutes Played (per Regular Season)'
)
fig_2_2 = px.histogram(x=playoffs_df['MIN'], histnorm='percent').update_layout(
    xaxis_title='Total Minutes Played (per Playoffs)'
)

In [6]:
# Minutes-per-game helper with minimum playing-time filters.
def hist_data(df=rs_df, min_MIN=0, min_GP=0):
    """Return MIN divided by GP for the rows that pass both thresholds.

    Args:
        df: Frame with 'MIN' and 'GP' columns. The default is the ``rs_df``
            object that exists when this function is defined.
        min_MIN: Smallest 'MIN' value a row may have to be kept (inclusive).
        min_GP: Smallest 'GP' value a row may have to be kept (inclusive).

    Returns:
        A Series of MIN / GP for the kept rows, indexed like ``df``.
    """
    eligible = (df['MIN'] >= min_MIN) & (df['GP'] >= min_GP)
    minutes = df.loc[eligible, 'MIN']
    games = df.loc[eligible, 'GP']
    return minutes / games

# Minutes per game for players who pass the playing-time thresholds.
# Each series is computed once and reused for both the plot and the shares below.
rs_min_per_game = hist_data(rs_df, 50, 5)
playoffs_min_per_game = hist_data(playoffs_df, 5, 1)

# One-minute-wide bins from 0 to 46, shared by both traces so the bars line up.
minute_bins = {'start': 0, 'end': 46, 'size': 1}

fig_2_3 = go.Figure()
fig_2_3.add_trace(go.Histogram(x=rs_min_per_game, histnorm='percent',
                               name='Regular Season', xbins=dict(minute_bins)))
fig_2_3.add_trace(go.Histogram(x=playoffs_min_per_game, histnorm='percent',
                               name='Playoffs', xbins=dict(minute_bins)))

# The plotted values are MIN / GP, so the axis is labelled as minutes per game
# rather than total minutes.
fig_2_3.update_layout(
    barmode='overlay',
    xaxis_title='Minutes Played per Game',
    yaxis_title='Percentage %',
)
# Semi-transparent bars keep both overlaid distributions visible.
fig_2_3.update_traces(opacity=0.5)

# Share of players averaging between 12 and 34 minutes per game (both ends
# inclusive). Despite the names, these are fractions in [0, 1], not percentages.
rs_12to34_min_percentage = rs_min_per_game.between(12, 34).mean()
playoffs_12to34_min_percentage = playoffs_min_per_game.between(12, 34).mean()

# GUI

In [7]:
import tkinter as tk
from tkinter import ttk
import pandas as pd
import tempfile
from PIL import Image, ImageTk
import os
import plotly.io as pio


# Load the per-game workbook that backs the search table.
gui_data = pd.read_excel('nba_player_data_per_game.xlsx')
gui_data = gui_data.drop(columns=['RANK', 'EFF', 'PLAYER_ID', 'TEAM_ID'])

# Assign the replaced columns back: replace(inplace=True) on a selected column
# is a chained in-place update that warns in pandas 2.x and has no effect on
# gui_data once Copy-on-Write is enabled.
gui_data['TEAM'] = gui_data['TEAM'].replace(to_replace=['NOP', 'NOH'], value='NO')
gui_data['Season_type'] = gui_data['Season_type'].replace('Regular%20Season', 'Regular Season')

# Move the column at position 22 to position 6.
# NOTE(review): this is positional; which column it is depends on the workbook
# layout, which is not visible here - confirm before changing the input file.
new_columns = gui_data.columns.tolist()
new_columns.insert(6, new_columns.pop(22))
gui_data = gui_data.reindex(columns=new_columns)

# Columns shown in the result table: everything except the two filter columns.
temp_data = gui_data.drop(columns=['Year', 'Season_type'])

# fig_1 (the correlation heatmap) is used as-is; the overlay histogram is
# exposed to the GUI under the name fig_2.
fig_2 = fig_2_3

def search_player_data():
    """Fill the result table with the rows matching the current search inputs.

    Reads the selected year, season type and typed player name from the input
    widgets, filters ``gui_data`` on all three, clears ``result_treeview`` and
    inserts one table row per matching record.

    The player name is matched as a case-insensitive literal substring.
    """
    year = year_combobox.get()
    season_type = season_type_combobox.get()
    player_name = player_name_entry.get()

    # regex=False: the typed text is plain text, so characters such as '(' or
    # '*' must not be parsed as a regular expression (which would raise).
    # na=False: rows without a player name count as non-matching instead of
    # putting NaN into the boolean mask.
    name_matches = gui_data['PLAYER'].str.contains(
        player_name, case=False, regex=False, na=False
    )
    player_data = gui_data[(gui_data['Year'] == year) &
                           (gui_data['Season_type'] == season_type) &
                           name_matches]

    # Clear the rows left over from the previous search.
    result_treeview.delete(*result_treeview.get_children())

    # Insert the matches, skipping the first two values of every row
    # (the table is created from gui_data.columns[2:]).
    for _, row in player_data.iterrows():
        result_treeview.insert('', tk.END, values=row.tolist()[2:])

# Column whose heading currently carries a sort arrow (None before the first sort).
current_column = None


def _to_sortable_number(text):
    """Return ``text`` as a float, or None when it is not a usable number."""
    try:
        number = float(text)
    except (TypeError, ValueError):
        return None
    # NaN compares unequal to itself and would scramble the ordering.
    return None if number != number else number


def sort_column(col):
    """Sort the result table numerically by ``col``, toggling the direction.

    The first click on a column sorts descending; clicking the same column
    again flips the direction. The heading shows the column name followed by
    an arrow for the active direction, and the previously sorted column gets
    its plain name back. Cells that are not numbers (blank, text, NaN) are
    kept in their current relative order and always placed after the numeric
    rows, whichever direction is active.

    Args:
        col: Identifier of the Treeview column to sort by.
    """
    global current_column

    # Read the direction marker first: if `col` is also the previously sorted
    # column, its heading is reset to the plain name just below.
    was_descending = result_treeview.heading(col)["text"].endswith("▼")

    if current_column is not None:
        result_treeview.heading(current_column, text=current_column)

    descending = not was_descending
    arrow = "▼" if descending else "▲"
    result_treeview.heading(col, text=f"{col} {arrow}")

    rows = [(_to_sortable_number(result_treeview.set(child, col)), child)
            for child in result_treeview.get_children('')]
    numeric_rows = sorted((row for row in rows if row[0] is not None),
                          key=lambda row: row[0], reverse=descending)
    missing_rows = [row for row in rows if row[0] is None]

    # Re-insert every item at its new position.
    for index, (_, child) in enumerate(numeric_rows + missing_rows):
        result_treeview.move(child, '', index)

    current_column = col

# ---- Main window ----
window = tk.Tk()
window.title("NBA Player Data Search & Analysis")

search_title_label = tk.Label(window, text="NBA Players Stat", font=("Helvetica", 20, "bold"))
search_title_label.pack(pady=10)

# ---- Search inputs: year, season type and player name in one row ----
input_frame = tk.Frame(window)
input_frame.pack()

year_label = tk.Label(input_frame, text="Year:")
year_combobox = ttk.Combobox(input_frame, values=sorted(gui_data['Year'].unique()))
season_type_label = tk.Label(input_frame, text="Season Type:")
season_type_combobox = ttk.Combobox(input_frame, values=sorted(gui_data['Season_type'].unique()))
player_name_label = tk.Label(input_frame, text="Player Name:")
player_name_entry = tk.Entry(input_frame)

# Pack left-to-right in the order the widgets should appear.
for input_widget in (year_label, year_combobox,
                     season_type_label, season_type_combobox,
                     player_name_label, player_name_entry):
    input_widget.pack(side="left")

search_button = tk.Button(window, text="Search", command=search_player_data)
search_button.pack()

# ---- Result table ----
result_treeview = ttk.Treeview(window, columns=list(gui_data.columns[2:]), show='headings')

# The first two columns get a plain heading; every later column also gets a
# click handler that sorts the table by that column.
for position, column in enumerate(temp_data.columns):
    heading_options = {"text": column, "anchor": "center"}
    if position >= 2:
        heading_options["command"] = lambda col=column: sort_column(col)
    result_treeview.heading(column, **heading_options)
    result_treeview.column(column, width=130, anchor="center")

result_treeview.pack()

# ---- Analysis section: two captioned figures side by side ----
title_label = tk.Label(window, text="NBA Data Analysis", font=("Helvetica", 20, "bold"))
title_label.pack(pady=10)  # 10 px of vertical padding

subtitle_frame = tk.Frame(window)
subtitle_frame.pack()

subtitle_label = tk.Label(subtitle_frame, text="1. How player stat are correlated with each other", font=("Helvetica", 18))
subtitle_label.pack(side="left")

subtitle_1_label = tk.Label(subtitle_frame, text="2. How are minutes played distributed?", font=("Helvetica", 18))
subtitle_1_label.pack(side="left")

# Export both Plotly figures as PNG files in the working directory.
fig_1_path = "fig1.png"
fig_2_path = "fig2.png"
for figure, png_path in ((fig_1, fig_1_path), (fig_2, fig_2_path)):
    pio.write_image(figure, png_path)


def _open_thumbnail(png_path, max_size=(640, 450)):
    """Open the PNG at ``png_path`` and shrink it in place to fit ``max_size``."""
    picture = Image.open(png_path)
    picture.thumbnail(max_size)
    return picture


image_1 = _open_thumbnail(fig_1_path)
image_2 = _open_thumbnail(fig_2_path)
image_tk_1 = ImageTk.PhotoImage(image_1)
image_tk_2 = ImageTk.PhotoImage(image_2)

# Show each image underneath its caption text.
subtitle_label.config(image=image_tk_1, compound="bottom", pady=10)
subtitle_1_label.config(image=image_tk_2, compound="bottom", pady=10)

# Keep a reference to each PhotoImage on its label for as long as it is displayed.
subtitle_label.image = image_tk_1
subtitle_1_label.image = image_tk_2

window.mainloop()