In [1]:
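# Common imports. NOTE(review): no Korean font is configured in this cell, so Hangul plot labels may not render — confirm.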
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the raw tables from CSV files located in the working directory.
teams, games_details = (
    pd.read_csv(csv_name) for csv_name in ("teams.csv", "games_details.csv")
)

In [None]:
# Select the teams whose ARENACAPACITY value is missing.
capacity_missing = teams["ARENACAPACITY"].isna()
null_rows = teams.loc[capacity_missing]

# Display only the identifying columns so the table stays readable.
display_cols = ["TEAM_ID", "ABBREVIATION", "NICKNAME", "CITY", "ARENA", "ARENACAPACITY"]
null_rows[display_cols]

In [None]:
import pandas as pd

teams = pd.read_csv("teams.csv")


def _summarize_column(name, values):
    """Build one catalog record for a single column.

    Every record carries the column name, its dtype rendered as text, the
    number of nulls and the null percentage rounded to two decimals.
    Numeric columns additionally get min, quartiles and max.
    """
    missing = values.isnull()
    record = {
        "컬럼명": name,
        "타입": str(values.dtype),  # text form is easier to read in the table
        "Null 개수": missing.sum(),
        "Null 비율(%)": round(missing.mean() * 100, 2),
    }
    if pd.api.types.is_numeric_dtype(values):
        record["min"] = values.min()
        for label, q in (("25%", 0.25), ("50%", 0.5), ("75%", 0.75)):
            record[label] = values.quantile(q)
        record["max"] = values.max()
    return record


# One record per column, in the frame's column order.
catalog = [_summarize_column(name, teams[name]) for name in teams.columns]
catalog_teams = pd.DataFrame(catalog)

# Keep only the dtype / null-percentage view for export.
type_null_table = catalog_teams[["컬럼명", "타입", "Null 비율(%)"]]

# Persist the table as CSV (UTF-8 with BOM) and as an Excel workbook.
type_null_table.to_csv("catalog_types_nulls.csv", index=False, encoding="utf-8-sig")
type_null_table.to_excel("catalog_types_nulls.xlsx", index=False, engine="openpyxl")

type_null_table


In [None]:
# Number of leading GAME_ID characters used as the season key.
# NOTE(review): 5 follows the stated intent of this cell ("first 5 digits of
# GAME_ID"); the 2022 filter elsewhere in this notebook uses a 4-character
# prefix, so confirm the right length against the actual GAME_ID format.
SEASON_PREFIX_LEN = 5

# Derive the season key on the full table first, so the filtered frame below
# inherits the column. Previously the whole GAME_ID was stored, which made
# every game its own "season".
games_details["SEASON_ID"] = (
    games_details["GAME_ID"].astype(str).str[:SEASON_PREFIX_LEN]
)

# Keep rows with PLUS_MINUS >= 0 (NaN compares False, so those rows drop out).
# .copy() makes this an independent frame rather than a view of games_details,
# avoiding SettingWithCopyWarning on any later column assignment.
pm_nonnegative = games_details[games_details["PLUS_MINUS"] >= 0].copy()

# One row per (season, player): a player with many qualifying games in a
# season must be counted once.
unique_players = pm_nonnegative.drop_duplicates(subset=["SEASON_ID", "PLAYER_ID"])

# Count distinct players per season. Aggregating the de-duplicated frame is
# what makes the "unique_players" column name true; counting pm_nonnegative
# directly would count every qualifying box-score row.
pm_count_by_season = (
    unique_players.groupby("SEASON_ID")["PLAYER_ID"]
    .count()
    .reset_index(name="unique_players")
)

pm_count_by_season

In [None]:
# Keep only rows whose GAME_ID, rendered as text, starts with "2220".
# NOTE(review): this prefix is presumably the 2022 season — confirm against the GAME_ID format.
game_id_text = games_details["GAME_ID"].astype(str)
games_details_2022 = games_details[game_id_text.str.startswith("2220")]

# Restrict to rows with a non-negative PLUS_MINUS.
has_nonnegative_pm = games_details_2022["PLUS_MINUS"].ge(0)
pm_nonnegative_2022 = games_details_2022[has_nonnegative_pm]

# (1) Distinct players: one row per PLAYER_ID.
unique_players_2022 = pm_nonnegative_2022.drop_duplicates(subset=["PLAYER_ID"])
n_unique_players_2022 = unique_players_2022["PLAYER_ID"].nunique()
print("2022 시즌 PLUS_MINUS ≥ 0 고유 선수 수:", n_unique_players_2022)

# (2) Total qualifying rows, repeated players included.
count_with_duplicates = pm_nonnegative_2022.shape[0]
print("2022 시즌 PLUS_MINUS ≥ 0 총 건수(중복 포함):", count_with_duplicates)

In [None]:
import pandas as pd

teams = pd.read_csv("teams.csv")

# Number of teams (non-null TEAM_ID values) per founding year.
# groupby sorts its keys, so the rows come out ordered by YEARFOUNDED.
year_counts = teams.groupby("YEARFOUNDED", as_index=False).agg(
    team_count=("TEAM_ID", "count")
)

print(year_counts)

# 시각화 (막대그래프)
import matplotlib.pyplot as plt

# Bar chart of team counts per founding year, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(year_counts["YEARFOUNDED"], year_counts["team_count"])
ax.set_title("창단 연도별 팀 수")
ax.set_xlabel("창단 연도")
ax.set_ylabel("팀 수")
plt.show()


In [3]:
import pandas as pd

# 1-2) Load the standings, parse STANDINGSDATE (unparseable values become NaT),
#      drop rows without a valid date and order by season, then date.
ranking = (
    pd.read_csv("ranking.csv")
    .assign(
        STANDINGSDATE=lambda frame: pd.to_datetime(
            frame["STANDINGSDATE"], errors="coerce"
        )
    )
    .dropna(subset=["STANDINGSDATE"])
    .sort_values(["SEASON_ID", "STANDINGSDATE"])
)

# 3) Rank within each (season, date) by W_PCT, highest first;
#    method="first" breaks ties by row order.
w_pct_by_day = ranking.groupby(["SEASON_ID", "STANDINGSDATE"])["W_PCT"]
ranking["Rank"] = w_pct_by_day.rank(method="first", ascending=False)

# 4) 시즌 시작연도 추출 + 컷오프 날짜(다음 해 4/15)
def season_start_year(season_id):
    """Return the start year encoded in a season identifier.

    The identifier is converted to text. When it has at least four characters
    and the last four are all digits, those four digits are returned as an
    int (e.g. 22018 -> 2018). Otherwise the whole text is parsed with int(),
    which raises ValueError for non-numeric text.
    """
    text = str(season_id)
    tail = text[-4:]
    has_year_suffix = len(text) >= 4 and tail.isdigit()
    return int(tail if has_year_suffix else text)

# Season start year per row, and the cut-off date: April 15 of the following year.
season_starts = ranking["SEASON_ID"].map(season_start_year)
ranking["_SEASON_START"] = season_starts
ranking["_CUT_OFF"] = pd.to_datetime((season_starts + 1).astype(str) + "-04-15")

# 5) Keep only the five most recent season start years.
recent_starts = sorted(set(ranking["_SEASON_START"]))[-5:]
is_recent = ranking["_SEASON_START"].isin(recent_starts)
ranking_recent = ranking.loc[is_recent].copy()

# 6) Keep rows whose TEAM contains "Detroit" (case-insensitive).
is_detroit = ranking_recent["TEAM"].str.contains("Detroit", case=False)
det = ranking_recent.loc[is_detroit].copy()


def _final_standing(season, rows):
    """Return the last standing on or before the cut-off for one SEASON_ID.

    Returns None when the season has no rows dated on or before its cut-off.
    """
    cutoff = rows["_CUT_OFF"].iloc[0]
    before_cutoff = rows.loc[rows["STANDINGSDATE"] <= cutoff]
    if before_cutoff.empty:
        return None
    latest = before_cutoff.sort_values("STANDINGSDATE").iloc[-1]
    start = season_start_year(season)
    return {
        "SEASON_ID": season,
        "SEASON_LABEL": f"{start}-{str(start + 1)[-2:]}",
        "TEAM": latest["TEAM"],
        "Rank": int(latest["Rank"]),
    }


# 7) One record per SEASON_ID, skipping seasons with nothing before the cut-off.
# NOTE(review): grouping is by raw SEASON_ID, so identifiers that share the
# same last four digits produce separate rows with the same SEASON_LABEL —
# confirm whether that is intended.
det_final = [
    record
    for record in (
        _final_standing(season, rows) for season, rows in det.groupby("SEASON_ID")
    )
    if record is not None
]

det_final_ranking = pd.DataFrame(det_final)

print(det_final_ranking)

   SEASON_ID SEASON_LABEL     TEAM  Rank
0      12018      2018-19  Detroit    24
1      12019      2019-20  Detroit    12
2      12020      2020-21  Detroit    17
3      12021      2021-22  Detroit    16
4      12022      2022-23  Detroit    30
5      22018      2018-19  Detroit    16
6      22019      2019-20  Detroit    26
7      22020      2020-21  Detroit    28
8      22021      2021-22  Detroit    28
9      22022      2022-23  Detroit    30
