In [1]:
import pandas as pd
import os
# NOTE(review): hard-coded absolute path ties this notebook to one machine.
# The CSV read further down uses a relative filename, so it is resolved against this directory.
os.chdir('/home/guest/d11922022/github/nba')

In [3]:
def group_by_year(nba, head_n: int = None, last_n:int = None, nth: int = None):
    """Select rows from each ``Year`` group of ``nba``.

    Exactly one selector must be supplied:

    * ``head_n`` -- first ``head_n`` rows of every year (``groupby.head``)
    * ``last_n`` -- last ``last_n`` rows of every year (``groupby.tail``)
    * ``nth``    -- the row at position ``nth`` within every year (``groupby.nth``)

    Raises:
        ValueError: if none, or more than one, of the selectors is given.
    """
    supplied = [
        key
        for key, value in (('head_n', head_n), ('last_n', last_n), ('nth', nth))
        if value is not None
    ]
    # Guard first: anything other than a single selector is rejected.
    if len(supplied) != 1:
        raise ValueError("Please specify only one of head_n, last_n, or nth.")

    yearly = nba.groupby('Year')
    mode = supplied[0]
    if mode == 'head_n':
        return yearly.head(head_n)
    if mode == 'last_n':
        return yearly.tail(last_n)
    return yearly.nth(nth)
    
def split_every_decade(nba):
    """Partition ``nba`` into one frame per decade, 1980s through 2020s.

    Returns a dict keyed ``"1980s"`` ... ``"2020s"``. Each value holds the rows
    whose ``Year`` falls inside that decade's ten-year window (both ends
    inclusive). Rows with a ``Year`` outside 1980-2029 land in no bucket.
    """
    year_col = nba['Year']
    return {
        f"{first_year}s": nba[(year_col >= first_year) & (year_col <= first_year + 9)]
        for first_year in range(1980, 2030, 10)
    }

def split_trend_decade(nba):
    """Build cumulative "from this decade onward" slices of ``nba``.

    Returns a dict keyed ``"1980s"`` ... ``"2020s"``. Unlike a per-decade
    partition, each value keeps every row whose ``Year`` is at or after the
    decade's first year, so later buckets are subsets of earlier ones.
    """
    year_col = nba['Year']
    return {
        f"{cutoff}s": nba[year_col >= cutoff]
        for cutoff in range(1980, 2030, 10)
    }

def p(df1, df2, prefix: str = ''):
    """Print and return the ratio ``len(df1) / len(df2)``.

    Args:
        df1: the matching subset; only its length is used.
        df2: the full population; only its length is used.
        prefix: text printed in front of the summary line.

    Returns:
        The ratio as a float in fraction form (not percent). When ``df2`` is
        empty the ratio is undefined and ``nan`` is returned.

    The printed line reports the total count, the matching count and the
    ratio as a percentage with three decimals.
    """
    matched, total = len(df1), len(df2)
    # An empty population (e.g. a decade bucket with no rows) used to raise
    # ZeroDivisionError and abort the whole report; report NaN instead.
    ratio = matched / total if total else float('nan')
    print(f"{prefix}總數:{total}, 符合數:{matched}, 機率{ratio * 100:.03f}%")
    return ratio

def info(df, columns=None):
    """Return a column subset of ``df`` for compact display.

    Args:
        df: frame to slice.
        columns: column label(s) handed to ``df[...]``. When omitted, the
            default summary columns listed in the body are used.

    Returns:
        ``df[columns]``.
    """
    # The default list is built per call instead of living in the signature,
    # so no mutable list object is shared between calls.
    if columns is None:
        columns = ["Year", "Champ", "Oppo", "Game", "HomePTS", "GuestPTS", "TotalPTS", "Diff", "History"]
    return df[columns]

In [40]:
def check_small_score(df, threshold=228.5, func=split_trend_decade):
    """Print, per decade bucket, how often ``TotalPTS`` is below ``threshold``.

    ``func`` splits ``df`` into a dict that must contain the keys '1980s'
    through '2020s'. For each bucket, the rows with ``TotalPTS < threshold``
    are counted against the whole bucket and reported through ``p``.

    The printed prefix is always ``"year >= <decade>: "``, whichever ``func``
    produced the buckets.
    """
    buckets = func(df)
    decade_labels = ('1980s', '1990s', '2000s', '2010s', '2020s')

    for label in decade_labels:
        bucket = buckets[label]
        below = bucket[bucket.TotalPTS < threshold]
        p(below, bucket, f"year >= {label}: ")


def check_score_more(df, threshold=228.5, func=split_trend_decade):
    """Run the low-score report on years where Game 2 scored less than Game 1.

    Args:
        df: frame with ``Year``, ``Game`` and ``TotalPTS`` columns, holding at
            most one row per (Year, Game) pair (``pivot`` raises otherwise).
        threshold: ``TotalPTS`` cutoff forwarded to ``check_small_score``.
        func: decade splitter forwarded to ``check_small_score``.

    The rows of ``df`` are first narrowed to the years whose Game 2
    ``TotalPTS`` is strictly below Game 1's, then reported per decade.
    """
    pivot = df.pivot(index='Year', columns='Game', values='TotalPTS')
    less = pivot[2] < pivot[1]
    # `less` is a boolean Series indexed by Year. The years we want are the
    # index labels where it is True -- not its True/False values, which is
    # what the previous `isin(less)` compared against.
    selected_years = less[less].index
    # Filter the frame itself; the old code rebound `df` to a one-element list
    # holding the mask, which the decade splitter cannot index by 'Year'.
    subset = df[df.Year.isin(selected_years)]

    # Same per-decade report as check_small_score, so reuse it.
    check_small_score(subset, threshold=threshold, func=func)

In [38]:
# Load the dataset (relative path, resolved against the current working directory).
file = 'NBAFinals.csv'
nba = pd.read_csv(file)
# Row at position 0..3 within each Year group, via groupby('Year').nth(...).
# Presumably these are Games 1-4 of each series -- assumes rows are ordered by game within a year.
first_game = group_by_year(nba, nth=0)
second_game = group_by_year(nba, nth=1)
third_game = group_by_year(nba, nth=2)
forth_game = group_by_year(nba, nth=3)

# TotalPTS cutoff used by the reports below.
# NOTE(review): the origin of 228.5 is not shown here -- confirm.
threshold = 228.5
#check_small_score(first_game, threshold=threshold) 
# Cumulative buckets (default func=split_trend_decade): each bucket is "this decade onward".
check_small_score(second_game, threshold=threshold) 
#check_small_score(third_game, threshold=threshold) 
#check_small_score(forth_game, threshold=threshold) 


print()
# Same report with disjoint ten-year buckets; the printed prefix still reads "year >= ...".
check_small_score(second_game, threshold=threshold, func=split_every_decade) 

year >= 1980s: 總數:45, 符合數:41, 機率91.111%
year >= 1990s: 總數:35, 符合數:33, 機率94.286%
year >= 2000s: 總數:25, 符合數:23, 機率92.000%
year >= 2010s: 總數:15, 符合數:13, 機率86.667%
year >= 2020s: 總數:5, 符合數:4, 機率80.000%

year >= 1980s: 總數:10, 符合數:8, 機率80.000%
year >= 1990s: 總數:10, 符合數:10, 機率100.000%
year >= 2000s: 總數:10, 符合數:10, 機率100.000%
year >= 2010s: 總數:10, 符合數:9, 機率90.000%
year >= 2020s: 總數:5, 符合數:4, 機率80.000%


In [94]:
# Years whose Game == 1 row has HomeWin == False.
G1_home_lose_years = nba[(nba.Game==1) & (nba.HomeWin==False)].Year

# All rows (every game) belonging to those years.
g1_lose = nba[nba.Year.isin(G1_home_lose_years)]
# Row at position 1 within each of those years.
g1_lose_second_game = group_by_year(g1_lose, nth=1)
# First report covers every game of the selected years, second only the position-1 rows.
check_small_score(g1_lose, threshold=threshold)
check_small_score(g1_lose_second_game, threshold=threshold)

year >= 1980s: 總數:57, 符合數:49, 機率85.965%
year >= 1990s: 總數:37, 符合數:35, 機率94.595%
year >= 2000s: 總數:22, 符合數:22, 機率100.000%
year >= 2010s: 總數:12, 符合數:12, 機率100.000%
year >= 2020s: 總數:6, 符合數:6, 機率100.000%
year >= 1980s: 總數:10, 符合數:9, 機率90.000%
year >= 1990s: 總數:7, 符合數:7, 機率100.000%
year >= 2000s: 總數:4, 符合數:4, 機率100.000%
year >= 2010s: 總數:2, 符合數:2, 機率100.000%
year >= 2020s: 總數:1, 符合數:1, 機率100.000%


In [28]:
# Summary columns of the position-1 rows for years where the Game 1 row has HomeWin == False.
info(g1_lose_second_game)

Unnamed: 0,Year,Champ,Oppo,Game,HomePTS,GuestPTS,TotalPTS,Diff,History
13,1982,Lakers,Sixers,2,110,94,204,16,lw
23,1984,Celtics,Lakers,2,124,121,245,3,lw
48,1988,Lakers,Pistons,2,108,96,204,12,lw
64,1991,Bulls,Lakers,2,107,86,193,21,lw
75,1993,Bulls,Suns,2,108,111,219,-3,ll
88,1995,Rockets,Magic,2,106,117,223,-11,ll
121,2001,Lakers,Sixers,2,98,89,187,9,lw
136,2004,Pistons,Lakers,2,99,91,190,8,lw
182,2012,Heat,Thunder,2,96,100,196,-4,wl
239,2022,Warriors,Celtics,2,107,88,195,19,lw


In [95]:
# NOTE(review): repeats the g1_lose assignment already made in an earlier cell; same expression, same inputs.
g1_lose = nba[nba.Year.isin(G1_home_lose_years)]

In [None]:
# Split the seasons by whether the second game's TotalPTS rose or not relative to the first game.
# Both frames are re-indexed 0..n-1, so the subtraction pairs rows by position, not by Year label.
years = second_game.Year.reset_index(drop=True)
g2_minus_g1 = second_game.reset_index().TotalPTS - first_game.reset_index().TotalPTS
second_game_smaller_years = years[g2_minus_g1 <= 0]  # "smaller" includes ties
second_game_larger_years = years[g2_minus_g1 > 0]

# Every row (all games) of the years on each side of the split.
second_game_smaller = nba.loc[nba.Year.isin(second_game_smaller_years)]
second_game_larger = nba.loc[nba.Year.isin(second_game_larger_years)]

Unnamed: 0,Year,Champ,Game,Win,Home,MP,FG,FGA,FGP,TP,...,OppoPTS,Diff,HomeWin,HomeChamp,History,HomePTS,GuestPTS,TotalPTS,HomeGuestDiff,SeriesLastGame
12,1982,Lakers,1,1,0,240,49,93,0.527,0,...,117,-7,False,False,l,117,124,241,-7,False
13,1982,Lakers,2,0,0,240,35,83,0.422,0,...,110,16,True,False,lw,110,94,204,16,False
14,1982,Lakers,3,1,1,240,50,91,0.549,1,...,108,21,True,False,lwl,108,129,237,-21,False
15,1982,Lakers,4,1,1,240,45,97,0.464,0,...,101,10,True,False,lwll,101,111,212,-10,False
16,1982,Lakers,5,0,0,240,47,100,0.47,0,...,135,33,True,False,lwllw,135,102,237,33,False
17,1982,Lakers,6,1,1,240,47,87,0.54,0,...,104,10,True,False,lwllwl,104,114,218,-10,True
22,1984,Celtics,1,0,1,240,41,94,0.436,1,...,115,0,False,True,l,109,115,224,-6,False
23,1984,Celtics,2,1,1,265,45,98,0.459,0,...,121,3,True,True,lw,124,121,245,3,False
24,1984,Celtics,3,0,0,240,40,101,0.396,2,...,137,33,True,True,lwl,104,137,241,-33,False
25,1984,Celtics,4,1,0,265,48,111,0.432,2,...,125,-4,False,True,lwlw,129,125,254,4,False


In [100]:
# Game == 2 rows of g1_lose for years in second_game_smaller_years.
info(g1_lose[g1_lose.Year.isin(second_game_smaller_years) & (g1_lose.Game==2)])

Unnamed: 0,Year,Champ,Oppo,Game,HomePTS,GuestPTS,TotalPTS,Diff,History
13,1982,Lakers,Sixers,2,110,94,204,16,lw
88,1995,Rockets,Magic,2,106,117,223,-11,ll
121,2001,Lakers,Sixers,2,98,89,187,9,lw
182,2012,Heat,Thunder,2,96,100,196,-4,wl
239,2022,Warriors,Celtics,2,107,88,195,19,lw


In [101]:
# Game == 2 rows of g1_lose for years in second_game_larger_years.
info(g1_lose[g1_lose.Year.isin(second_game_larger_years) & (g1_lose.Game==2)])

Unnamed: 0,Year,Champ,Oppo,Game,HomePTS,GuestPTS,TotalPTS,Diff,History
23,1984,Celtics,Lakers,2,124,121,245,3,lw
48,1988,Lakers,Pistons,2,108,96,204,12,lw
64,1991,Bulls,Lakers,2,107,86,193,21,lw
75,1993,Bulls,Suns,2,108,111,219,-3,ll
136,2004,Pistons,Lakers,2,99,91,190,8,lw


In [8]:
# first_game rows whose TotalPTS is strictly above threshold.
info(first_game[first_game.TotalPTS > threshold])

Unnamed: 0,Year,Champ,Oppo,Game,HomePTS,GuestPTS,TotalPTS,Diff,History
12,1982,Lakers,Sixers,1,117,124,241,-7,l
29,1985,Lakers,Celtics,1,148,114,262,34,w
41,1987,Lakers,Celtics,1,126,113,239,13,w
87,1995,Rockets,Magic,1,118,120,238,-2,l
216,2018,Warriors,Cavaliers,1,124,114,238,10,w


In [9]:
# second_game rows whose TotalPTS is strictly above threshold.
info(second_game[second_game.TotalPTS > threshold])

Unnamed: 0,Year,Champ,Oppo,Game,HomePTS,GuestPTS,TotalPTS,Diff,History
23,1984,Celtics,Lakers,2,124,121,245,3,lw
42,1987,Lakers,Celtics,2,141,122,263,19,ww
212,2017,Warriors,Cavaliers,2,132,113,245,19,ww
227,2020,Lakers,Heat,2,124,114,238,10,ww


In [10]:
# third_game rows whose TotalPTS is strictly above threshold.
info(third_game[third_game.TotalPTS > threshold])

Unnamed: 0,Year,Champ,Oppo,Game,HomePTS,GuestPTS,TotalPTS,Diff,History
14,1982,Lakers,Sixers,3,108,129,237,21,lwl
24,1984,Celtics,Lakers,3,104,137,241,33,lwl
31,1985,Lakers,Celtics,3,111,136,247,25,wll
76,1993,Bulls,Suns,3,129,121,250,-8,llw
213,2017,Warriors,Cavaliers,3,118,113,231,-5,www
222,2019,Raptors,Warriors,3,123,109,232,-14,wlw


In [11]:
# forth_game rows whose TotalPTS is strictly above threshold.
info(forth_game[forth_game.TotalPTS > threshold])

Unnamed: 0,Year,Champ,Oppo,Game,HomePTS,GuestPTS,TotalPTS,Diff,History
25,1984,Celtics,Lakers,4,129,125,254,-4,lwlw
117,2000,Lakers,Pacers,4,120,118,238,-2,wwlw
214,2017,Warriors,Cavaliers,4,116,137,253,21,wwwl


In [12]:
# Cumulative slices of the second-game rows: each frame keeps Year >= the cutoff.
d = second_game
d1980 = d[d.Year>=1980]
d1990 = d[d.Year>=1990]
d2000 = d[d.Year>=2000]
d2010 = d[d.Year>=2010]
d2020 = d[d.Year>=2020]

# Low-scoring subset of each slice. These previously re-applied the Year
# filter, which made every *_small frame identical to its parent; the filter
# that matches check_small_score is TotalPTS < threshold.
d1980_small = d1980[d1980.TotalPTS < threshold]
d1990_small = d1990[d1990.TotalPTS < threshold]
d2000_small = d2000[d2000.TotalPTS < threshold]
d2010_small = d2010[d2010.TotalPTS < threshold]
d2020_small = d2020[d2020.TotalPTS < threshold]

In [46]:
# NOTE(review): none of the last2_* frames are defined in the visible cells, so this line
# depends on kernel state from elsewhere -- confirm or restore their definitions.
# The commas make the cell's value a 4-tuple of the ratios returned by p().
p(last2_1990_small, last2_1990), p(last2_2000_small, last2_2000), p(last2_2010_small, last2_2010), p(last2_2020_small, last2_2020),

53 68 0.7794117647058824
36 48 0.75
19 28 0.6785714285714286
6 8 0.75


(0.7794117647058824, 0.75, 0.6785714285714286, 0.75)

In [51]:
# Distinct years that contain a row whose History equals 'www'
# (presumably three straight wins to open the series -- confirm the History encoding).
nba[nba.History == 'www'].Year.unique()

array([1983, 1989, 1996, 2002, 2007, 2017, 2018])

In [50]:
# Every row (all games) of the years that contain a History == 'www' row.
nba[nba.Year.isin(nba[nba.History == 'www'].Year.unique())]

Unnamed: 0,Year,Champ,Game,Win,Home,MP,FG,FGA,FGP,TP,...,Oppo,OppoPTS,Diff,HomeWin,HomeChamp,History,HomePTS,GuestPTS,TotalPTS,HomeGuestDiff
18,1983,Sixers,1,1,1,240,45,96,0.469,0,...,Lakers,107,6,True,True,w,113,107,220,6
19,1983,Sixers,2,1,1,240,40,83,0.482,0,...,Lakers,93,10,True,True,ww,103,93,196,10
20,1983,Sixers,3,1,0,240,45,94,0.479,0,...,Lakers,94,-17,False,True,www,111,94,205,17
21,1983,Sixers,4,1,0,240,42,81,0.519,0,...,Lakers,108,-7,False,True,wwww,115,108,223,7
54,1989,Pistons,1,1,1,240,46,83,0.554,1,...,Lakers,97,12,True,True,w,109,97,206,12
55,1989,Pistons,2,1,1,240,44,81,0.543,1,...,Lakers,105,3,True,True,ww,108,105,213,3
56,1989,Pistons,3,1,0,240,43,83,0.518,1,...,Lakers,110,-4,False,True,www,114,110,224,4
57,1989,Pistons,4,1,0,240,34,70,0.486,2,...,Lakers,97,-8,False,True,wwww,105,97,202,8
91,1996,Bulls,1,1,1,240,37,86,0.43,7,...,SuperSonics,90,17,True,True,w,107,90,197,17
92,1996,Bulls,2,1,1,240,30,77,0.39,4,...,SuperSonics,88,4,True,True,ww,92,88,180,4
