In [None]:
# Detect whether the notebook runs on Google Colab by probing for the
# `google.colab` package; later cells branch on IN_COLAB.
try:
    import google.colab  # imported only to test availability
    IN_COLAB = True
except ImportError:
    # Catch only a failed import so unrelated errors (e.g. KeyboardInterrupt)
    # are not silently turned into "not on Colab".
    IN_COLAB = False
import os 
import pandas as pd
import dateutil
import sklearn.preprocessing as preprocess
import numpy as np
import pdb
if IN_COLAB:
    !pip install nba_api 
    from nba_api.stats.static import teams
    from nba_api.stats.endpoints import leaguegamefinder
    import io
pd.options.display.max_columns  = 1000

## Load the Cleaned Data

In [None]:
if IN_COLAB:
    # On Colab, open the upload dialog; `uploaded` is indexed by file name
    # when the CSV is read in the next cell.
    from google.colab import files
    uploaded = files.upload()

In [None]:
# Read cleanNBA.csv: from the uploaded bytes on Colab, otherwise from the
# local data/ directory.
if IN_COLAB:
    f = "cleanNBA.csv"
    # Wrap the uploaded content in BytesIO so read_csv can treat it as a file.
    df = pd.read_csv(io.BytesIO(uploaded[f]))
else:
    fn = os.path.join("data", "cleanNBA.csv")
    df = pd.read_csv(fn)
print(df.shape)
display(df.head())

# Preprocess

### Remove unnecessary columns and add home vs. away

In [None]:
#need to extract home vs away info
def extractHome(df, matchCol, homeCol, awayCol):
    """Add 0/1 home and away indicator columns derived from the matchup string.

    The second whitespace-separated token of every ``matchCol`` entry is
    compared with ``'vs.'``: rows where it matches are flagged as home games,
    every other row as an away game.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is deep-copied and never modified.
    matchCol : str
        Name of the column holding the matchup strings.
    homeCol, awayCol : str
        Names of the indicator columns to create (overwritten if present).

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with integer ``homeCol`` / ``awayCol`` columns added.
    """
    df = df.copy(deep=True)
    # np.asarray rather than np.array(..., copy=False): building an array from
    # a list always needs a copy, which NumPy 2 rejects when copy=False.
    # dtype=str keeps the comparison below well-defined for an empty frame.
    sym = np.asarray([s.split()[1] for s in df[matchCol]], dtype=str)
    homeMask = sym == 'vs.'
    df[homeCol] = homeMask.astype(int)
    df[awayCol] = np.logical_not(homeMask).astype(int)
    return df

# Columns removed once HOME/AWAY have been derived from MATCHUP.
cols2drop = ['TEAM_NAME', 'REB', 'MATCHUP']
# Flag home/away games, drop the columns above, and rename MIN to MINUTES.
df_drop = (
    extractHome(df, 'MATCHUP', 'HOME', 'AWAY')
    .drop(cols2drop, axis=1)
    .rename({'MIN': 'MINUTES'}, axis='columns')
)

In [None]:
df_drop.head()

### Replace fine-grained season IDs with coarser ones

In [None]:
def cleanSeasons(df, szIDCol, newSzIDCol, playOffCol, regCol):
    """Split the raw season ID into a season year and playoff/regular flags.

    Season IDs look like ``<prefix><year>`` where the one-character prefix
    seems to be 1 (before new year), 2 (after new year) or 4 (playoffs).
    The year part (everything after the first character) is written to
    ``newSzIDCol`` so that one season spans Oct-June; ``playOffCol`` is 1 for
    prefix ``'4'`` and 0 otherwise, and ``regCol`` is its complement.

    Returns a deep copy of ``df`` with the three columns added; ``df`` itself
    is left untouched.
    """
    out = df.copy(deep=True)
    idText = out[szIDCol].astype(str)
    isPlayoff = (idText.str[0] == '4').values
    out[newSzIDCol] = idText.str[1:].astype(int).values
    out[playOffCol] = isPlayoff.astype(int)
    out[regCol] = np.logical_not(isPlayoff).astype(int)
    return out
# Adds SZSTART (year part of SEASON_ID) plus the PLAYOFF / REG flags.
df_seas = cleanSeasons(df_drop, 'SEASON_ID','SZSTART', 'PLAYOFF','REG')

In [None]:
df_seas.head()

### Construct time features 
One-hot encode the day of the week, and insert the days since the season began and each team's games since the season began (team specific).

In [None]:
def eng_timeFeats(df, dateCol='Date', sznIDCol=None, daySznCol=None,
                    dayOfWeek=False, teamCol='Team', gSznCol=None):
    """Engineer time features and return them on a copy of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; never modified.
    dateCol : str
        Column with the game date. It is parsed to datetimes in the result.
    sznIDCol : str, optional
        Column identifying the season; required when ``daySznCol`` is given.
    daySznCol : str, optional
        If given, name of the output column holding the number of days since
        the earliest date of that row's season.
    dayOfWeek : bool
        If True, add one 0.0/1.0 column per weekday named
        ``'S','M','T','W','Th','F','Sa'`` (Sunday .. Saturday).
    teamCol : str
        Column identifying the team (used for the per-team game counter).
    gSznCol : str, optional
        Name of the output column holding the 0-based index of the game
        within that team's season; required when ``daySznCol`` is given.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the requested features. When ``daySznCol`` is
        given the rows are sorted by ``dateCol``.
    """
    ndf = df.copy(deep=True)
    ndf[dateCol] = pd.to_datetime(df[dateCol])
    if dayOfWeek:
        weekday = ndf[dateCol].dt.dayofweek
        day_cols = ['S','M','T','W','Th','F','Sa']
        # pandas numbers weekdays Monday=0 .. Sunday=6, so map each label to
        # its pandas code explicitly instead of relying on encoder order.
        # Building the columns by comparison also works when some weekday
        # never occurs in the data.
        pandas_codes = [6, 0, 1, 2, 3, 4, 5]
        for label, code in zip(day_cols, pandas_codes):
            ndf[label] = (weekday == code).astype(float)

    if daySznCol is not None:
        assert sznIDCol is not None, "Must provide season ID col"
        assert gSznCol is not None, "May as well do games too"
        # Chronological order is what makes cumcount() a "games so far" counter.
        ndf = ndf.sort_values(by=dateCol)
        seasonStart = ndf.groupby(sznIDCol)[dateCol].transform('min')
        ndf[daySznCol] = (ndf[dateCol] - seasonStart).dt.days
        ndf[gSznCol] = ndf.groupby([sznIDCol, teamCol]).cumcount()

    return ndf
# Parse GAME_DATE, add the day-of-week indicators, and add the per-season day
# counter (DsinceSzn) and per-team game counter (GsinceSzn).
df_t = eng_timeFeats(df_seas,'GAME_DATE','SZSTART','DsinceSzn',
                                     True, 'TEAM_ID','GsinceSzn')

In [None]:
df_t.head()

### One hot encode the teams

In [None]:
def ohe_teams(df, teamCol):
    """One-hot encode ``teamCol`` into one column per distinct value.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is deep-copied and never modified.
    teamCol : str
        Column whose distinct values become the new indicator columns.

    Returns
    -------
    (pandas.DataFrame, OneHotEncoder)
        The copy of ``df`` with one column per category (named after the
        category value), and the fitted encoder whose ``categories_[0]``
        lists those names in column order.
    """
    df = df.copy(deep=True)
    teams = df[teamCol].values
    try:
        # scikit-learn >= 1.2 spells the dense-output switch `sparse_output`;
        # the old `sparse` keyword was removed in 1.4.
        ohe = preprocess.OneHotEncoder(sparse_output=False)
    except TypeError:
        # Older scikit-learn only knows the original keyword.
        ohe = preprocess.OneHotEncoder(sparse=False)
    encoded = ohe.fit_transform(teams.reshape(-1,1))
    # Assign column by column: positional, so it is independent of df's index.
    for position, team in enumerate(ohe.categories_[0]):
        df[team] = encoded[:, position]
    return df, ohe
# Keep the fitted encoder: its categories_ supply the team column names below.
df_oheT,ohe_encoder = ohe_teams(df_t, 'TEAM_ABBREVIATION')

### Scale and Reorder

In [None]:
# Numeric box-score and calendar columns to rescale.
numCols = ['MINUTES','PTS','FGM', 'FGA','FG3M','FG3A','FTM','FTA',
          'OREB','DREB','AST','STL','BLK','TOV','PF','SZSTART','DsinceSzn','GsinceSzn']
team_names = ohe_encoder.categories_[0]
# Indicator columns: home/away, playoff/regular, day of week, one per team.
oheCols = ['HOME','AWAY', 'PLAYOFF','REG','S','M','T','W','Th','F','Sa'] + team_names.tolist()
cols2scale = numCols + oheCols
# NOTE(review): the scaler is fit on every row of df_oheT, i.e. before any
# train/test split is made.
scaler = preprocess.MinMaxScaler()
scaler.fit(df_oheT[cols2scale])
# Overwrites these columns of df_oheT with their scaled values.
df_oheT[cols2scale] = scaler.transform(df_oheT[cols2scale])
#reorder
# Identifier columns are left unscaled and placed first.
helperCols = ['SEASON_ID','TEAM_ID','TEAM_ABBREVIATION','GAME_ID','GAME_DATE']
new_cols = helperCols + numCols + oheCols
clean_df = df_oheT[new_cols]
clean_df.head()

In [None]:
def transScalar(val, col, colsScaled, scaler):
    """
    Scale a single value as if it belonged to column `col`.

    The scaler expects one entry per column in `colsScaled`, so `val` is
    placed in a one-row array of zeros at the position of `col`, the row is
    transformed, and only the entry for `col` is returned.
    """
    position = colsScaled.index(col)
    row = np.zeros((1, len(colsScaled)))
    row[0, position] = val
    return scaler.transform(row)[0, position]
def transCol(valCol, nameCol, colsScaled, scaler):
    """
    Scale a vector of values as if it were column `nameCol`.

    `valCol` is written into the `nameCol` position of a zero array with one
    column per entry of `colsScaled`; the array is transformed with `scaler`
    and only the `nameCol` column of the result is returned.
    """
    position = colsScaled.index(nameCol)
    nRows = max(valCol.shape)
    padded = np.zeros((nRows, len(colsScaled)))
    padded[:, position] = valCol
    return scaler.transform(padded)[:, position]

In [None]:
def ridDupCols(df, cols, drop_suff, rn_suff, inplace=True):
    """
    Collapse the suffixed column pairs a merge produces into single columns.

    For every name in `cols`, the column `name + drop_suff` is dropped and
    the column `name + rn_suff` is renamed back to `name`.

    With `inplace=True` (default) `df` is modified and None is returned;
    otherwise `df` is left untouched and the cleaned frame is returned.
    """
    obsolete = [name + drop_suff for name in cols]
    restored = {name + rn_suff: name for name in cols}
    if not inplace:
        trimmed = df.drop(obsolete, axis=1, inplace=inplace)
        return trimmed.rename(columns=restored, inplace=inplace)
    df.drop(obsolete, axis=1, inplace=inplace)
    df.rename(columns=restored, inplace=inplace)
    return None

### Merge data so one row = one game

We will really only be using this as a reference to construct the histories and get the point differential

In [None]:
#now would like to merge them based on gameID
# df_t is the frame from before one-hot team encoding and scaling, so PTS and
# GsinceSzn keep their raw values here.
home_mask= df_t['HOME'] == 1
away_mask = np.logical_not(home_mask)

home_df = df_t[home_mask]
away_df = df_t[away_mask]

# One row per game: home-team columns get suffix _H, away-team columns _A.
merge_df = home_df.merge(away_df, 'inner',on='GAME_ID',
                         suffixes=('_H','_A'))
# Game-level columns are expected to agree for both teams; drop the _A copy
# and restore the plain name on the _H copy.
dup_cols = ['SEASON_ID','GAME_DATE','SZSTART','S','M','T',
            'W','Th','F','Sa','DsinceSzn', 'PLAYOFF','REG']
ridDupCols(merge_df, dup_cols, drop_suff='_A',rn_suff='_H')
merge_df.sort_values(by='GAME_DATE', inplace=True)



In [None]:
merge_df.head()

In [None]:
#also have to scale the appropriate columns from merged that we will use in currMatchup
mergeScaled_df = merge_df.copy()
# Game-level columns (one copy per game after the merge).
curr_commcols = ['SZSTART','PLAYOFF','REG','S','M','T','W','Th','F','Sa','DsinceSzn']
# Per-team columns; these exist in merge_df with _H / _A suffixes.
currIcols = ['GsinceSzn']
for col in curr_commcols:
    if col in cols2scale:
        # Reuse the already-fitted scaler for this single column.
        mergeScaled_df[col]= transCol(mergeScaled_df[col].values, col, cols2scale,scaler)
for col in currIcols:
    for suff in ("_H","_A"):
        if col in cols2scale:
            # Both suffixed copies are scaled with the statistics of the unsuffixed column.
            mergeScaled_df[col+suff] = transCol(mergeScaled_df[col+suff].values, col, cols2scale, scaler)
mergeScaled_df.iloc[-10:]

### Form histories for each team for each game

Given a game with a home team and an away team, retrieve the stats from the last N games for each team (who each team
played and the stats of those games). If Ngames is np.inf, we'll store the season history for each team. This means the sequence size will be variable. 

In [None]:
clean_df.shape

In [None]:
#we want the following:
#for each game we want the past N game history for each team
#need to divide up into teamIdx and seasIdx
#we're going to store the information 'twice' to make code simpler
team_col = 'TEAM_ID'
sz_col = 'SZSTART'
uteams = pd.unique(df_t[team_col])
uszns = pd.unique(df_t[sz_col])
# team_dfs[teamIdx, szIdx] holds one DataFrame per (team, season) pair.
team_dfs = np.zeros((len(uteams),len(uszns)), dtype=object)
team2idx = {uteam: idx for idx, uteam in enumerate(uteams)}
sz2idx = {uszn: idx for idx, uszn in enumerate(uszns)}
# Columns describing the game rather than one side of it; after the merge
# below only the '_M' copy is kept, under its plain name.
common_cols = ['HOME','AWAY','SZSTART','PLAYOFF','REG','S','M','T','W','Th','F','Sa','DsinceSzn','MINUTES']
              #'SEASON_ID','GAME_DATE']
for teamIdx, team in enumerate(uteams):
    # Masks are computed on df_t (unscaled values) and applied to clean_df;
    # this relies on the two frames sharing the same index.
    teamMask = df_t[team_col] == team
    for szIdx, sz in enumerate(uszns):
        szMask = df_t[sz_col] == sz
        team_mask = teamMask & szMask
        non_team_mask = np.logical_not(teamMask) & szMask
        tempNonTeam = clean_df[non_team_mask].copy()
        tempTeam = clean_df[team_mask].copy()
        # Pair each of this team's rows ('_M') with the other team's row for
        # the same GAME_ID ('_S').
        merged = tempTeam.merge(tempNonTeam, 'inner',on='GAME_ID',suffixes=('_M','_S'))
        merged = ridDupCols(merged,common_cols, '_S','_M', inplace=False)
        team_dfs[teamIdx, szIdx] = merged #stores a reference to the merged frame
        



In [None]:

merge_df[merge_df['GAME_ID']==20601230]

In [None]:
# Only games in which both teams have GsinceSzn > minG are turned into samples.
minG = 20
# Maximum number of previous games kept per team.
Ngames = 20
#make Ngames inf to have "rolling" history
# NOTE(review): histories of different lengths make NBADataset.tensorify raise
# ValueError("wont work for now").
teamColH = team_col + "_H"
teamColA = team_col + "_A"
# "gb4" = games before: GsinceSzn is a 0-based per-team game counter, so it
# equals the number of earlier games that team has played in the season.
gb4_col = 'GsinceSzn'
gb4_colH =  gb4_col +"_H"
gb4_colA =  gb4_col +"_A"

sample_df = team_dfs[0,0]
# Per-side stat columns: once for the team itself ('_M'), once for its opponent ('_S').
ind_cols = [n for n in numCols if n not in common_cols] + team_names.tolist()
ind_cols = [c + suff for suff in ('_M','_S') for c in ind_cols]
#convert these to int idxs
# Positions are looked up on one sample frame; assumes every frame in
# team_dfs has the same column layout.
commonColsI = [sample_df.columns.get_loc(c) for c in common_cols]
indColsI = [sample_df.columns.get_loc(c) for c in ind_cols]
colI = commonColsI + indColsI

# Columns of mergeScaled_df that describe the game being predicted.
curr_cols = curr_commcols +  [c + suff for suff in ('_H','_A') for c in currIcols]

minGmask =((merge_df[gb4_colH] > minG) &
           (merge_df[gb4_colA] > minG))
numSets = sum(minGmask)
h_hists = [None]*numSets #Ngame histories for the home team of currentMatchup
a_hists = [None]*numSets# "" away
currMatchup = [None]*numSets #stats that would be known prior to the game (date played, G/DsinceSzn, etc.) [not teams bc that's contained in histories]
ys = [None]*numSets #point differential of currMatchup
count = 0
SZNtracker = [None]*numSets
for index, row in merge_df.loc[minGmask].iterrows():
    szIdx = sz2idx[row[sz_col]]
    SZNtracker[count] = row[sz_col]
    teamIdxH = team2idx[row[teamColH]]
    teamIdxA = team2idx[row[teamColA]]
    h_df = team_dfs[teamIdxH, szIdx]
    a_df = team_dfs[teamIdxA, szIdx]
    
    gb4_H = row[gb4_colH]
    gb4_A = row[gb4_colA]
    
    # Default window: the last Ngames games before this one (or all of them if fewer).
    diffHistL = gb4_H - gb4_A
    stIdx_A = max(0, gb4_A - Ngames)
    stIdx_H = max(0, gb4_H - Ngames)
    # Trim the team with more games played so both windows have the same length.
    if diffHistL < 0:
        #away team has more history
        stIdx_A = max(-diffHistL, gb4_A - Ngames)
    elif diffHistL > 0:
        stIdx_H = max(diffHistL, gb4_H - Ngames)
    
    # Positional slices: assumes each team frame has one row per game in
    # chronological order, so rows [st:gb4) are the games before this one.
    h_hists[count] = h_df.iloc[stIdx_H:gb4_H, colI] 
    a_hists[count] = a_df.iloc[stIdx_A:gb4_A, colI]
    
    # Features come from the scaled frame, the target from the raw points.
    currMatchup[count] = mergeScaled_df.loc[index, curr_cols]
    ys[count] = row['PTS_H'] - row['PTS_A']
    count+=1 
    if count % 1000 == 0:
        print("{} / {}".format(count, numSets))
print("{} / {}".format(count, numSets))

In [None]:
currMatchup[10]

In [None]:
h_hists[10]

In [None]:
a_hists[10]

In [None]:
ys[10]

In [None]:
SZNtracker[10]

In [None]:
numSets

In [None]:
# Convert the per-game pandas objects to plain numpy arrays. Each history row
# is the home team's feature row followed by the away team's feature row.
currMatchVals = [matchup.values for matchup in currMatchup]
hist = [np.hstack((homeHist.values, awayHist.values))
        for homeHist, awayHist in zip(h_hists, a_hists)]

# Start Pytorch

We now have (home_hist, away_hist) and (home_points_scored - away_points_scored) as x and y. Home hist and away hist
are views of the previous N games for each team, where N can be variable across sequences (but must be the same for home hist and away hist). 

The idea is that we will feed the concatenated histories into the RNN, concatenate the currMatchup with the output
of the RNN and then feed that into a single layer to do the regression. The RNN will learn the best function to perform over the histories (a naive guess would be the average stats over the last N games is the best function to predict the outcome in the curr Matchup). 

In [None]:
import torch
#ok first have to define a dataset
class NBADataset(torch.utils.data.Dataset):
    """
    Dataset that yields (currMatchup, history, y) for every game.
    """

    def __init__(self,matches_list, histories_list, ys_list, SZNtracker, device) :
        """
        THEY SHOULD BE SORTED IN TIME; tensors passed in are not copied, so
        this data should be considered read only.

        matches_list[i] occurred before matches_list[i+1]
        matches_list - each element is a vector containing matchup info
                        - day of week, days since season began, playoff game etc.
        histories_list - each element is a 2d array (Ngames, Nfeatures) which consists of
                        the home team's and away team's stats over the last N games.
        ys_list - each element is a point differential (home - away)
        SZNtracker - list of seasons (starting year)
        device - torch device the tensors are moved to

        For every game, the matches list has info on the game, the histories has info on how each team in the
        game has performed over their last N games, the ys list has the outcome of the game, and the szntracker
        knows in what season each game occurs.
        """
        assert len(matches_list) == len(histories_list) == len(ys_list)== len(SZNtracker),"all inputs must have same length!"
        self.len = len(matches_list)
        self.device = device
        self.match, self.hists, self.ys = self.tensorify(matches_list,
                                                         histories_list, ys_list)
        self.szns = np.array(SZNtracker)

    def _to_float_tensor(self, data):
        """Return `data` as a float32 tensor on self.device (tensors are reused, not rebuilt)."""
        if torch.is_tensor(data):
            return data.float().to(device=self.device)
        # np.asarray (not np.array(..., copy=False)): list input always needs
        # a copy, which NumPy 2 rejects when copy=False is requested.
        return torch.as_tensor(np.asarray(data, dtype=float)).float().to(device=self.device)

    def tensorify(self,matches_list, histories_list, ys_list):
        """
        Convert matches_list, histories_list and ys_list to float tensors on
        self.device. ys is reshaped to a column of shape (N, 1).

        Inputs that already are tensors (as happens when `split` builds the
        sub-datasets) are passed through instead of being rebuilt.

        Raises ValueError when the histories do not all have the same number
        of rows, since they then cannot be stacked into one tensor.
        """
        match = self._to_float_tensor(matches_list)
        ys = torch.reshape(self._to_float_tensor(ys_list), (-1,1))
        if not torch.is_tensor(histories_list):
            #look at shapes, see if even or uneven lengths
            numRows = {arr.shape[0] for arr in histories_list}
            if len(numRows) > 1:
                #cannot make one giant tensor
                raise ValueError("wont work for now")
        hists = self._to_float_tensor(histories_list)
        return match, hists, ys

    def _subset(self, idx):
        """Build a new NBADataset from the rows at the integer positions `idx`."""
        tidx = torch.as_tensor(idx, dtype=torch.long, device=self.match.device)
        # Use the dataset's own device rather than a notebook-level global.
        return NBADataset(self.match[tidx],
                          self.hists[tidx],
                          self.ys[tidx],
                          self.szns[idx],
                          self.device)

    def split(self, szn, frac):
        """
        Split the dataset into three more datasets:
         1) the data that occurs before the point defined by
         $frac % through season $szn.
         2) The data that occurs after $frac % through season $szn and
         through each later season.
         3) the leftovers (the data that occurs before $frac % through each
         season after $szn); empty when $szn is the last season.

        This will allow us to split the data into training and testing sets.
        Say we want to test on the latter halves of seasons 2018, 2019 and 2020.
        We call self.split(2018, .5). The first dataset will be training data prior
        to 2018.5. The second dataset will be the test dataset (the second halves of
        2018, 2019, 2020), and the third dataset will be the first halves of 2019
        and 2020. We can't train on this data and test on the later half of 2018 because
        we'd be showing information from the future that it normally wouldn't have.
        Theoretically we could test on this data, but there won't be as much history
        built up.

        Assumes the samples are sorted in time so each season is contiguous.
        Returns (dtrain, dtest, dLeftOver), or None if no season is >= szn.
        """

        assert frac >=0 and frac <= 1, "frac must be [0,1]"

        szns = np.unique(self.szns)  # np.unique returns the seasons sorted
        szns2split = szns[szns >= szn]
        if len(szns2split) == 0:
            return None
        firstIdxs = []
        splitIdxs = []
        for szn2sp in szns2split:
            inSznMask = self.szns == szn2sp
            first = int(np.argmax(inSznMask))  # position of the season's first game
            firstIdxs.append(first)
            splitIdxs.append(first + int(frac*np.count_nonzero(inSznMask)))
        # Sentinel so that bounds[i+1] is the end of season i.
        bounds = firstIdxs + [self.len]
        tokeep = [np.arange(splitIdxs[i], bounds[i+1])
                  for i in range(len(splitIdxs))]
        # The first season's early part belongs to the training set, not the leftovers.
        leftOver = [np.arange(bounds[i], splitIdxs[i])
                    for i in range(1, len(splitIdxs))]

        keepIdx = np.concatenate(tokeep)
        # np.concatenate rejects an empty list (only one season to split).
        rmIdx = np.concatenate(leftOver) if leftOver else np.arange(0)
        #test dataset
        dtest = self._subset(keepIdx)
        dLeftOver = self._subset(rmIdx)
        #training dataset
        dtrain = self._subset(np.arange(splitIdxs[0]))
        return dtrain, dtest, dLeftOver


    def __len__(self):
         return self.len

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        return self.match[idx, :], self.hists[idx], self.ys[idx]
        

In [None]:
#now for the model
#Now for the model 
import torch.nn as nn
class RNNregressor(nn.Module):
    """GRU over the game histories followed by a linear regression layer.

    The GRU output at the last sequence step is concatenated with the
    current-matchup features, passed through a ReLU and then through a
    linear layer of width ``output_dim``.

    input_dim  - number of features per history step
    hidden_dim - GRU hidden size
    output_dim - size of the regression output
    currFeats  - number of current-matchup features appended to the GRU output
    n_layers, dropP, batch_first - forwarded to nn.GRU
    """

    def __init__(self, input_dim, hidden_dim,output_dim,currFeats,
                                     n_layers=1, dropP=0,batch_first=False):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.batch_first = batch_first
        self.gru = nn.GRU(input_size=input_dim, hidden_size=hidden_dim,
                          num_layers=n_layers, dropout=dropP,
                          batch_first=batch_first)
        self.dense = nn.Linear(hidden_dim+currFeats, output_dim)
        self.relu = nn.ReLU()
        # Pick once how to take the last sequence step for this tensor layout.
        self.indexFN = (self.indexBatchFirst if self.batch_first
                        else self.indexSeqFirst)

    def indexBatchFirst(self,out):
        """Last step of a (batch, seq, feature) tensor."""
        return out[:,-1,:]

    def indexSeqFirst(self, out):
        """Last step of a (seq, batch, feature) tensor."""
        return out[-1,:,:]

    def forward(self, x, curr):
        """x: history sequences fed to the GRU; curr: (batch, currFeats) matchup features."""
        seqOut, _ = self.gru(x)  # top-layer GRU output at every step
        lastStep = self.indexFN(seqOut)
        # The ReLU is applied to the matchup features as well as the GRU output.
        features = torch.cat((curr, lastStep), dim=1)
        return self.dense(self.relu(features))

In [None]:
def train(loader, model,loss_fn,opt, epochs):
    """Run `epochs` passes over `loader`, updating `model` with `opt`.

    Every batch is a (curr, hist, y) triple; the model is called as
    model(hist, curr) and `loss_fn(prediction, y)` is minimised.

    Returns the model and a list with the loss of every batch, in the order
    the batches were processed.
    """
    batchLosses = [None] * len(loader) * epochs
    model.train()
    step = 0
    for epoch in range(epochs):
        for curr, hist, y in loader:
            prediction = model(hist, curr)
            batchLoss = loss_fn(prediction, y)
            batchLoss.backward()
            opt.step()
            # Clear gradients so the next batch starts from zero.
            opt.zero_grad()
            batchLosses[step] = batchLoss.item()
            step += 1
        print("epoch", epoch)
    return model, batchLosses

def test(loader, model, loss_fn):
    losses = [None]*len(loader)
    running_right = 0
    running_total = 0
    model.eval()
    with torch.no_grad():
        for i, (curr, hist, y) in enumerate(loader):
            yhat = model(hist, curr)
            losses[i] = loss_fn(yhat, y).item()
            yhatnp = yhat.cpu().numpy()
            ynp = y.cpu().numpy()
            correctMask = ynp*yhatnp > 0
            running_right = sum(correctMask)
            running_total = len(correctMask)
        return losses, running_right/running_total



In [None]:
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split
# Use the first GPU when available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
num_workers=0
dataset = NBADataset(currMatchVals, hist,ys, SZNtracker, device)
#dtrain, dtest, dleft = dataset.split(2017, .5)
train_per = 80
# NOTE(review): test_per is not used below; the test size is whatever remains
# after the training share.
test_per = 20
# `count` is the number of samples produced by the history-building loop.
train_len = int(train_per/100.0*count)

test_len =count - train_len
# Fixed seed so random_split yields the same partition on every run.
torch.manual_seed(27)

# NOTE(review): this is a random split over all games, unlike the season-based
# split in the commented-out line above.
dtrain, dtest= random_split(dataset,[train_len, test_len]) 
train_load = DataLoader(dtrain, batch_size=20, num_workers=num_workers)
test_load = DataLoader(dtest, batch_size=20,num_workers=num_workers)

In [None]:
# Model hyperparameters passed to RNNregressor.
# NOTE(review): input_dim presumably has to equal the number of columns of each
# hist[i] (home + away features) -- confirm against hist[0].shape[1].
input_dim = 256
hidden_dim = 100
output_dim = 1
# 11 game-level columns (curr_commcols) + GsinceSzn_H + GsinceSzn_A.
currMatchup_dim = 13
n_layers = 1
dropP = 0
# The dataset stacks histories as (game, step, feature), so the batch dimension comes first.
batchFirst = True

In [None]:
epochs = 13
lr = .005

# Per-epoch metrics, appended to after every pass.
train_losses = []
test_losses = []
wl_accs = []

rnn = RNNregressor(input_dim, hidden_dim, output_dim, currMatchup_dim,
                  n_layers, dropP, batchFirst)
rnn.to(device=device)
opt = torch.optim.Adam(rnn.parameters(), lr=lr)
loss_fn = torch.nn.MSELoss()

for i in range(epochs):
    # train() is asked for a single epoch per call so the test metrics can be
    # recorded after every epoch.
    rnn, losses = train(train_load, rnn, loss_fn, opt, 1)
    test_loss, test_wl_acc = test(test_load, rnn, loss_fn)
    # Store the mean batch loss of the epoch.
    train_losses.append(np.mean(losses))
    test_losses.append(np.mean(test_loss))
    wl_accs.append(test_wl_acc)
    print("train/testloss | acc: {} / {} | {}".format(np.mean(losses), np.mean(test_loss), test_wl_acc))


In [None]:
import matplotlib.pyplot as plt
# Plot mean train and test loss per epoch and save the figure as train_test.png.
fs= 16
fig = plt.figure()
ax = fig.add_subplot(111)
# Epochs are numbered from 1 on the x axis.
ep = np.arange(len(train_losses))+1
ax.plot(ep, train_losses, label='train')
ax.plot(ep, test_losses, label="test")
ax.legend(fontsize=14)
# NOTE(review): the x axis starts at 1, so this marker sits at epoch 8; why
# epoch 8 is highlighted is not stated in this notebook.
ax.axvline(8, linestyle='--', alpha=.5,color='k') #0 indexed
ax.set_xlabel("Epoch", fontsize=fs)
ax.set_ylabel("Loss", fontsize=fs)
ax.set_title("Loss vs. Training Epoch", fontsize=fs)
fig.savefig("train_test.png", bbox_inches="tight")

In [None]:
# Plot the test win/loss accuracy per epoch and save the figure as wl_acc.png.
# Reuses `ep` and `fs` from the loss-plot cell.
fig2 = plt.figure()
ax2 = fig2.add_subplot(111)
# NOTE(review): `acs` is not used anywhere below in this cell.
acs = np.array(wl_accs)*np.max(train_losses)
ax2.plot(ep, wl_accs)
ax2.set_ylim([0,1])
ax2.set_xlabel("Epoch", fontsize=fs)
ax2.set_ylabel("W/L Accuracy", fontsize=fs)
ax2.set_title("W/L Accuracy vs. Training Epoch", fontsize=fs)
# Same epoch marker as in the loss plot.
ax2.axvline(8, linestyle='--', alpha=.5, color='k')
fig2.savefig("wl_acc.png", bbox_inches="tight")

In [None]:
#figure out where the images are saved in google Colab
os.listdir()

In [None]:
if IN_COLAB:
    # On Colab, download the two saved figures; `files` was imported from
    # google.colab in the upload cell.
    files.download('train_test.png')
    files.download('wl_acc.png')