<a href="https://colab.research.google.com/github/ianjamesbarnett/2022NFLBigDataBowl/blob/main/retyardFFNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import math
import numpy as np
import matplotlib.pyplot as plt
import os
import torch
import io
import torchvision as tv 
from PIL import Image
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pickle
import gc



In [None]:
# Mount Google Drive at /content/drive (requires the google.colab package, i.e. a Colab runtime).
# The data directory used by the following cells lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Directory holding the tracking/games/plays CSVs and the pickles; it also becomes
# the working directory so later cells can open files by bare name.
data_dir = "/content/drive/MyDrive/NFLBigDataBowl2022"
os.chdir(data_dir)

# Season to process: keep exactly one of these lines uncommented.
YEAR="2018";maxplays=756
#YEAR="2019";maxplays=749
#YEAR="2020";maxplays=855


# The tracking file is deliberately left open: a later cell rewinds it with
# filein.seek(0) and scans every row.
filein=open(data_dir+"/tracking"+YEAR+".csv",'r')
line=filein.readline()  # consume the header row

# NOTE(review): rows are split on every comma, so a quoted field that itself
# contains a comma would shift the column indices used below - confirm the CSVs
# never contain such fields (or switch to the csv module and re-check indices).

# game_d: first column of games.csv -> list of the remaining columns.
game_d={}
with open(data_dir+"/games.csv",'r') as filein2:
    filein2.readline()  # skip header
    for line in filein2:
        line_v=[x.strip('\"') for x in line.strip("\n").split(",")]
        game_d[line_v[0]]=line_v[1:]

# play_d: "<col0>-<col1>" of plays.csv -> list of the remaining columns.
play_d={}
with open(data_dir+"/plays.csv",'r') as filein3:
    filein3.readline()  # skip header
    for line in filein3:
        line_v=[x.strip('\"') for x in line.strip("\n").split(",")]
        play_d[line_v[0]+"-"+line_v[1]]=line_v[2:]


In [None]:
def PlotFieldPosition(temp):
    """Scatter-plot every tracked object of one frame, coloured by team.

    `temp` maps an object key to its tracking record. Record fields 0-3 are
    converted to float (x, y, s, a); fields 5-9 are copied as-is (o, dir,
    jerseyNumber, position, team). Colour code: 0 for "football", 1 for "away",
    2 for anything else. The plot puts y on the horizontal axis and x on the
    vertical axis.
    """
    numeric_fields = (('x', 0), ('y', 1), ('s', 2), ('a', 3))
    raw_fields = (('o', 5), ('dir', 6), ('jerseyNumber', 7), ('position', 8), ('team', 9))
    team_colour = {"football": 0, "away": 1}

    plotdata = {name: [] for name, _ in numeric_fields + raw_fields}
    plotdata['color'] = []
    for key in temp:
        record = temp[key]
        for name, idx in numeric_fields:
            plotdata[name].append(float(record[idx]))
        for name, idx in raw_fields:
            plotdata[name].append(record[idx])
        # Any team label other than football/away falls through to colour 2.
        plotdata['color'].append(team_colour.get(record[9].strip("\""), 2))

    plt.scatter('y','x',c='color',data=plotdata)
    plt.show()

#PlotFieldPosition(pret[key1])  


In [None]:






# get IDs of all plays to turn into videos (returned punts and kicks)
# catchframe: play ID -> column 14 of the tracking row carrying the
# punt_received / kick_received event (kept as a string).
catchframe = {}
pID_s=set()
filein.seek(0)  # rewind the tracking file opened in the loading cell
for line in filein:
    line_v=line.split(",")
    if line_v[8].strip("\"") in ("punt_received","kick_received"):
        gpID = line_v[15]+"-"+line_v[16]
        # only keep plays whose plays.csv result column says "Return"
        if play_d[gpID][6]=="Return":
            pID_s.add(gpID)
            catchframe[gpID]=line_v[14]


# NOTE(security): pickle.load can execute arbitrary code; only load pickles
# that were produced by a trusted source.
with open("retvids1_final_"+YEAR+".pickle",'rb') as pickle_in:
    ret_vids=pickle.load(pickle_in)
with open("retvids2_final_"+YEAR+".pickle",'rb') as pickle_in:
    ret_vids2=pickle.load(pickle_in)

# Play IDs to process, in the key order of the first pickle.

pIDs=list(ret_vids.keys())


In [None]:
# Number of plays (keys) found in ret_vids.
len(pIDs)

756

In [None]:


# Get outcome variable for a given play
# If the outcome is yards gained then this is a function of the frame as well.
def GetOutcomeVar(plID,frID,ret_vids):
    """Return the signed x-distance between frame `frID` and the play's last frame.

    The position used is field 0 of the 'NA' entry of each frame (the entry the
    rest of this notebook treats as the football). The last frame is looked up
    under the key str(number of frames). When field 10 of the first frame's 'NA'
    entry is 'right' the result is current - end, otherwise end - current.

    Returns the string 'NA' when column 15 of the play's plays.csv record is not
    'NA' (per the original comment: condition on no penalty).
    """
    if play_d[plID][15] != 'NA':
        return 'NA'

    frames = ret_vids[plID]
    final_key = str(len(frames))
    end_x = float(frames[final_key]['NA'][0])
    cur_x = float(frames[frID]['NA'][0])
    heading = frames['1']['NA'][10].strip("\"")
    return cur_x - end_x if heading == 'right' else end_x - cur_x

def os2xyspeed(o,s):
  """Split a speed `s` into [x, y] components for an angle `o` given in degrees.

  The angle is measured from the +y axis towards the +x axis, so
  0 -> [0, s], 90 -> [s, 0], 180 -> [0, -s], 270 -> [-s, 0].

  The previous implementation used a four-way quadrant chain whose third
  condition repeated `o<180` and was therefore unreachable. Every quadrant
  formula reduces to the same closed form, which is used directly here.

  Returns a two-element list [x, y].
  """
  ot=o*2*np.pi/360  # degrees -> radians
  return([s*np.sin(ot),s*np.cos(ot)])


def GetClosePlayers(player,plID,frID,ret_vids,radius=250):
  """List the objects of one frame around `player`, nearest first.

  Positions are fields 0 (x) and 1 (y) of each record in ret_vids[plID][frID].
  The entry keyed 'NA' is never listed; `player` itself is listed (distance 0)
  unless it is 'NA'. Entries farther away than `radius` are dropped.

  Returns a dict of numpy arrays, all ordered by ascending distance:
  'idv' (keys), 'distv' (distances), 'xrelv' / 'yrelv' (offsets from `player`).
  """
  frame=ret_vids[plID][frID]
  origin_x=float(frame[player][0])
  origin_y=float(frame[player][1])

  kept=[]  # (key, distance, dx, dy) for every entry inside the radius
  for key in list(frame.keys()):
    if key=='NA':
      continue
    dx=float(frame[key][0])-origin_x
    dy=float(frame[key][1])-origin_y
    dist=np.sqrt(dx**2+dy**2)
    if dist>radius:
      continue
    kept.append((key,dist,dx,dy))

  keys=np.array([item[0] for item in kept])
  dists=np.array([item[1] for item in kept])
  dxs=np.array([item[2] for item in kept])
  dys=np.array([item[3] for item in kept])

  order=np.argsort(dists)
  return({'idv':keys[order],'distv':dists[order],'xrelv':dxs[order],'yrelv':dys[order]})

In [None]:
# determine if x values need to be swapped or not
# find player closest to ball
# relx, rely, of ball to ball carrier
# x,y,xspeed, yspeed, acceleration, of ball carrier
# relx, rely, dist, xspeed, yspeed, acceleration, kicking_team_boolean, SORT BY DISTANCE TO BALL

def _player_kinematics(rec,revx):
  """Return (x, y, xspeed, yspeed, acceleration) for one tracking record; x and xspeed are mirrored when revx is set."""
  px=float(rec[0])
  py=float(rec[1])
  spd=float(rec[2])
  acc=float(rec[3])
  ori=float(rec[5])
  xspd,yspd=os2xyspeed(ori,spd)
  if revx:
    xspd=-xspd
    px=120-px
  return px,py,xspd,yspd,acc


def GetFieldStateFeatures(plID,frID,ret_vids):
  """Build the flat feature list describing one frame of one play.

  The entry closest to the 'NA' entry (treated as the ball) is taken as the
  ball carrier. When field 10 of the first frame's 'NA' record is 'right',
  all x coordinates are mirrored (120 - x) and x speeds negated.

  Layout of the returned list:
    * 7 values for the carrier: x and y offset from the ball, x/120, y/120,
      x speed, y speed, acceleration;
    * then, for every other entry in order of distance to the ball, 7 values:
      x offset/20, y offset/20, distance/13, x speed, y speed, acceleration,
      and 1 if the entry's team label ("home"/"away") is the kicking side
      (play_d column 4 compared with game_d column 4), else 0.

  The length is therefore 7 + 7 * (number of non-carrier entries); the
  network in this notebook takes 154 inputs, i.e. it presumably expects
  22 entries per frame - TODO confirm every frame has that many.
  """
  frame=ret_vids[plID][frID]
  homekick=play_d[plID][4]==game_d[plID.split("-")[0]][4]
  # reverse x coordinates for all objects if playdirection==right
  revx=ret_vids[plID]['1']['NA'][10].strip("\"")=='right'

  gcpout=GetClosePlayers('NA',plID,frID,ret_vids,radius=1000)
  ids=gcpout['idv']
  bholder=ids[0]  # nearest entry to the ball

  bx=float(frame['NA'][0])
  by=float(frame['NA'][1])
  if revx:
    bx=120-bx

  cx,cy,cxspd,cyspd,ca=_player_kinematics(frame[bholder],revx)
  x=[cx-bx,cy-by,cx/120,cy/120,cxspd,cyspd,ca]

  kicking_side="home" if homekick else "away"
  for i in range(1,len(ids)):
    rec=frame[ids[i]]
    cx,cy,cxspd,cyspd,ca=_player_kinematics(rec,revx)
    cretind=1 if rec[9].strip("\"")==kicking_side else 0
    x+=[(cx-bx)/20,(cy-by)/20,gcpout['distv'][i]/13,cxspd,cyspd,ca,cretind]
  return(x)
  

In [None]:
# Collect one (features, outcome) example per frame at or after the catch frame,
# for both versions of the play data (ret_vids and ret_vids2).
# Plays whose plays.csv column 15 is not 'NA' are skipped entirely.
x=[]
y=[]
for plID in pIDs:
    if play_d[plID][15]!='NA':
        continue
    for frID in list(ret_vids[plID].keys()):
        # frames before the catch are not used
        if float(frID)<float(catchframe[plID]):
            continue
        for vids in (ret_vids,ret_vids2):
            y.append(torch.tensor(GetOutcomeVar(plID,frID,vids)))
            x.append(GetFieldStateFeatures(plID,frID,vids))
#        temp=GenRGBFromPlayFrame(plID,frID,dpi=30)
#        x.append(temp.to_sparse())


        


In [None]:
# Number of outcome values collected (one per appended example).
len(y)

85550

In [None]:
# Convert the collected examples to tensors and split them by position:
# the first 90% become the training set, the remainder the test set.
xt = torch.tensor(x)
yt = torch.tensor(y)
n_train=int(len(xt)*.9)
trainset=torch.utils.data.TensorDataset(xt[:n_train],yt[:n_train])
testset=torch.utils.data.TensorDataset(xt[n_train:],yt[n_train:])

batch_size=64
loader_opts=dict(batch_size=batch_size,shuffle=True,num_workers=0)
trainloader = torch.utils.data.DataLoader(trainset, **loader_opts)
testloader = torch.utils.data.DataLoader(testset, **loader_opts)




#in_d=pickle.load(open("vid3ch"+YEAR+".pickle",'rb'))
#xt=in_d['xt']
#yt=in_d['yt']

In [None]:
class Net(nn.Module):
    """Fully connected regressor: three ReLU hidden layers and one linear output.

    Args:
        in_features: length of each input feature vector (default 154).
        hidden_sizes: widths of the three hidden layers (default (200, 100, 20)).

    The layers keep the names fc1..fc4, so state dicts saved from the
    default configuration remain loadable.
    """

    def __init__(self, in_features=154, hidden_sizes=(200, 100, 20)):
        super().__init__()
        h1, h2, h3 = hidden_sizes  # exactly three hidden widths are required
        self.fc1 = nn.Linear(in_features, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.fc3 = nn.Linear(h2, h3)
        self.fc4 = nn.Linear(h3, 1)

    def forward(self, x):
        """Map a (..., in_features) tensor to a (..., 1) tensor of predictions."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        # No activation on the output layer; callers drop the trailing dim themselves.
        x = self.fc4(x)
        return x



net = Net()


In [None]:
#modelpath=data_dir+"/expretyardCNN.pt"
#net.load_state_dict(torch.load(modelpath))

<All keys matched successfully>

In [None]:
# Train the network with mean-squared-error loss and report, after every epoch,
# the value returned by testerrv on the test loader.
# NOTE: testerrv is defined in a later cell of this notebook; that cell must be
# run before this one on a fresh kernel.
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.00001)
#optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.95)
testcor=[]
traincor=[]
for epoch in range(30):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs=inputs.float()
        labels=labels.float()
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        # squeeze(-1) removes only the trailing size-1 output dimension, so a
        # final batch holding a single sample keeps shape (1,) and still lines
        # up with `labels` (a bare squeeze() would collapse it to a scalar).
        outputs = net(inputs).squeeze(-1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        #print(i)
        if i % 1000 == 999:    # print every 1000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 1000))
            running_loss = 0.0
#    curtraincor=tensorcorr(trainloader,net)
#    curtestcor=tensorcorr(testloader,net)
#    print("testcor: ", curtestcor, "; traincor: ",curtraincor)
#    traincor.append(curtraincor)
#    testcor.append(curtestcor)
#    torch.save(net.state_dict(), modelpath)
#    print(testerrv(net,trainloader))
    testerr=testerrv(net,testloader)
    print(testerr)
    #torch.save(net.state_dict(),data_dir+"/retyardFFNN_"+str(epoch)+".pt")
print('Finished Training') #4.8 loss


[1,  1000] loss: 216.675
0.7142729466120298
[2,  1000] loss: 95.502
0.7297981418153273
[3,  1000] loss: 87.224
0.7474864820915064
[4,  1000] loss: 83.108
0.7630074809911911
[5,  1000] loss: 79.432
0.7759060184474604
[6,  1000] loss: 77.215
0.7854404384914736
[7,  1000] loss: 74.940
0.7914113635507115
[8,  1000] loss: 72.717
0.7958922673757396
[9,  1000] loss: 72.099
0.7986157829134582
[10,  1000] loss: 71.264
0.8003711401899959
[11,  1000] loss: 69.800
0.801152130688168
[12,  1000] loss: 71.196
0.8019188310764724
[13,  1000] loss: 69.709
0.8025603310349116
[14,  1000] loss: 69.750
0.8023191666545577
[15,  1000] loss: 69.719
0.8017175258488263
[16,  1000] loss: 68.950
0.8020959296016454
[17,  1000] loss: 69.175
0.8017282496216965
[18,  1000] loss: 67.970
0.8014919088512902
[19,  1000] loss: 68.151
0.8013941499685501
[20,  1000] loss: 67.480
0.8012355556794104
[21,  1000] loss: 67.447
0.8013451124792818
[22,  1000] loss: 67.525
0.8009516004199355
[23,  1000] loss: 67.844
0.80102352005467

In [None]:
def testerrv(net,testloader):
  outerr=[]
  testout=np.array([])
  trainout=np.array([])
  with torch.no_grad():
        for data in testloader:
            images, labels = data
            # calculate outputs by running images through the network
            images=images.float()
            labels=labels.float()
            outputs = net(images).squeeze()
            # the class with the highest energy is what we choose as prediction
            #print(outputs,labels)
            runtot=0
            denom=float(labels.shape[0])
            for i in range(int(denom)):
              runtot+=abs(labels[i]-outputs[i])/denom
            outerr.append(float(runtot))
            testout=np.append(testout,torch.Tensor.numpy(outputs))
            trainout=np.append(trainout,torch.Tensor.numpy(labels))
  return(np.corrcoef(testout,trainout)[0,1])
#return(sum(outerr)/len(outerr))

In [None]:
# Scratch check of np.append: it returns a new 1-D array with the second
# array's values placed after the first's.
testout=np.append(np.array([1]),np.array([1,2,3]))
testout

array([1, 1, 2, 3])

In [None]:
# Show the `labels` and `outputs` tensors currently bound in the global namespace.
# NOTE(review): neither name is assigned in this cell; they are whatever an
# earlier-run cell (e.g. the training loop) left behind - confirm which batch this is.
print(labels)
print(outputs)




tensor([ 5.4000e-01,  0.0000e+00,  3.7800e+00, -2.5400e+00,  4.1860e+01,
         9.9700e+00,  1.8650e+01,  6.2000e-01,  0.0000e+00,  2.2970e+01,
         0.0000e+00,  4.5100e+01, -3.0000e-02,  3.9000e+00,  1.5970e+01,
         1.5740e+01,  1.0010e+01,  2.0000e-02,  2.6000e-01,  2.4170e+01,
         6.7800e+00,  1.9650e+01,  1.0640e+01,  2.1570e+01,  7.0000e-02,
        -5.4000e-01, -1.0000e-02,  9.2500e+00,  2.1930e+01,  2.4810e+01,
         6.0000e-02,  1.6100e+00,  1.8150e+01,  2.1600e+01,  3.7400e+01,
         1.2170e+01,  2.3310e+01,  3.5600e+00,  6.2500e+00,  1.6830e+01,
         1.2300e+00,  5.2500e+00,  6.5000e-01])
tensor([ 1.9336, -1.4558,  1.0436, -2.2306, 21.0048, 12.9656, 15.8990, -0.5845,
         0.0492, 24.1577,  1.9121, 24.8923,  0.5963,  2.2941, 23.6604, 13.8248,
        17.0922, -0.7219,  3.4843, 23.2827,  5.6545, 20.5369, 12.2276, 24.2661,
        -0.8533,  2.4294,  0.3244, 11.6152, 21.1209, 25.6489, -0.8077, -0.9449,
        12.4514, 15.6657, 18.5422, 17.7215, 20.6