# NFL Transfers

In [1]:
import json
import core_constants as cc
import functions as fx
import pandas as pd
import sqlite3 as sql
import recordlinkage

## Build out DataFrames

In [2]:
# Load every row of the Transfers247 table through the project DB helper,
# keyed by the IDYR column; the index is relabelled '247_IDYR' so it stays
# distinguishable from the NFL index once the two frames are paired.
SQL = '''SELECT * from Transfers247'''
df_247 = (
    fx.connDBAndReturnDF(SQL)
    .set_index('IDYR')
    .rename_axis('247_IDYR')
)
df_247

Unnamed: 0_level_0,ID,PlayerName,Year,StandardizedPosition,KeyPositionGroup
247_IDYR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
aaronbanks_notredame_2017,aaronbanks_notredame,aaronbanks,2017,OL,1
aaronbennett_texastech_2013,aaronbennett_texastech,aaronbennett,2013,OL,1
aaroncochran_california_2013,aaroncochran_california,aaroncochran,2013,OL,1
aaronepps_louisville_2011,aaronepps_louisville,aaronepps,2011,OL,1
aaronevans_centralflorida_2013,aaronevans_centralflorida,aaronevans,2013,OL,1
...,...,...,...,...,...
ulatolutau_wisconsin_2014,ulatolutau_wisconsin,ulatolutau,2014,FB,1
willieross_toledo_2016,willieross_toledo,willieross,2016,FB,1
winstondimel_kansasstate_2014,winstondimel_kansasstate,winstondimel,2014,FB,1
wyattshallman_michigan_2013,wyattshallman_michigan,wyattshallman,2013,FB,1


In [8]:
# Load every row of the UnlinkedNFL table through the project DB helper,
# keyed by the ID column; the index is relabelled 'NFL_ID' so it stays
# distinguishable from the 247 index once the two frames are paired.
SQL = '''SELECT * from UnlinkedNFL'''
df_nfl = (
    fx.connDBAndReturnDF(SQL)
    .set_index('ID')
    .rename_axis('NFL_ID')
)
df_nfl

Unnamed: 0_level_0,PlayerName,Year,KeyPositionGroup,StandardizedPosition
NFL_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
aaronmellette_,aaronmellette,2013,1,WR
adamjennings_fresnostate,adamjennings,2006,1,WR
airesecurrie_clemson,airesecurrie,2005,1,WR
alanbonner_,alanbonner,2013,1,WR
andreroberts_,andreroberts,2010,1,WR
...,...,...,...,...
roberthunt_,roberthunt,2005,1,OL
ryancook_newmexico,ryancook,2006,1,OL
scottmruczkowski_bowlinggreen,scottmruczkowski,2005,1,OL
deonsimon_,deonsimon,2015,2,DL


In [4]:
# Candidate pairs are restricted to records that share the same
# KeyPositionGroup (blocking), which keeps the comparison step from
# scoring the full cross product of the two tables.
# The blocking rule is registered on a single Index object; the older
# class-style `recordlinkage.BlockIndex` is deprecated in favour of this API.
indexer = recordlinkage.Index()
indexer.block(['KeyPositionGroup'])

In [5]:
# Build the candidate record pairs between df_nfl (first argument) and
# df_247 (second argument); these pairs are what the comparison step scores.
pairs = indexer.index(df_nfl, df_247)

In [6]:
from functions import YearNFL

# Labels of the comparison columns that are later averaged into one score.
# The order mirrors the order in which the comparisons are registered below.
sumFields = ['PlayerName', 'StandardizedPosition', 'KeyPositionGroup', 'Year']

c = recordlinkage.Compare()

# Fuzzy name match: Damerau-Levenshtein string similarity.
c.string('PlayerName', 'PlayerName', method='damerau_levenshtein', label='PlayerName')

# Position columns must agree exactly.
for exact_field in ('StandardizedPosition', 'KeyPositionGroup'):
    c.exact(exact_field, exact_field, label=exact_field)

# Year uses the project-specific YearNFL comparison from functions.py.
c.add(YearNFL('Year', 'Year', label='Year'))

In [7]:
# Run every registered comparison over the candidate pairs. The result is
# indexed by pair and has one column per comparison label (the labels are
# used as column keys in the scoring step that follows).
features = c.compute(pairs, df_nfl, df_247)

In [None]:
# Average the per-field comparison scores into a single 'Sum' column.
# The accumulator deliberately is NOT called `sum`: a notebook-global `sum`
# would shadow the builtin for every later cell in the kernel session.
score_total = 0
for field in sumFields:
    score_total = score_total + features[field]

features['Sum'] = score_total / len(sumFields)

# Keep only the pairs whose Year comparison scored exactly 1.0.
features = features[features['Year'] == 1.0]

features

In [None]:
# Minimum averaged score a best candidate must exceed to be accepted as a link.
MATCH_THRESHOLD = .94

# Expose both levels of the pair index as ordinary leading columns.
# Any ID columns left over from a previous run of this cell are dropped first,
# so re-running the cell does not fail with "cannot insert ..., already exists".
features = features.drop(columns=['sourceID', 'targetID'], errors='ignore')
features.insert(0, 'sourceID', features.index.get_level_values(0))
features.insert(1, 'targetID', features.index.get_level_values(1))

# For every source record (index level 0) keep only its highest-scoring
# candidate, and only when that score clears the threshold.
filteredList = []
for _, candidates in features.groupby(level=0):
    best = candidates.loc[candidates['Sum'].idxmax()]
    if best['Sum'] > MATCH_THRESHOLD:
        filteredList.append(best)

# Build the frame in one step from the collected rows; DataFrame.append was
# deprecated in pandas 1.4 and removed in pandas 2.0.
dfFinal = pd.DataFrame(filteredList)
dfFinal.to_csv("resultsNFLTransfer.csv")

In [None]:
# Display the accepted best matches for a visual check before annotation.
dfFinal

In [None]:
# A candidate pair is identified by its two record IDs only, so the pair index
# is built from exactly those columns. Building it from the whole frame would
# add one index level per column (including the float scores) and would only
# work as long as the ID columns happen to be the first two.
fuzzyMI = pd.MultiIndex.from_frame(dfFinal[['sourceID', 'targetID']])

# Export the first 100 pairs for manual review in the annotation tool.
recordlinkage.write_annotation_file(
    "../Annotations/Annotations/annotation_nflTransfers.json",
    fuzzyMI[0:100],
    df_nfl,
    df_247,
    dataset_a_name="NFL",
    dataset_b_name="Master"
)

In [None]:
annotation = recordlinkage.read_annotation_file("..//Annotations//Results//nfltransfer_result.json")

# Start from an empty result so that a failure below can never leave a stale
# `annotation_dict` from an earlier run in the kernel (the insert cell would
# otherwise write outdated links to the database).
annotation_dict = []
try:
    links = annotation.links
    # NOTE(review): `links` appears to be None when no pair was marked as a
    # match - confirm against the installed recordlinkage version.
    if links is None:
        print("No confirmed links found in the annotation file.")
    else:
        # Flatten the pair index into plain (id_a, id_b) tuples.
        annotation_dict = links.to_flat_index()
except Exception as e:
    print(e)

In [None]:
# MAKE SURE YOU UPDATE THIS VALUE TO THE CORRECT KEYDATASET!!
KEY_DATASET = 3

query = '''INSERT INTO RecordLinks(MasterID, TargetID, KeyDataSet, KeyLinkType, LinkConfidence, Transfer)
        VALUES (?,?,?,?,?,?)'''

# One parameter row per confirmed link. The trailing 1, 1, 1 fill the
# KeyLinkType, LinkConfidence and Transfer columns respectively.
link_rows = [
    (record[0], record[1], KEY_DATASET, 1, 1, 1)
    for record in annotation_dict
]

# Open a single connection for the whole batch (instead of one per record)
# and always close it, even when there are no rows or an insert fails.
conn = sql.connect(cc.databaseName)
try:
    # `with conn` commits when the block succeeds and rolls back on an
    # exception, so the batch is written all-or-nothing.
    with conn:
        conn.executemany(query, link_rows)
finally:
    conn.close()