# All Conference Transfers

In [1]:
import json
import core_constants as cc
import functions as fx
import pandas as pd
import sqlite3 as sql
import recordlinkage

## Build out DataFrames

In [17]:
# Read every row of Transfers247_Unranked, key the frame by its IDYR column,
# and label that index '247_IDYR' so it is distinguishable once paired with
# the All-Conference index.
SQL = '''SELECT * from Transfers247_Unranked'''
df_247 = (
    fx.connDBAndReturnDF(SQL)
    .set_index('IDYR')
    .rename_axis('247_IDYR')
)
df_247

Unnamed: 0_level_0,ID,PlayerName,Year,StandardizedPosition,KeyPositionGroup
247_IDYR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
thomasbrown_nebraska_2012,thomasbrown_nebraska,thomasbrown,2012,LB,2
zaireanderson_nebraska_2012,zaireanderson_nebraska,zaireanderson,2012,LB,2
lamarrwoodley_michigan_2003,lamarrwoodley_michigan,lamarrwoodley,2003,LB,2
prescottburgess_michigan_2003,prescottburgess_michigan,prescottburgess,2003,DB,2
shawncrable_michigan_2003,shawncrable_michigan,shawncrable,2003,DE,2
...,...,...,...,...,...
brettmedforth_troy_2013,brettmedforth_troy,brettmedforth,2013,OL,1
cardelllue_troy_2013,cardelllue_troy,cardelllue,2013,DB,2
zachmoore_troy_2013,zachmoore_troy,zachmoore,2013,LB,2
sethroberts_troy_2013,sethroberts_troy,sethroberts,2013,DE,2


In [18]:
# Read the unlinked All-Conference records, key the frame by ID, and label
# that index 'AllConf_ID'.
SQL = '''SELECT ID, PlayerName, Year, StandardizedPosition, KeyPositionGroup from UnlinkedAllConference'''
df_ac = (
    fx.connDBAndReturnDF(SQL)
    .set_index('ID')
    .rename_axis('AllConf_ID')
)
df_ac

Unnamed: 0_level_0,PlayerName,Year,StandardizedPosition,KeyPositionGroup
AllConf_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
kennywillekes_michiganstate,kennywillekes,2017,DL,2
ryananderson_rutgers,ryananderson,2017,PK,3
ryanglasgow_michigan,ryanglasgow,2016,DL,2
tylerdavis_pennstate,tylerdavis,2016,PK,3
emmitcarpenter_minnesota,emmitcarpenter,2016,PK,3
...,...,...,...,...
jovonbouknight_wyoming,jovonbouknight,2005,WR,1
devinmoore_wyoming,devinmoore,2008,RB,1
warddobbs_wyoming,warddobbs,2008,LB,2
austinconway_wyoming,austinconway,2019,ATH,4


### NOTES

> You don't have position data, so you are doing the best you can here. Blocking on an exact name match isn't great, but to change this you'll have to change how you collect from Wikipedia, because you'll need to fetch position data so you can derive the position group and block on that.

In [19]:
# Candidate-pair generator: only pair an All-Conference record with a 247
# record when both have the same KeyPositionGroup value.
# Built through the Index API instead of the legacy BlockIndex alias, and
# with explicit left_on/right_on instead of the deprecated `on=` argument.
indexer = recordlinkage.Index()
# Trailing ';' keeps the cell from echoing the returned object.
indexer.block(left_on='KeyPositionGroup', right_on='KeyPositionGroup');

In [20]:
# Build the candidate pairs to be scored: df_ac supplies the first index
# level of each pair and df_247 the second.
pairs = indexer.index(df_ac, df_247)

In [21]:
# Labels of the comparison columns that are averaged into the match score.
# Kept in the same order as the comparisons registered below, which is also
# the column order of the computed feature table.
sumFields = ['PlayerName', 'StandardizedPosition', 'KeyPositionGroup', 'Year']

# Per-field comparison rules applied to every candidate pair.
c = recordlinkage.Compare()
# Fuzzy name similarity (Damerau-Levenshtein).
c.string('PlayerName', 'PlayerName', method='damerau_levenshtein', label='PlayerName')
# Exact-equality checks on position and position group.
c.exact('StandardizedPosition', 'StandardizedPosition', label='StandardizedPosition')
c.exact('KeyPositionGroup', 'KeyPositionGroup', label='KeyPositionGroup')
# Project-specific year comparison, reached through the `fx` alias imported at
# the top of the notebook rather than a second mid-notebook import.
# NOTE(review): YearOther's scoring rule is defined in functions.py - confirm
# what a score of 1.0 means before changing the Year filter downstream.
c.add(fx.YearOther('Year', 'Year', label='Year'))

In [22]:
# Apply the comparison rules registered on `c` to every candidate pair; the
# result has one row per pair and one column per comparison label.
features = c.compute(pairs, df_ac, df_247)

In [23]:
# Match score = unweighted mean of the per-field comparison scores.
# The accumulator is deliberately NOT called `sum`: rebinding that name would
# shadow the builtin for every later cell in the kernel session.
# The fields are added left to right so the floating-point result is stable.
score_total = 0
for field in sumFields:
    score_total = score_total + features[field]

# assign() returns a new frame, so the table produced by c.compute() is not
# modified in place and this cell can be re-run safely.
features = features.assign(Sum=score_total / len(sumFields))

# Keep only the pairs whose Year comparison scored a full 1.0.
features = features[features['Year'] == 1.0]

features

Unnamed: 0_level_0,Unnamed: 1_level_0,PlayerName,StandardizedPosition,KeyPositionGroup,Year,Sum
AllConf_ID,247_IDYR,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
kennywillekes_michiganstate,thomasbrown_nebraska_2012,0.000000,0,1,1.0,0.500000
kennywillekes_michiganstate,zaireanderson_nebraska_2012,0.000000,0,1,1.0,0.500000
kennywillekes_michiganstate,stevelonga_rutgers_2012,0.076923,0,1,1.0,0.519231
kennywillekes_michiganstate,kevinward_iowa_2013,0.230769,0,1,1.0,0.557692
kennywillekes_michiganstate,jacktrainor_indiana_2015,0.000000,1,1,1.0,0.750000
...,...,...,...,...,...,...
devinmoore_wyoming,nathandouglas_louisiana_2007,0.153846,0,1,1.0,0.538462
devinmoore_wyoming,lancekelley_louisiana_2007,0.090909,0,1,1.0,0.522727
devinmoore_wyoming,jawanzamitchell_louisiana_2007,0.200000,0,1,1.0,0.550000
devinmoore_wyoming,bradmcguire_louisiana_2007,0.181818,0,1,1.0,0.545455


In [27]:
# Minimum mean field score for a pair to be kept as a link candidate.
# (.929 was noted next to the original threshold as an alternative value.)
MIN_LINK_SCORE = .98

# For each All-Conference record (index level 0) keep only its single
# highest-scoring candidate; idxmax picks the first row when scores tie.
best_pair_labels = features.groupby(level=0)['Sum'].idxmax()
best_matches = features.loc[best_pair_labels.tolist()]

# Work on a copy so `features` is never mutated: inserting the ID columns
# into `features` itself made this cell fail on a second run ("column already
# exists") and left later cells dependent on leftover kernel state.
dfFinal = best_matches[best_matches['Sum'] > MIN_LINK_SCORE].copy()
dfFinal.insert(0, 'sourceID', dfFinal.index.get_level_values(0))
dfFinal.insert(1, 'targetID', dfFinal.index.get_level_values(1))

# Flatten the two-level pair index into (source, target) tuple labels so the
# CSV keeps a single leading label column.
dfFinal.index = dfFinal.index.to_flat_index()
dfFinal.to_csv("resultsAllConferenceTransfer.csv")

In [28]:
dfFinal

Unnamed: 0,sourceID,targetID,PlayerName,StandardizedPosition,KeyPositionGroup,Year,Sum
"(adamvincent_utep, adamvincent_arizonastate_2004)",adamvincent_utep,adamvincent_arizonastate_2004,1.0,1,1,1.0,1.0
"(adariusbowman_oklahomastate, adariusbowman_northcarolina_2003)",adariusbowman_oklahomastate,adariusbowman_northcarolina_2003,1.0,1,1,1.0,1.0
"(adrianbushell_louisville, adrianbushell_florida_2008)",adrianbushell_louisville,adrianbushell_florida_2008,1.0,1,1,1.0,1.0
"(alexlewis_nebraska, alexlewis_colorado_2010)",alexlewis_nebraska,alexlewis_colorado_2010,1.0,1,1,1.0,1.0
"(alihighsmith_lsu, alihighsmith_miami_2003)",alihighsmith_lsu,alihighsmith_miami_2003,1.0,1,1,1.0,1.0
...,...,...,...,...,...,...,...
"(tekerreincuba_tcu, tekerreincuba_missouri_2007)",tekerreincuba_tcu,tekerreincuba_missouri_2007,1.0,1,1,1.0,1.0
"(trentmackey_tulane, trentmackey_duke_2008)",trentmackey_tulane,trentmackey_duke_2008,1.0,1,1,1.0,1.0
"(tyrellfenroy_louisianalafayette, tyrellfenroy_louisiana_2005)",tyrellfenroy_louisianalafayette,tyrellfenroy_louisiana_2005,1.0,1,1,1.0,1.0
"(tyronemckenzie_southflorida, tyronemckenzie_michiganstate_2004)",tyronemckenzie_southflorida,tyronemckenzie_michiganstate_2004,1.0,1,1,1.0,1.0


In [29]:
# Only the first N candidate links are written out for manual review.
MAX_PAIRS_TO_ANNOTATE = 160

# Build the (All-Conference ID, 247 ID) pair index from the row labels of
# dfFinal, which are (source, target) tuples. MultiIndex.from_frame(dfFinal)
# would instead turn EVERY column - including the score columns - into an
# index level, and is only usable when the first two columns happen to be
# the record IDs.
fuzzyMI = pd.MultiIndex.from_tuples(
    list(dfFinal.index),
    names=['AllConf_ID', '247_IDYR'],
)

# Write the annotation file that is labelled by hand outside this notebook.
recordlinkage.write_annotation_file(
    "../Annotations/Annotations/annotation_ACTransfersUnranked.json",
    fuzzyMI[:MAX_PAIRS_TO_ANNOTATE],
    df_ac,
    df_247,
    dataset_a_name="All Conference",
    dataset_b_name="Master"
)

In [30]:
# Load the hand-labelled annotation results.
annotation = recordlinkage.read_annotation_file("..//Annotations//Results//result-acunranked.json")

# Pairs labelled as links, flattened to (source, target) tuples.
# The previous try/except printed any error and carried on, which left
# `annotation_dict` undefined - or, worse, still holding the value from an
# earlier run, which the next cell would then insert into the database.
# Real errors now propagate; the one expected "nothing to do" case is handled
# explicitly.
confirmed_links = annotation.links
if confirmed_links is None:
    # NOTE(review): recordlinkage appears to return None when no pair was
    # labelled as a match - confirm against the installed version.
    print("No pairs were labelled as links in the annotation file.")
    annotation_dict = []
else:
    annotation_dict = confirmed_links.to_flat_index()

In [31]:
# Constant column values written with every link row.
#MAKE SURE YOU UPDATE KEY_DATASET TO THE CORRECT KEYDATASET!!
KEY_DATASET = 4
TARGET_KEY_DATASET = 1
KEY_LINK_TYPE = 1
LINK_CONFIDENCE = 1
TRANSFER = 1

query = '''INSERT INTO RecordLinks(MasterID, TargetID, KeyDataSet, TargetKeyDataSet, KeyLinkType, LinkConfidence, Transfer)
        VALUES (?,?,?,?,?,?,?)'''

# One parameter tuple per confirmed (source, target) link.
link_rows = [
    (record[0], record[1], KEY_DATASET, TARGET_KEY_DATASET,
     KEY_LINK_TYPE, LINK_CONFIDENCE, TRANSFER)
    for record in annotation_dict
]

# Open a single connection for the whole batch (the old loop opened one per
# row and only closed the last) and always close it, even when there are no
# rows or an insert fails. `with conn:` commits on success and rolls back on
# error, so the batch is written all-or-nothing.
# No cursor is bound to `c`, so the Compare object of that name stays intact.
conn = sql.connect(cc.databaseName)
try:
    with conn:
        conn.executemany(query, link_rows)
finally:
    conn.close()