In [46]:
import pandas as pd
import numpy as np
import re
from bs4 import BeautifulSoup
import requests
import time
from tqdm import tqdm

In [336]:
pattern = r'(?<!Waived|Signed)(WR|TE|QB|RB) ([A-Za-z ]+) \(([A-Za-z]+)\) (?: , | on Injured Reserve.|\sand\s)'
matches = []
for tp in tqdm([x for x in range(2014,2015)]):
    for month in range(1,13): 
        txt_month = str(month)
        if month < 10:
            txt_month = '0' + txt_month
        
        url = 'https://www.footballdb.com/transactions/index.html'
        headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15'}
        time.sleep(2)
        r = requests.get(url, params={'period': str(tp) + txt_month}, headers=headers)
        soup = BeautifulSoup(r.content,'html.parser')


         # We extract all of the dates for the year-specific page and place them into a list
        dateslst = soup.find_all("div", class_='stacktable-title')
        for date in dateslst:
            teamblock = date.find_next_sibling()
            teamslst =  teamblock.find_all('b')
            translist = teamblock.find_all('div', class_='td w75 td-clear')
            couple = list(zip(teamslst,translist))  #Arz and a bunch of teams

            for team, trans in couple:
                # print(trans.text)
                obj = {}
                obj["date"] = date.text.replace('\n','')
                obj['team'] = team.text
                obj['original_transaction'] = trans.text
                
                # Each player mentioned in the transaction has a link to their player page, we'll treat this as a unique
                # identifier
                pages = [x['href'] for x in trans.find_all('a')]
                
                #...as well as the player's name
                players = re.findall(r'([A-Z]{1,2} [A-Za-z\'\`\.\-]+ [A-Za-z\'\-]+)+',trans.text)
                
                # We'll marry the page and the player into a list of tuples
                obj['all_players_mentioned'] = dict(zip(players,pages))
                # obj["Position"] = stuff[0]
                # obj["Name"] = stuff[1]
                matches.append(obj)
        
        # matches.extend([{'Date':date.text.replace('\n','') ,'Team': team.text, 'Position':stuff[0], 'Name': stuff[1],  } for stuff in trans.text])
        # print(date.text, trans.text)
    #     print('========')

    
  

100%|██████████| 1/1 [00:34<00:00, 34.78s/it]


In [337]:
df = pd.DataFrame(matches)
df = df.astype({'date': 'datetime64[ns]'})
df.dtypes

date                     datetime64[ns]
team                             object
original_transaction             object
all_players_mentioned            object
dtype: object

In [338]:
df.date.dt.month.value_counts()

8     360
9     288
5     273
12    264
3     242
10    236
11    224
7     149
6     146
4     132
1      85
2      41
Name: date, dtype: int64

In [339]:
# Want to split based on periods (sentences) but only if they are NOT preceded by a Capital (indicating part of name)
df['transaction_lst'] = df['original_transaction'].str.split(r'(?<![A-Z])\. ')

In [206]:
df.to_csv('data/All_Transactions.csv', index=False, sep='|')

In [340]:
# The dataframe is specific to the day, meaning multiple transactions are in one field. Each transaction sentence
# will become it's own row

df_lst = df.explode('transaction_lst')

In [341]:
df_lst.sample(20)

Unnamed: 0,date,team,original_transaction,all_players_mentioned,transaction_lst
322,2014-03-11,San Francisco 49ers,Signed DB Antoine Bethea. Re-signed K Phil Daw...,{'DB Antoine Bethea': '/players/antoine-bethea...,Signed DB Antoine Bethea
2281,2014-12-17,Green Bay Packers,Signed DB Jean Fanor to the Practice Squad.,{'DB Jean Fanor': '/players/jean-fanor-fanorje...,Signed DB Jean Fanor to the Practice Squad.
1980,2014-11-26,Houston Texans,Signed QB Thaddeus Lewis. Signed LB Terrance P...,{'QB Thaddeus Lewis': '/players/thaddeus-lewis...,Signed QB Thaddeus Lewis
883,2014-06-05,Jacksonville Jaguars,Acquired WR Kevin Smith via waivers (from the ...,{'WR Kevin Smith': '/players/kevin-smith-smith...,Acquired WR Kevin Smith via waivers (from the ...
1227,2014-08-23,New York Jets,Released OG Bruce Campbell. Waived LB Steele D...,{'OG Bruce Campbell': '/players/bruce-campbell...,"Waived LB Steele Divitto, OT Patrick Ford, K A..."
1632,2014-09-06,Miami Dolphins,Signed DB Brandian Ross to the Practice Squad....,{'DB Brandian Ross': '/players/brandian-ross-r...,Signed DB Brandian Ross to the Practice Squad
1524,2014-09-19,New York Giants,Waived OG Eric Herman.,{'OG Eric Herman': '/players/eric-herman-herma...,Waived OG Eric Herman.
1094,2014-08-31,Seattle Seahawks,"Signed TE Rashaun Allen, RB Demitrius Bronson,...",{'TE Rashaun Allen': '/players/rashaun-allen-a...,"Signed TE Rashaun Allen, RB Demitrius Bronson,..."
608,2014-05-19,Green Bay Packers,Signed draft pick DT Khyri Thornton. Signed un...,{'DT Khyri Thornton': '/players/khyri-thornton...,Signed draft pick DT Khyri Thornton
338,2014-03-08,Jacksonville Jaguars,Signed DE Red Bryant.,{'DE Red Bryant': '/players/red-bryant-bryanre...,Signed DE Red Bryant.


In [342]:
df_lst['players_in_trans'] = df_lst['transaction_lst'].str.findall(r'([A-Z]{1,2} [A-Za-z\'\`\.\-]+ [A-Za-z\'\-]+)+')

In [343]:
df_ply_lst = df_lst.explode('players_in_trans')
df_ply_lst.head(7)

Unnamed: 0,date,team,original_transaction,all_players_mentioned,transaction_lst,players_in_trans
0,2014-01-30,Detroit Lions,Signed C Darren Keyton to a future contract.,{'C Darren Keyton': '/players/darren-keyton-ke...,Signed C Darren Keyton to a future contract.,C Darren Keyton
1,2014-01-29,Arizona Cardinals,Signed K Danny Hrapmann to a future contract.,{'K Danny Hrapmann': '/players/danny-hrapmann-...,Signed K Danny Hrapmann to a future contract.,K Danny Hrapmann
2,2014-01-29,Kansas City Chiefs,Signed DT Risean Broussard to a future contract.,{'DT Risean Broussard': '/players/risean-brous...,Signed DT Risean Broussard to a future contract.,DT Risean Broussard
3,2014-01-28,Pittsburgh Steelers,Signed WR Danny Coale to a future contract.,{'WR Danny Coale': '/players/danny-coale-coale...,Signed WR Danny Coale to a future contract.,WR Danny Coale
4,2014-01-27,Green Bay Packers,Signed RB Ina Liaina to a future contract.,{'RB Ina Liaina': '/players/ina-liaina-liainin...,Signed RB Ina Liaina to a future contract.,RB Ina Liaina
5,2014-01-27,Pittsburgh Steelers,Signed RB Tauren Poole and LB Vic So'oto to a ...,{'RB Tauren Poole': '/players/tauren-poole-poo...,Signed RB Tauren Poole and LB Vic So'oto to a ...,RB Tauren Poole
5,2014-01-27,Pittsburgh Steelers,Signed RB Tauren Poole and LB Vic So'oto to a ...,{'RB Tauren Poole': '/players/tauren-poole-poo...,Signed RB Tauren Poole and LB Vic So'oto to a ...,LB Vic So'oto


In [348]:
df_ply_lst[df_ply_lst['transaction_lst'].str.contains('LB Rolando McClain')]['original_transaction'].values

array(['Placed LB Rolando McClain on the Reserve/Retired List.',
       'Reinstated LB Rolando McClain.',
       'Traded LB Rolando McClain to the Dallas Cowboys.'], dtype=object)

In [374]:
df_ply_lst[df_ply_lst['transaction_lst'] == 'Louis Rams']

Unnamed: 0,date,team,original_transaction,all_players_mentioned,transaction_lst,players_in_trans,action
1755,2014-10-28,Tampa Bay Buccaneers,Traded DB Mark Barron to the St. Louis Rams. T...,{'DB Mark Barron': '/players/mark-barron-barro...,Louis Rams,,


In [376]:
def getTransactionByPlayer(transaction,player):
    trans = transaction.split()
    if trans[0] == 'Placed':
        if trans[-1] == "Reserve.":
        # found = re.findall(r'([A-Z]{1,2} [A-Za-z\'\`\.]+ [A-Za-z\']+ [\(\)A-Za-z\'\`]+)+ (on Injured Reserve)',transaction)
            found = re.findall(rf'{player} ([\(\)A-Za-z]+) .*(on Injured Reserve.)',transaction)
            if len(found) > 0:
                return "Placed " + player + found[0][0] + " " + found[0][1]
            else:
                return found
        elif trans[-2] == "Reserve/Retired":
            return "Placed on the Reserve/Retired List"
            
            
    elif trans[0] == 'Signed':
        # found = re.findall(rf'{player} .*',transaction)
        # if len(found) > 0:
        return "Signed"
    elif trans[0] == 'Re-signed':
        return "Re-signed"
    elif trans[0] == "Released":
        return "Released"
    elif trans[0] == "Released":
        return "Released" 
    elif tran[0] == "Reinstated"
        return "Reinstated"
    elif trans[0] == "Acquired":
        return "Acquired"
    elif trans[0] == "Traded":
        return "Traded"
    elif trans[0] == "Waived":
        return "Waived"
    elif trans[0] == "Reinstated":
        return "Reinstated"
        # if trans[-2] == "Reserve/Retired List":
        #     print(trans)
            # return "Placed on the Reserve/Retired List"
    elif trans[0] == "Activated":
        return 'Activated from the Reserve/Suspended List'
    
    else:
        print(transaction)




df_ply_lst['action'] = df_ply_lst.apply(lambda x: getTransactionByPlayer(x['transaction_lst'],x['players_in_trans']), axis=1)

Louis Rams)
(Shoulder) on Injured Reserve.
(injured).
Louis Rams)
to the Practice Squad.
(Shoulder) on Injured Reserve.
(injured).
Louis Rams practice squad
Louis Rams
Louis Rams)
to the Practice Squad
Louis Rams practice squad
Louis Rams practice squad


In [386]:
df_ply_lst[df_ply_lst['action'].apply(lambda x: type(x) is not str)]
# [df_ply_lst['transaction_lst'].str.contains(' and ')]

Unnamed: 0,date,team,original_transaction,all_players_mentioned,transaction_lst,players_in_trans,action
409,2014-04-17,Baltimore Ravens,Reinstated LB Rolando McClain.,{'LB Rolando McClain': '/players/rolando-mccla...,Reinstated LB Rolando McClain.,LB Rolando McClain,
453,2014-04-08,Washington Redskins,Signed P Blake Clingan. Placed DE Brandon Moor...,{'P Blake Clingan': '/players/blake-clingan-cl...,Placed DE Brandon Moore on the Reserve/Suspend...,DE Brandon Moore,
500,2014-05-30,Arizona Cardinals,Placed LB Daryl Washington on the Reserve/Susp...,{'LB Daryl Washington': '/players/daryl-washin...,Placed LB Daryl Washington on the Reserve/Susp...,LB Daryl Washington,
533,2014-05-28,Seattle Seahawks,Placed OT Garrett Scott (Heart) on the Non-Foo...,{'OT Garrett Scott': '/players/garrett-scott-s...,Placed OT Garrett Scott (Heart) on the Non-Foo...,OT Garrett Scott,
651,2014-05-16,New York Jets,Acquired RB Daryl Richardson via waivers (from...,{'RB Daryl Richardson': '/players/daryl-richar...,Louis Rams),,
...,...,...,...,...,...,...,...
2418,2014-12-03,Tennessee Titans,Signed DB Jemea Thomas from the St. Louis Rams...,{'DB Jemea Thomas': '/players/jemea-thomas-tho...,Louis Rams practice squad,,
2425,2014-12-02,Miami Dolphins,Signed DB Lowell Rose to the Practice Squad. R...,{'DB Lowell Rose': '/players/lowell-rose-rosel...,Placed WR LaRon Byrd on the Practice Squad/Inj...,WR LaRon Byrd,
2426,2014-12-02,New York Giants,Signed RB Chris Ogbonnaya. Signed LB James Dav...,{'RB Chris Ogbonnaya': '/players/chris-ogbonna...,"Placed DE Robert Ayers (Pectoral), DE Mathias ...",DE Robert Ayers,[]
2426,2014-12-02,New York Giants,Signed RB Chris Ogbonnaya. Signed LB James Dav...,{'RB Chris Ogbonnaya': '/players/chris-ogbonna...,"Placed DE Robert Ayers (Pectoral), DE Mathias ...",DE Mathias Kiwanuka,[]


In [334]:
df_ply_lst
# .columns
df_ply_lst['page'] = df_ply_lst.apply(lambda x: x['all_players_mentioned'][x['players_in_trans']] ,axis=1)

In [335]:
df_ply_lst

Unnamed: 0,date,team,original_transaction,all_players_mentioned,transaction_lst,players_in_trans,action,page
0,2014-03-31,Houston Texans,Released DB Danieal Manning.,{'DB Danieal Manning': '/players/danieal-manni...,Released DB Danieal Manning.,DB Danieal Manning,Released DB Danieal Manning,/players/danieal-manning-mannida01
1,2014-03-31,New York Giants,Signed DB Zack Bowman. Re-signed DT Mike Patte...,{'DB Zack Bowman': '/players/zack-bowman-bowma...,Signed DB Zack Bowman,DB Zack Bowman,Signed DB Zack Bowman,/players/zack-bowman-bowmaza01
1,2014-03-31,New York Giants,Signed DB Zack Bowman. Re-signed DT Mike Patte...,{'DB Zack Bowman': '/players/zack-bowman-bowma...,Re-signed DT Mike Patterson.,DT Mike Patterson,Re-signed DT Mike Patterson,/players/mike-patterson-pattemi01
2,2014-03-31,New York Jets,Re-signed LB Nick Bellore.,{'LB Nick Bellore': '/players/nick-bellore-bel...,Re-signed LB Nick Bellore.,LB Nick Bellore,Re-signed LB Nick Bellore,/players/nick-bellore-belloni01
3,2014-03-31,Oakland Raiders,Signed DB Carlos Rogers and DE C.J. Wilson.,{'DB Carlos Rogers': '/players/carlos-rogers-r...,Signed DB Carlos Rogers and DE C.J. Wilson.,DB Carlos Rogers,Signed DB Carlos Rogers,/players/carlos-rogers-rogerca01
...,...,...,...,...,...,...,...,...
239,2014-03-04,Washington Redskins,"Released DE Adam Carriker, TE Richard Quinn an...",{'DE Adam Carriker': '/players/adam-carriker-c...,Waived RB Jawan Jamison and RB Davin Meggett.,RB Jawan Jamison,Waived RB Jawan Jamison,/players/jawan-jamison-jamisja01
239,2014-03-04,Washington Redskins,"Released DE Adam Carriker, TE Richard Quinn an...",{'DE Adam Carriker': '/players/adam-carriker-c...,Waived RB Jawan Jamison and RB Davin Meggett.,RB Davin Meggett,Waived RB Davin Meggett,/players/davin-meggett-meggeda02
240,2014-03-03,Buffalo Bills,Waived DL Willie Jefferson.,{'DL Willie Jefferson': '/players/willie-jeffe...,Waived DL Willie Jefferson.,DL Willie Jefferson,Waived DL Willie Jefferson,/players/willie-jefferson-jeffewi01
241,2014-03-03,Detroit Lions,Signed DT Corvey Irvin. Released OG Leroy Harris.,{'DT Corvey Irvin': '/players/corvey-irvin-irv...,Signed DT Corvey Irvin,DT Corvey Irvin,Signed DT Corvey Irvin,/players/corvey-irvin-irvinco01


In [349]:
txt = 'Placed LB Rolando McClain on the Reserve/Retired List.'

In [351]:
lst = txt.split()

In [353]:
lst[-2]

'Reserve/Retired'