# DATA ACQUISITION

- First import relevant packages for data acquisition and processing. 
- Create local variables for URL of injury database website as well as page ranges for data acquisition.
- Generate a dataframe of ALL injury records in range, and remove 'Acquired' column (for players returning to active roster).

In [None]:
import basketball_reference_scraper
from basketball_reference_scraper.teams import get_roster, get_team_stats, get_opp_stats, get_roster_stats, get_team_misc
from basketball_reference_scraper.seasons import get_schedule, get_standings
from basketball_reference_scraper.players import get_stats, get_game_logs, get_player_headshot
from datetime import datetime as dt
from datetime import timedelta as td
import matplotlib.pyplot as plt
import pandas as pd
import random as rd
import calendar
# Notebook display settings: show every row and allow up to 150 characters per cell when a dataframe is rendered.
pd.set_option('display.max_rows', None)
pd.options.display.max_colwidth = 150
# Timestamp taken when this cell runs; later cells reassign `start` before timing their own loops.
start = dt.now()

import warnings
# Suppress FutureWarning messages for the rest of the session.
warnings.simplefilter(action = 'ignore', category = FutureWarning)

In [None]:
# Base search URL; the value appended after 'start=' is the offset of the first record on the requested page.
url = ("https://www.prosportstransactions.com/basketball/Search/SearchResults.php?Player=&Team=&BeginDate=&EndDate=&ILChkBx=yes&Submit=Search&start=")
pnumstart = 3650
pnumend = 33050

# Collect one dataframe per result page and concatenate once at the end.
# (DataFrame.append was removed in pandas 2.0 and re-copied the whole table on every page.)
pages = []

for i in range(pnumstart, pnumend + 25, 25):

    pgTbl = pd.read_html(url + str(i))
    injData = pgTbl[0]
    # The first parsed row holds the column titles: promote it to the header and drop it from the data.
    injData.columns = injData.iloc[0]
    injData = injData.iloc[1:, :]
    pages.append(injData)

iDB = pd.concat(pages)

# Drop roster re-activations. A single 'activated' test also covers 'activated from IL';
# na=False keeps rows whose note is missing instead of failing on the boolean mask.
isActivation = iDB['Notes'].str.contains('activated', regex = False, na = False)
iDB = iDB.loc[~isActivation, ['Date', 'Team', 'Relinquished', 'Notes']].copy()

# Strip the bullet prefix that precedes each player name.
iDB['Relinquished'] = iDB['Relinquished'].str.replace('• ', '', regex = False)
iDB = iDB.reset_index(drop = True)

In [None]:
# Persist the scraped table (without the index) so later cells can reload it instead of re-scraping.
iDB.to_csv('InjuryDatabaseRaw99-21_NoAcquired.csv', index = False)

In [None]:
# Reload the saved injury table from disk.
iDB = pd.read_csv('InjuryDatabaseRaw99-21_NoAcquired.csv')

# DATA ORGANIZATION AND CLEANING

- Create variables related to search terms for injuries of different categories (e.g., UE = Upper Extremity, etc.)
- Categorize the dataframe by injury type, and separately count records of unspecified nature (e.g., 'placed on IL' alone is not useful). 
- Generate a table with relative frequency of injury types to externally validate against published sources. 

In [None]:
# Search patterns for each injury category: terms joined by '|' and passed to Series.str.contains
# on the 'Notes' column, so a note matches a category when it contains any one of the terms.
# NOTE(review): matching is by substring and case-sensitive by default, so short terms can hit
# longer words (e.g. 'leg' inside 'legal', 'rest' inside 'arrest') and one note can fall into
# several categories — confirm this overlap is acceptable.
# Upper extremity
UEinjuries = 'arm|shoulder|elbow|wrist|hand|hand/finger|finger|fingers|chest|thumb|rotator cuff|deltoid|tricep|triceps|bicep|biceps|pectoral|pectoralis'
# Lower extremity
LEinjuries = 'leg|hip|knee|arch|knees|shin|infrapatella|kneecap|patella|patellar|tibia|tibial|fibula|ankle|ankle/tibia|shin/tibia|knee/quadricep|foot|feet|instep|heel|toe|toes|thigh|hip flexor|gastrocnemius|buttock|abductor|adductor|groin|hamstring|quadricep|quadriceps|ACL|calf|achilles|Achilles|plantar|peroneal synovitis'
# Trunk
TRinjuries = 'abdominal|sternum|clavicle|oblique|rib|ribs|ribcage|disc|back|lower back|neck|cervical|cervican|spine|spinal|hernia|pelvis|tailbone'
# Head
HEinjuries = 'head injury|bruised head|dizziness|concussion|facial|jaw|orbital|dental|eye|nose|tooth|teeth|root canal|cheek|cheekbone'
# Non-orthopedic / illness
SYinjuries = 'COVID-19|frontal|blood clots|blood clot|medicine|body|throat|sinus|lung|lungs|bilateral synovitis|virus|viral|illness|flu/illness|food poisoning|bronchitis|aorta|flu|flu-like|health and safety|rest|nerve|migraine|migraines|headache|headaches|stomach|gastroentritis|gastroenteritis|general|respiratory|tonsilitis|appendectomy|appendix|testicle|testicular|bladder|mononucleosis|peritonsillar|pneumonia|heart|heartbeat|upper respiratory|chicken pox'
# Off-court issues
OTHRissues = 'death|funeral|waived|legal|suspension|shoot around|arrest|drug|trade|visa|birth|family'

In [None]:
# Build a frequency table of injury categories: one row per category plus an 'Unspecified' row
# for notes that only say the player was placed on the injured list.
injTypes = [UEinjuries, LEinjuries, TRinjuries, HEinjuries, SYinjuries, OTHRissues]
injCateg = ['Upper Extremity Injuries', 'Lower Extremity Injuries', 'Trunk Injuries', 'Head Injuries', 'Non-Orthopedic (Illness)', 'Off-Court Issues']

# Count the notes matching each category pattern (a note may be counted in more than one category).
tableRows = [[label, iDB['Notes'].str.contains(pattern, na = False).sum()]
             for label, pattern in zip(injCateg, injTypes)]

# Notes that carry no injury description at all; matched exactly, not by substring.
unspecifiedNotes = ['placed on IL',
                    'placed on IL (date approximate)',
                    'placed on IL (date unsure)',
                    'placed on IL (out for season)',
                    'placed on IL (Fox)',
                    'placed on IL (~CBC)',
                    'placed on IL (not CBC)',
                    'placed on IL (F)',
                    'placed on IL (P)',
                    'placed on IL (P) (out for season)',
                    'placed on IL (F) (out for season)']
tableRows.append(['Unspecified', iDB['Notes'].isin(unspecifiedNotes).sum()])

# Built in one step; the original grew the table with DataFrame.append, which pandas 2.0 removed.
injTBL = pd.DataFrame(tableRows, columns = ['Category', 'Frequency'])
injTBL = injTBL.sort_values(by=['Frequency'], ascending = False)
# Share of all records, in percent. Because categories can overlap, the shares may sum to more than 100.
injTBL['Percentage'] = ((injTBL['Frequency'] / len(iDB)) * 100).round(4)
print("Currently accounting for ", round((injTBL['Frequency'].sum()/len(iDB))*100, 2), "% of Injuries")
# Last expression of the cell: displayed with a clean 0..n-1 index (injTBL itself keeps its sorted index).
injTBL.reset_index(drop = True)

# DERIVE SPECIFIC PROJECT-FOCUSED RECORDS FROM DATABASE

- Revise injury database to include only those records having terms related to 'chronic' injury types. 
- Use basketball-reference-scraper API functions to determine start & end dates of regular seasons for all dates in database.
- Eliminate those injury records reported outside the temporal boundaries of regular seasons.
- Remove players with the suffix 'Jr', because a name lookup for them is more likely to retrieve the earlier of two players with the same name.
- Refine database to only those names for whom gamestats could be returned by basketball-reference-scraper API. 

In [None]:
# Terms that mark an injury note as 'chronic' (overuse / inflammatory) rather than acute.
chronic = "chronic|tendinitis|jumper's|bursitis|periostitis|synovitis|tenosynovitis|inflammation|swollen|swelling|tightness|stiffness|fasciitis|fasciatis|fasciaitis|effusion|sore|soreness|stress|splints"

# Keep notes that mention a lower-extremity term AND a chronic term, excluding 'recovering' notes.
# na=False treats a missing note as "no match" instead of breaking the boolean mask.
notes = iDB['Notes']
isLowerExtremity = notes.str.contains(LEinjuries, na = False)
isRecovering = notes.str.contains('recovering', na = False)
isChronic = notes.str.contains(chronic, na = False)

# .copy() makes LowExInj an independent frame, so the 'Date' assignment below is not a write to a slice of iDB.
LowExInj = iDB[isLowerExtremity & ~isRecovering & isChronic].copy()
LowExInj['Date'] = pd.to_datetime(LowExInj['Date'])
LowExInj = LowExInj.reset_index(drop = True)

In [None]:
# Append calendar-year, month-name and day-of-month columns derived from 'Date' to the end of the frame.
injuryStamps = pd.DatetimeIndex(LowExInj['Date'])
dateParts = (('Year', injuryStamps.year),
             ('Month', injuryStamps.month_name()),
             ('Day', injuryStamps.day))

for label, values in dateParts:
    LowExInj.insert(len(LowExInj.columns), label, values)

In [None]:
# Save the chronic lower-extremity records. Unlike the other exports in this file, index=False is not
# passed here, so the row index is written as an extra first column.
LowExInj.to_csv('ChronicLEInjuries_WithDates.csv')

In [None]:
# Keep only injuries dated within a regular season, for each season requested from get_schedule.
seasonFrames = []

for k in range(2000, 2022):

    regsched = get_schedule(k, playoffs = False)
    regsched['DATE'] = pd.to_datetime(regsched['DATE'])
    # First and last rows are taken as the season's opening and closing dates
    # (assumes the schedule is returned in chronological order — TODO confirm).
    regstart = regsched['DATE'].iloc[0]
    regend = regsched['DATE'].iloc[-1]

    injData = LowExInj[(LowExInj['Date'] >= regstart) & (LowExInj['Date'] <= regend)]
    seasonFrames.append(injData)

# One concatenation instead of DataFrame.append per season (append was removed in pandas 2.0).
# The reset index is now kept; the original computed it and then discarded the result.
DateInj = pd.concat(seasonFrames).reset_index(drop = True)
DateInj

In [None]:
# Save the regular-season subset (without the index) for the next stage.
DateInj.to_csv('ChronicLE_RegSeason.csv', index = False)

In [None]:
# Reload the regular-season records and drop players whose name carries a 'Jr' suffix.
Data = pd.read_csv('ChronicLE_RegSeason.csv')

# Match 'Jr' as a whole word. The original pattern 'Jr.' was interpreted as a regex in which '.'
# matches any character, so it also removed names that merely start with 'Jr' (e.g. 'Jrue ...').
# na=False keeps rows with a missing name instead of failing on the boolean mask.
hasJrSuffix = Data['Relinquished'].str.contains(r'\bJr\b', regex = True, na = False)
Data = Data[~hasJrSuffix]
Data.to_csv('ProjectData.csv', index = False)

In [None]:
# Reload the project records saved by the previous cell.
Data = pd.read_csv('ProjectData.csv')

In [None]:
# Record the row positions of players for whom get_stats returns a non-empty per-game table.
searchable = []

# Iterate over exactly len(Data) rows. The original range(len(Data) + 1) ran one position past the
# end and raised IndexError on the name lookup, which sat outside the try block.
for i, name in enumerate(Data['Relinquished']):

    try:

        stats = get_stats(name, stat_type = 'PER_GAME')

        if not stats.empty:

            searchable.append(i)

    # 'except Exception' rather than a bare except, so Ctrl-C / kernel interrupt still stops this long loop.
    except Exception as err:

        print(i, 'error', type(err).__name__)

In [None]:
# Keep only the rows whose player could be looked up, renumbering the index from zero.
Data = Data.iloc[searchable].reset_index(drop = True)

In [None]:
# Save the searchable-player subset (without the index).
Data.to_csv('ProjectDataSearchable.csv', index = False)

# FINAL STAGE OF DATABASE CLEANING BEFORE ACQUIRING PREDICTORS

- Eliminate all records occurring after the COVID-19 shutdown in March of 2020. 
- Associate the correct season with each record (e.g., assign 2020 as 'Season' to an injury occurring during late 2019).
- Generate a team dictionary to switch full team names to appropriate abbreviations for compatibility with basketball-reference
- Save data to new dataframe

In [None]:
# Reload the searchable records, parse 'Date' as datetimes, and keep records dated on or before 2020-03-11.
Data = pd.read_csv('ProjectDataSearchable.csv')
Data = Data.assign(Date = pd.to_datetime(Data['Date']))
Data = Data.loc[Data['Date'] <= '2020-03-11']

In [None]:
# Month names, kept as originally defined. Only `winter` affects the result.
winter = 'October | November | December'
spring = 'January | February | March | April'

# Work on an independent copy: Data was produced by a boolean filter, and writing a new column
# to such a slice is unreliable (SettingWithCopy / no-op under copy-on-write).
Data = Data.copy()

# A season is labelled by the calendar year in which it ends, so injuries dated October-December
# belong to the following year's season; every other month keeps its own calendar year.
# Exact membership replaces the original substring test against the `winter` string.
startsNextSeason = Data['Month'].isin(winter.split(' | '))
Data['Season'] = Data['Date'].dt.year + startsNextSeason.astype(int)

In [None]:
# Team nickname (as stored in the 'Team' column) -> three-letter abbreviation.
teamdict = {'Lakers' : 'LAL', 'Warriors' : 'GSW', 'Nuggets' : 'DEN', 'Raptors' : 'TOR', 'Wizards' : 'WAS', 'Magic' : 'ORL',
        'Pistons' : 'DET', 'Sonics' : 'SEA', 'Bulls' : 'CHI', 'Suns' : 'PHO', '76ers' : 'PHI', 'Blazers' : 'POR', 
        'Nets' : 'BRK', 'Heat' : 'MIA', 'Celtics' : 'BOS', 'Bucks' : 'MIL', 'Cavaliers' : 'CLE', 'Spurs' : 'SAS',
        'Knicks' : 'NYK', 'Kings' : 'SAC', 'Timberwolves' : 'MIN', 'Pacers' : 'IND', 'Rockets' : 'HOU', 'Jazz' : 'UTA',
        'Clippers' : 'LAC', 'Mavericks' : 'DAL', 'Hawks' : 'ATL', 'Bobcats' : 'CHA', 'Thunder' : 'OKC', 'Pelicans' : 'NOP'}

Data = Data.replace({'Team' : teamdict})

# 'Hornets' and 'Grizzlies' are absent from teamdict because their abbreviation depends on the calendar
# year of the record. The assignments use .loc with boolean masks instead of the original chained
# Data['Team'].iloc[n] = ..., which is unreliable and a no-op under copy-on-write.
# NOTE(review): the year cut-offs are carried over unchanged — confirm them against franchise history.
isHornets = Data['Team'] == 'Hornets'
hornetsEarly = Data['Year'] <= 2004
Data.loc[isHornets & hornetsEarly, 'Team'] = 'CHH'
Data.loc[isHornets & ~hornetsEarly, 'Team'] = 'CHO'

isGrizzlies = Data['Team'] == 'Grizzlies'
grizzliesEarly = Data['Year'] <= 2001
Data.loc[isGrizzlies & grizzliesEarly, 'Team'] = 'VAN'
Data.loc[isGrizzlies & ~grizzliesEarly, 'Team'] = 'MEM'

In [None]:
# Save the records with 'Season' and abbreviated 'Team' values (without the index).
Data.to_csv('ProjectDataSeasonsTeams.csv', index = False)

# CONSTRUCT AN 'INJURED GROUP' DATABASE WITH PREDICTORS

- Create new dataframe with headers associated with all relevant variables for retrieval of statistics and predictors.
- Make calls to basketball-reference-scraper API to retrieve pertinent roster, schedule, and gamelog variables. 
- Save new dataframe to new file. 

In [None]:
# Reload the season/team-annotated records and re-parse 'Date' as datetimes after the CSV round trip.
Data = pd.read_csv('ProjectDataSeasonsTeams.csv')
Data['Date'] = pd.to_datetime(Data['Date'])
# Last expression of the cell: displays the dataframe.
Data

In [None]:
# Build the 'injured group' table: one row of player, workload and schedule predictors per injury
# record, computed over the 21 days that end on the injury date.
start = dt.now()

colNames = ['Season', 'Year', 'Month', 'Day', 'Team', 'Name', 'Age', 'Height', 'Weight', 'Position', 'Experience', 'AVGmins', 
            'Days', 'Games', 'B2Bs', 'GameDensity', 'OffDays', 'OffDayRatio', 'HomeGames', 'HomeDensity', 'AwayGames', 
            'AwayDensity', 'PreInjAVGmins', 'HomeMinShare', 'AWayMinShare', 'GamesAboveAVGminShare', 'PreInjMinDiff', 'Injury', 'Class']

# Team abbreviation -> full team name used to find the team's games in the schedule table.
# Defined once here: it does not change between records.
teamFinder = {'LAL' : 'Los Angeles Lakers', 'GSW' : 'Golden State Warriors', 'DEN' : 'Denver Nuggets', 'NOH' : 'New Orleans Hornets',
              'TOR' : 'Toronto Raptors', 'WAS' : 'Washington Wizards', 'ORL' : 'Orlando Magic', 'DET' : 'Detroit Pistons', 
              'SEA' : 'Seattle SuperSonics', 'CHI' : 'Chicago Bulls', 'PHO' : 'Phoenix Suns', 'PHI' : 'Philadelphia 76ers', 
              'VAN' : 'Vancouver Grizzlies', 'POR' : 'Portland Trailblazers', 'BRK' : 'Brooklyn Nets', 'MIA' : 'Miami Heat', 
              'BOS' : 'Boston Celtics', 'CHH' : 'Charlotte Hornets', 'MIL' : 'Milwaukee Bucks', 'CLE' : 'Cleveland Cavaliers', 
              'SAS' : 'San Antonio Spurs', 'NYK' : 'New York Knicks', 'SAC' : 'Sacramento Kings', 'MIN' : 'Minnesota Timberwolves', 
              'IND' : 'Indiana Pacers', 'HOU' : 'Houston Rockets', 'UTA' : 'Utah Jazz', 'LAC' : 'Los Angeles Clippers', 
              'DAL' : 'Dallas Mavericks', 'MEM' : 'Memphis Grizzlies', 'ATL' : 'Atlanta Hawks', 'CHA' : 'Charlotte Bobcats', 
              'CHO' : 'Charlotte Hornets','OKC' : 'Oklahoma City Thunder', 'NOP' : 'New Orleans Pelicans', 'NJN' : 'New Jersey Nets',
              'NOK' : 'New Orleans/Oklahoma City Hornets'}

fullData = pd.DataFrame(columns = colNames)

# (row position, exception type) for every record that could not be completed.
skipped = []

for w in range(0, len(Data)):
    
    try:
    
        outcome = 1    # class label: 1 = injured
        year = Data['Year'].iloc[w]
        month = Data['Month'].iloc[w]
        day = Data['Day'].iloc[w]
        inj = Data['Notes'].iloc[w]
        
        season = Data['Season'].iloc[w]
        name = Data['Relinquished'].iloc[w]
        date = Data['Date'].iloc[w].date()
        prev = date - td(days = 21)    # start of the look-back window

        stats = get_stats(name, stat_type = 'PER_GAME')

        # Season label in the 'YYYY-YY' form, e.g. season 2020 -> '2019-20'.
        avgs = stats[stats['SEASON'] == str(season - 1) + '-' + str(season)[-2:]]
        # The last matching row is used when the player has several rows for the season.
        team = avgs['TEAM'].iloc[-1]
        age = avgs['AGE'].iloc[-1]
        pos = avgs['POS'].iloc[-1]
        mins = avgs['MP'].iloc[-1]
        
        logs = get_game_logs(name, str(prev), str(date))
        # 'MP' is split on ':' into two numeric parts and converted to decimal minutes.
        minutes = logs['MP'].str.split(':', expand = True).apply(pd.to_numeric)
        minutes['time'] = round(minutes[0] + minutes[1] / 60, 4)
        preinjmins = round(minutes['time'].mean(), 4)
        preinjmindiff = round(preinjmins - mins, 4)
        
        # Share of the window's minutes played at home and away.
        totalMins = minutes['time'].sum()
        homemin = logs.index[logs['HOME/AWAY'] =='HOME']
        awaymin = logs.index[logs['HOME/AWAY'] =='AWAY']
        minsHome = round(minutes.loc[homemin]['time'].sum() / totalMins, 4)
        minsAway = round(minutes.loc[awaymin]['time'].sum() / totalMins, 4)

        rost = get_roster(team, season)
        info = rost[rost['PLAYER'] == name]

        # Height arrives as 'feet-inches' and is converted to inches.
        height = info['HEIGHT'].item()
        height = int(height.split('-')[0]) * 12 + int(height.split('-')[1])
        weight = info['WEIGHT'].item()
        yrsnba = info['EXPERIENCE'].item()

        schedule = get_schedule(season)

        fullName = teamFinder[team]
        # Rows of the schedule in which the team appears in any column.
        sched = schedule[schedule.isin([fullName]).any(axis=1)]
        # NOTE(review): prev/date are datetime.date objects; confirm the installed pandas accepts them
        # in between() against sched['DATE'] — a TypeError here is caught below and the record skipped.
        pregames = sched[sched['DATE'].between(prev, date)]
        numgames = len(pregames)

        # Calendar dates of the team's games in the window, in schedule order.
        gameDays = [stamp.date() for stamp in pregames['DATE']]
        days = (gameDays[-1] - gameDays[0]).days
        
        aboveavgmins = round(sum(minutes['time'] > mins) / len(pregames), 4)

        offdays = days - len(pregames)
        offdaydensity = round(offdays / days, 4)
        density = round(len(pregames) / days, 4)
        homegames = len(pregames[pregames['HOME'] == fullName])
        homedensity = round(homegames / days, 4)
        awaygames = len(pregames[pregames['VISITOR'] == fullName])
        awaydensity = round(awaygames / days, 4)
        
        # Back-to-backs: flag both games of every consecutive pair played less than two days apart,
        # then count flagged games / 2. This is computed for every record, so a one-game window
        # gives 0 instead of reusing the previous record's value (or raising NameError on the first).
        flagged = set()

        for q in range(0, len(gameDays) - 1):

            if gameDays[q + 1] - gameDays[q] < td(days = 2):

                flagged.update((q, q + 1))

        numb2b = len(flagged) / 2 if flagged else 0

        row = [season, year, month, day, team, name, age, height, weight, pos, yrsnba, mins, days, numgames, numb2b, density, 
               offdays, offdaydensity, homegames, homedensity, awaygames, awaydensity, preinjmins, 
               minsHome, minsAway, aboveavgmins, preinjmindiff, inj, outcome]
        
        fullData.loc[len(fullData)] = row

    except (TypeError, KeyError, IndexError, AttributeError, ValueError, ZeroDivisionError) as err:

        # A record with missing or unusable data is left out of the table. Note it rather than
        # dropping it silently (the original 'w += 1' had no effect on the for loop).
        skipped.append((w, type(err).__name__))
        
print('Produced ', len(fullData), ' rows of data for your project in ', dt.now() - start, ' h:mm:ss.')
print('Skipped ', len(skipped), ' records that could not be completed.')
fullData

In [None]:
# Save the injured-group predictor table (without the index).
fullData.to_csv('InjuredGroup.csv', index = False)

# EXECUTE SAME PROCEDURES TO GENERATE CONTROL GROUP

- For up to 2000 iterations, match injured records to a random selection from player-team combination of same season.
- Exclude the retrieved player name if it appears in the injury database.
- Save new file as randomized control group. 

In [None]:
# Reload the full injury database (used to exclude injured players) and the injured-group table
# (used to pick the seasons and teams that controls are drawn from).
iDB = pd.read_csv('InjuryDatabaseRaw99-21_NoAcquired.csv')
Data = pd.read_csv('InjuredGroup.csv')
# Last expression of the cell: displays the dataframe.
Data

In [None]:
# Build the control group: for each of 2000 draws, pick a random injured-group record's season, a random
# team from that season, a random player from that team's roster and a random game date, then compute
# the same predictors as for the injured group over the 21 days ending on that date.
start = dt.now()

colNames = ['Season', 'Year', 'Month', 'Day', 'Team', 'Name', 'Age', 'Height', 'Weight', 'Position', 'Experience', 'AVGmins', 
            'Days', 'Games', 'B2Bs', 'GameDensity', 'OffDays', 'OffDayRatio', 'HomeGames', 'HomeDensity', 'AwayGames', 
            'AwayDensity', 'PreInjAVGmins', 'HomeMinShare', 'AWayMinShare', 'GamesAboveAVGminShare', 'PreInjMinDiff', 'Injury', 'Class']

# Team abbreviation -> full team name used to find the team's games in the schedule table.
# Defined once here: it does not change between draws.
teamFinder = {'LAL' : 'Los Angeles Lakers', 'GSW' : 'Golden State Warriors', 'DEN' : 'Denver Nuggets', 'NOH' : 'New Orleans Hornets',
              'TOR' : 'Toronto Raptors', 'WAS' : 'Washington Wizards', 'ORL' : 'Orlando Magic', 'DET' : 'Detroit Pistons', 
              'SEA' : 'Seattle SuperSonics', 'CHI' : 'Chicago Bulls', 'PHO' : 'Phoenix Suns', 'PHI' : 'Philadelphia 76ers', 
              'VAN' : 'Vancouver Grizzlies', 'POR' : 'Portland Trailblazers', 'BRK' : 'Brooklyn Nets', 'MIA' : 'Miami Heat', 
              'BOS' : 'Boston Celtics', 'CHH' : 'Charlotte Hornets', 'MIL' : 'Milwaukee Bucks', 'CLE' : 'Cleveland Cavaliers', 
              'SAS' : 'San Antonio Spurs', 'NYK' : 'New York Knicks', 'SAC' : 'Sacramento Kings', 'MIN' : 'Minnesota Timberwolves', 
              'IND' : 'Indiana Pacers', 'HOU' : 'Houston Rockets', 'UTA' : 'Utah Jazz', 'LAC' : 'Los Angeles Clippers', 
              'DAL' : 'Dallas Mavericks', 'MEM' : 'Memphis Grizzlies', 'ATL' : 'Atlanta Hawks', 'CHA' : 'Charlotte Bobcats', 
              'CHO' : 'Charlotte Hornets','OKC' : 'Oklahoma City Thunder', 'NOP' : 'New Orleans Pelicans', 'NJN' : 'New Jersey Nets',
              'NOK' : 'New Orleans/Oklahoma City Hornets'}

fullData = pd.DataFrame(columns = colNames)

# Rosters already downloaded, keyed by (team, season): the candidate search below may revisit the
# same team many times, and each get_roster call is a network request.
rosterCache = {}

# (draw number, exception type) for every draw that could not be completed.
skipped = []

for w in range(0, 2000):
    
    j = rd.randint(0, len(Data) -1)

    # The season and its pool of teams are fixed for this draw; only team and player are re-sampled.
    season = Data['Season'].iloc[j]
    conopt = Data[Data['Season'] == season]

    while True:

        team = conopt['Team'].iloc[rd.randint(0, len(conopt['Team']) -1 )]

        rosterKey = (str(team), season)

        if rosterKey not in rosterCache:

            rosterCache[rosterKey] = get_roster(str(team), season)

        roster = rosterCache[rosterKey]
        player = str((roster['PLAYER'].iloc[rd.randint(0,len(roster['PLAYER']) -1 )]))

        # A candidate is rejected if the name appears in the injury database OR is already in the
        # control group. The original evaluated the two checks in sequence and the second overwrote
        # the first, so injured players were never actually excluded. Names are matched literally
        # (regex = False) so punctuation in a name is not treated as a pattern.
        # NOTE(review): with the exclusion effective, the search may need many more attempts per draw.
        isInjured = iDB['Relinquished'].str.contains(player, regex = False, na = False).any()
        isDuplicate = fullData['Name'].str.contains(player, regex = False, na = False).any()

        if not (isInjured or isDuplicate):

            break

    try:

        outcome = 0    # class label: 0 = control
        inj = ''
        stats = get_stats(player, stat_type = 'PER_GAME')
        # Season label in the 'YYYY-YY' form, e.g. season 2020 -> '2019-20'.
        avgs = stats[stats['SEASON'] == str(season - 1) + '-' + str(season)[-2:]]
        # The last matching row is used when the player has several rows for the season.
        team = avgs['TEAM'].iloc[-1]
        age = avgs['AGE'].iloc[-1]
        pos = avgs['POS'].iloc[-1]
        # Kept as reported, exactly as in the injured group. The original truncated this value to an
        # integer here only, which shifted AVGmins, PreInjMinDiff and GamesAboveAVGminShare for controls.
        mins = avgs['MP'].iloc[-1]

        schedule = get_schedule(season)

        fullName = teamFinder[team]
        # Rows of the schedule in which the team appears in any column.
        sched = schedule[schedule.isin([fullName]).any(axis=1)]

        # Random game date of that team stands in for the 'injury date'.
        date = sched['DATE'].iloc[rd.randint(0, len(sched) -1)].date()
        year = date.year
        month = date.strftime("%B")
        day = date.day
        prev = date - td(days = 21)    # start of the look-back window

        # NOTE(review): prev/date are datetime.date objects; confirm the installed pandas accepts them
        # in between() against sched['DATE'] — a TypeError here is caught below and the draw skipped.
        pregames = sched[sched['DATE'].between(prev, date)]
        numgames = len(pregames)

        logs = get_game_logs(player, str(prev), str(date))
        # 'MP' is split on ':' into two numeric parts and converted to decimal minutes.
        minutes = logs['MP'].str.split(':', expand = True).apply(pd.to_numeric)
        minutes['time'] = round(minutes[0] + minutes[1] / 60, 4)
        preinjmins = round(minutes['time'].mean(), 4)
        preinjmindiff = round(preinjmins - mins, 4)

        # Share of the window's minutes played at home and away.
        totalMins = minutes['time'].sum()
        homemin = logs.index[logs['HOME/AWAY'] =='HOME']
        awaymin = logs.index[logs['HOME/AWAY'] =='AWAY']
        minsHome = round(minutes.loc[homemin]['time'].sum() / totalMins, 4)
        minsAway = round(minutes.loc[awaymin]['time'].sum() / totalMins, 4)

        # Physical data come from the roster the player was sampled from.
        info = roster[roster['PLAYER'] == player]

        # Height arrives as 'feet-inches' and is converted to inches.
        height = info['HEIGHT'].item()
        height = int(height.split('-')[0]) * 12 + int(height.split('-')[1])
        weight = info['WEIGHT'].item()
        yrsnba = info['EXPERIENCE'].item()

        # Calendar dates of the team's games in the window, in schedule order.
        gameDays = [stamp.date() for stamp in pregames['DATE']]
        days = (gameDays[-1] - gameDays[0]).days

        aboveavgmins = round(sum(minutes['time'] > mins) / len(pregames), 4)

        offdays = days - len(pregames)
        offdaydensity = round(offdays / days, 4)
        density = round(len(pregames) / days, 4)
        homegames = len(pregames[pregames['HOME'] == fullName])
        homedensity = round(homegames / days, 4)
        awaygames = len(pregames[pregames['VISITOR'] == fullName])
        awaydensity = round(awaygames / days, 4)

        # Back-to-backs: flag both games of every consecutive pair played less than two days apart,
        # then count flagged games / 2. This is computed for every draw, so a one-game window
        # gives 0 instead of reusing the previous draw's value (or raising NameError on the first).
        flagged = set()

        for q in range(0, len(gameDays) - 1):

            if gameDays[q + 1] - gameDays[q] < td(days = 2):

                flagged.update((q, q + 1))

        numb2b = len(flagged) / 2 if flagged else 0

        row = [season, year, month, day, team, player, age, height, weight, pos, yrsnba, mins, days, numgames, numb2b, density, 
        offdays, offdaydensity, homegames, homedensity, awaygames, awaydensity, preinjmins, 
        minsHome, minsAway, aboveavgmins, preinjmindiff, inj, outcome]

        fullData.loc[len(fullData)] = row

    except (TypeError, KeyError, IndexError, AttributeError, ValueError, ZeroDivisionError) as err:

        # A draw with missing or unusable data produces no row. Note it rather than dropping it
        # silently (the original 'j += 1' had no effect).
        skipped.append((w, type(err).__name__))

print('Produced ', len(fullData), ' rows of data for your project in ', dt.now() - start, ' h:mm:ss.')
print('Skipped ', len(skipped), ' draws that could not be completed.')
# NOTE(review): unlike InjuredGroup.csv, this export also writes the row index as a first column;
# left unchanged in case a later reader expects it.
fullData.to_csv('RandomizedControlGroup.csv')
fullData