# MARCH MADNESS PREDICTOR

Authors: Connor Finn, Riley Greene <br>
Date: 1/24/20 <br>
Warren Buffet is still paying 1 billion for a perfect bracket

In [228]:
# our imports for the model
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup 
import lxml.html as lh

# a few constants
start_year = 2009

# Create the Data Pipeline

This class will create the data pipeline for this project. It has the following methods:
* make_folder() takes in no external arguments, it simply creates a folder in the working directory called 'collected_data' if one does not already exist.
* write_data_one() takes in a bball_scraper object (defined below). 
  + It writes the scraped data into a csv file. 
* write_data_two() works identically to write_data_one but has a different end csv file
* write_team_list() takes in a dataframe and writes it to csv.
* write_game_list() takes in a dataframe and writes it to csv.


In [229]:
import os

class collect_pipeline(object):

    def make_folder(self):
        if not os.path.exists('./collected_data'):
            os.mkdir('./collected_data')

    def write_data_one(self, spider):
        spider.data.to_csv('collected_data/season_data.csv' , index=False )
    
    def write_data_two(self , spider):
        spider.data.to_csv('collected_data/more_team_data.csv' , index=False)
        
    def write_team_list(self, df):
        df.to_csv('collected_data/team_list.csv')
    
    def write_game_list(self, df):
        df.to_csv('collected_data/ncaa_short.csv')


# Create the Spider

In [230]:

class bball_scraper():
   

    def __init__(self ):
        self.start_urls = []
        self.data_one = None
    
    # url_list is a list of dictionary's. each has a 'name' id' , 'year' , 'url'
    # for the second group of urls - data will be none type
    def set_urls(self , url_list):
        self.start_urls = url_list
    
    # this is the first set of urls. only need the first url from one table
    def parse_one(self):
        season_stats = pd.DataFrame()   # start with an empty dataframe
        
        for item in self.start_urls:
            # extact the information we need
            team_id = item['id']
            team_name = item['name']
            url = item['url']
            year = item['year']
            print(" url , " , url)
            print("Team name " , team_name)   # this shows progress
            page = requests.get(url)
            soup = BeautifulSoup(page.content, "lxml")
            tables = soup.findAll('table')    # search for tables 
            # convert to a dataframe and label the data
            df = pd.read_html(str(tables[1]))[0]      # select the table of interest into a pandas dataframe  
            df.drop([1 , 2, 3] , inplace=True)
            df = df.replace('Team' , team_name)   # want the name of the team 
            df['Team_ID'] = team_id        # put in the team ID's
            df['Date'] = year  # only want the year in this column
            df = df.rename(columns={'Unnamed: 0': 'Team'})

            
            season_stats = season_stats.append(df , ignore_index = True , sort=False)
           # season_stats = pd.concat([season_stats, df] ,  axis=0) # add this to the season stats empty dataframe we started with
              # simply clean up the columns 
            
            # save the data
            self.data = season_stats  
            
        # this is the second set of urls - more than one row from the table
    def parse_two(self):
        wl_teams = pd.DataFrame()      # initialize and empty dataframe
        for item in self.start_urls:
                # extract the info
            team_idea = item['id']
            team_name = item['name']
            url = item['url']
                   
                # parse the page
            page = requests.get(url)
            soup = BeautifulSoup(page.content, "lxml")
            tables = soup.findAll('table')
        
                # build a dataframe of the win loss data
            win_loss_df = pd.read_html(str(tables))[0]
            win_loss_6_df = win_loss_df.head(12)
            win_loss_6_df['team'] = team_name

            wl_teams = wl_teams.append(win_loss_6_df , ignore_index=True)    

        # clean the dataframe

        wl_teams.columns = wl_teams.columns.droplevel(level=0)
        wl_teams['team'] = wl_teams[""]
        wl_teams.drop(""  , axis=1)
        
        self.data = wl_teams

# Create the List of URLS to scrape

The team_id provides names written as 'Penn State'. The data we will be collecting is from Sports-reference.com. The url for penn state's data is 'https://www.sports-reference.com/cbb/schools/penn-state/2020.html' <br>
1) the name needs to be adjusted to no spaces, all lower caps <br>
2) the overall framework is 'https://www.sports-reference.com/cbb/schools/NAME/YEAR.html'<br>

In [231]:
"""
* this function will be used to transform a provided name to one that can be input into a sports-reference url
* this will likely need to be adjusted as new teams are included
* this needs a unit test (i.e. run all our team names through once to see if they pass)
"""
class name_cleaner():
    
    def __init__(self):
        self.clean_names = []
        self.num_errors = 0
        
    # not too happy about this
    # names is a list of strings
    def clean(self , names):
        
        clean_names = []
        for name in names:
            team = name
            team = '!' + team + '!'                   # first and last character
            team = team.replace(' ' , '-')            # no spaces
            team = team.replace('(' , '')
            team = team.replace(')' , '')
            team = team.replace('.' , '')
            team = team.replace( "'" , '')
            team = team.replace("&" , "")
            team = team.lower()
            team = team.replace('!southern-univ!' , '!southern!')
            team = team.replace('!w-texas-am!' , '!west-texas-am!')
            team = team.replace('!armstrong-st!' , '!armstrong!')
            team = team.replace('ark-' , 'arkansas-')
            team = team.replace('-st!' , '-state!')
            team = team.replace('!n-' , '!north-')
            team = team.replace('!e-' , '!eastern-')
            team = team.replace('!cs-' , '!cal-state-')
            team = team.replace('!s-illinois!' , '!southern-illinois!')
            team = team.replace('!s-' , '!south-')
            team = team.replace('!w-' , '!western-')
            team = team.replace('!nc-' , '!north-carolina-')
            team = team.replace('chr!' , 'christian!')
            team = team.replace('-so!' , '-southern!')
            team = team.replace('!fl-' , '!florida-')
            team = team.replace('!uc-' , '!california-')
            team = team.replace('intl!' , 'international!')
            team = team.replace('cal-state-sacramento' , 'sacramento-state')
            team = team.replace('univ!' , 'university!')
            team = team.replace('miss!' , 'mississippi!')
            team = team.replace('-slo' , '')
            team = team.replace('car!' , 'carolina!')
            team = team.replace('!ne-omaha!' , '!nebraska-omaha!')
            team = team.replace('!ne-' , '!northeastern-')
            team = team.replace('!ut-' , '!texas-')
            team = team.replace('okla-' , 'oklahoma-')
            team = team.replace('!penn!' , '!pennsylvania!')
            team = team.replace('!unc' , '!north-carolina-')
            team = team.replace('!american-university!' , '!american!')
            team = team.replace('!detroit!' , '!detroit-mercy!')
            team = team.replace('!loy-' , '!loyola-')
            team = team.replace('!loyola-chicago!' , '!loyola-il!')
            team = team.replace('!north-kentucky!' , '!northern-kentucky!')
            team = team.replace('!north-illinois!' , '!northern-illinois!')
            team = team.replace('!north-colorado!' , '!northern-colorado!')
            team = team.replace('!cal-baptist!' , '!california-baptist!')
            team = team.replace('!augusta!' , '!augusta-state!')
            team = team.replace('!etsu!' , '!east-tennessee-state!')
            team = team.replace('!mt-' , '!mount-')
            team = team.replace('!g-washington!' , '!george-washington!')
            team = team.replace('!ga-' , '!georgia-')
            team = team.replace('!il-' , '!illinois-')
            team = team.replace('!houston-bap!' , '!houston-baptist!')
            team = team.replace('!kennesaw!' , '!kennesaw-state!')
            team = team.replace('!bowling-green!' , '!bowling-green-state!')
            team = team.replace('!col-charleston!' , '!college-of-charleston!')
            team = team.replace('!cent-arkansas!' , '!central-arkansas!')
            team = team.replace('!central-conn!' , '!central-connecticut-state!')
            team = team.replace('!kent!' , '!kent-state!')
            team = team.replace('!lsu!' , '!louisiana-state!')
            team = team.replace('!ms-' , '!mississippi-')
            team = team.replace('!f-dickinson!' , '!fairleigh-dickinson!')
            team = team.replace('!byu!' , '!brigham-young!')
            team = team.replace('!ma-' , '!massachusetts-')
            team = team.replace('!northwestern-la!' , '!northwestern-state!')
            team = team.replace('!long-island!' , '!long-island-university!')
            team = team.replace('!wi-' , '!')
            team = team.replace('!c-' , '!central-')
            team = team.replace('!md-e-shore!' , '!maryland-eastern-shore!')
            team = team.replace('!st-johns!' , '!st-johns-ny!')
            team = team.replace('!tcu!' , '!texas-christian!')
            team = team.replace('!tx-' , '!texas-')
            team = team.replace('!va-' , '!virginia-')
            team = team.replace('!vmi!' , '!virginia-millitary-institute!')
            team = team.replace('!wku!' , '!western-kentucky!')
            team = team.replace('!utep!' , '!texas-el-paso!')
            team = team.replace('!st-marys-ca!' , '!saint-marys-ca!')
            team = team.replace('!santa-barbara!' , '!california-santa-barbara!')
            team = team.replace('!unlv!' , '!nevada-las-vegas!')
            team = team.replace('-pa!' , '!')
            team = team.replace('!uab!' , '!alabama-birmingham!')
            team = team.replace('!mtsu!' , '!middle-tennessee!')
            team = team.replace('!smu!' , '!southern-methodist!')
            team = team.replace('!sf-austin!' , '!stephen-f-austin!')
            team = team.replace('!st-josephs!' , '!saint-josephs!')
            team = team.replace('!umbc!' , '!maryland-baltimore-county!')
            team = team.replace('!usc!' , '!southern-california!')
            team = team.replace('!st-peters!' , '!saint-peters!')
            team = team.replace('!st-louis!' , '!saint-louis!')
            team = team.replace('!ull!' , '!louisiana-lafayette!')
            team = team.replace('!usc!' , '!southern-california!')
            team = team.replace('--' , '-')
            team = team.replace("university-of-" , "")
            team = team[1:-1]
            
            clean_names.append(team)
        self.clean_names = clean_names


    def test_names(self , other_names = None):
        # we can use this to test the names it just cleaned, or an entirely different set of names
        if other_names == None:
            team_names = self.clean_names
        else:
            team_names = other_names
        
        
        errors = 0
        for name in team_names:
            url_test =  "https://www.sports-reference.com/cbb/schools/" + name + "/index.html"
            page_test = requests.get(url_test)   # scrape
            soup_test = BeautifulSoup(page_test.content, "lxml")   # parse
            tables_test = soup_test.findAll('table')    # search for tables 
            if len(tables_test) != 0:
                print("                                 " , name , " Pass")
            else:
                print(name , " Fail")
                errors += 1
        print("finished unit test. There were " , errors , " errors.")
        self.num_errors = errors

### Generate URLS

In [232]:
class url_generator():
    
    # team_array is a 2 by n array. where the first row is the year. the second row is the team_name
    def __init__(self, ncaa_games , team_df):
        self.teams = None
        self.team_data = team_df
        self.ncaa_games = ncaa_games
        self.url_list_one = []
        self.url_list_two = []


    '''
    team_data is a dataframe which has the columns: 'TeamID', 'TeamName', 'FirstD1Season', 'LastD1Season'.  
    We want to creates a new column which has the team names in accordance to the sports-reference.com framework. 
    
    First, reduce the team_data dataframe to only those teams who actually compteded in the tournament
    '''
    def select_teams(self):
        s1 = set(self.team_data.TeamID)
        s2 = set(self.ncaa_games.WTeamID.append(self.ncaa_games.LTeamID))
        extra_teams = s1.symmetric_difference(s2)
        for value in extra_teams:
            self.team_data = self.team_data.drop(self.team_data[self.team_data["TeamID"] == value].index)
      
        # get a list of all the team names in our dataframe
        team_names = self.team_data.TeamName.values.tolist()
        # clean every name in the team_data dataframe
        mr_clean = name_cleaner()                          # make a name_cleaner object
        mr_clean.clean(team_names)                         # clean the names
        print(len(mr_clean.clean_names))
        # test names and add cleaned names to df if no errors
        #mr_clean.test_names()                              # test the names
        
        if mr_clean.num_errors ==0:                        # only go forward with no errors
            self.team_data["SrNames"] = mr_clean.clean_names
        
    '''
    Goal: compile season long data for the teams who competed in the 2010 - 2018 NCAA tournaments. The idea is that the season long data is the information we will have in the future for creating predictions.

        * in the ncaa_short, we have the year the game was played, and the id's of both teams
        * in team_data we have the team id, along with the team name (cleaned for sports-reference use).
    '''
    
    def build_array(self):
       # get arrays which include the team years and id's 
        winner_array = np.vstack((self.ncaa_games.Season.values , self.ncaa_games.WTeamID.values ))
        loser_array = np.vstack((self.ncaa_games.Season.values , self.ncaa_games.LTeamID.values))

        # List of years
        years = self.ncaa_games.Season.unique()

        # the below loop will create a 2 x n array of all unique teams (year , school_id) which competed in NCAA's
        all_teams = np.empty((2,0)) # fill this array
        a_team_list = []
        for year in years:
            temp_1 = self.ncaa_games.WTeamID.loc[(self.ncaa_games.Season == year)].append(self.ncaa_games.LTeamID.loc[(self.ncaa_games.Season == year)]).unique()
            a_team_list = a_team_list + temp_1.tolist()
            temp_2 = np.full(shape = len(temp_1), fill_value = year , dtype = np.int)
            temp_3 = np.vstack((temp_2 , temp_1)) 
            all_teams = np.hstack((all_teams , temp_3)) # fill array

        # keep track of this
        self.teams = all_teams
        
        
    # there are two webpages per team on sports reference we would like to scrape
    # for this webpage, we only want one row from the first table
    def build_url_one(self):
        url_one = []
        for i in range(self.teams.shape[1]):   # this is the years 
            team = {}
            team_name = self.team_data.SrNames.loc[(self.team_data.TeamID == self.teams[1 , i])].values.tolist()[0] # get the team Name
            url = "https://www.sports-reference.com/cbb/schools/" + str(team_name) + "/" + str(int(self.teams[0 , i])) + ".html" # season data
           
            # build the team dictionary
            team['name'] = team_name
            team['id']   = self.teams[1 , i]
            team['year'] = self.teams[0 , i]
            team['url']  = url 
            url_one.append(team)
       
        self.url_list_one = url_one
        
    # this webpage gives overview data for a team over a span of years.
    # we only want a few of these years.
    def build_url_two(self):
        url_two = []
        teamIDs = pd.unique(self.teams[1])    # we only want each team once
        for j in range(len(teamIDs)): 
            team = {}
            team_name = self.team_data.SrNames.loc[(self.team_data.TeamID == teamIDs[j])].values.tolist()[0]
            url = "https://www.sports-reference.com/cbb/schools/" + team_name + "/"
            
            # build the team dictionary
            team['name'] = team_name
            team['id']   = teamIDs[j]
            team['year'] = None
            team['url']  = url 
            url_two.append(team)
       
        self.url_list_two = url_two
        

# Read Data and run model

Because basketball has changed so much over the past years, we believe that data prior to the year 2009 could be detrimental to our model. For this reason, the next block of code will be used to slim the data down from the 1985 - 2018 seasons to a dataframe including the the 2020 - 2018 NCAA tournaments.

In [233]:
# read ncaa tournament results into dataframe ( data obtained from Kaggle)
ncaa_total = pd.read_csv('NCAATourneyCompactResults.csv')  
# read team ID dataframe (from Kaggle)
team_data = pd.read_csv('teams.csv')  
# get the first index for the desired year
start = min(ncaa_total[ncaa_total.iloc[:,0] == start_year].index.values.astype(int)) 
# shorten our dataframe
ncaa_short = ncaa_total.iloc[start: , :]

In [234]:
# create data pipeline object
pipe = collect_pipeline()
pipe.make_folder()

# make the spider object
spider = bball_scraper()

# make url_generator object
go_daddy = url_generator( ncaa_short , team_data)

# Step 1: Make the urls 
go_daddy.select_teams()
go_daddy.build_array()
go_daddy.build_url_one()
go_daddy.build_url_two()

#parse the first
spider.set_urls(go_daddy.url_list_one)
spider.parse_one()
pipe.write_data_one(spider)

# parse the second
spider.set_urls(go_daddy.url_list_two)
spider.parse_two()
pipe.write_data_two(spider)

# save the other
pipe.write_team_list(go_daddy.team_data)
pipe.write_game_list(go_daddy.ncaa_games)

217
 url ,  https://www.sports-reference.com/cbb/schools/morehead-state/2009.html
Team name  morehead-state
 url ,  https://www.sports-reference.com/cbb/schools/connecticut/2009.html
Team name  connecticut
 url ,  https://www.sports-reference.com/cbb/schools/duke/2009.html
Team name  duke
 url ,  https://www.sports-reference.com/cbb/schools/gonzaga/2009.html
Team name  gonzaga
 url ,  https://www.sports-reference.com/cbb/schools/louisiana-state/2009.html
Team name  louisiana-state
 url ,  https://www.sports-reference.com/cbb/schools/maryland/2009.html
Team name  maryland
 url ,  https://www.sports-reference.com/cbb/schools/memphis/2009.html
Team name  memphis
 url ,  https://www.sports-reference.com/cbb/schools/michigan/2009.html
Team name  michigan
 url ,  https://www.sports-reference.com/cbb/schools/north-carolina/2009.html
Team name  north-carolina
 url ,  https://www.sports-reference.com/cbb/schools/oklahoma/2009.html
Team name  oklahoma
 url ,  https://www.sports-reference.com/cbb

 url ,  https://www.sports-reference.com/cbb/schools/georgia-tech/2010.html
Team name  georgia-tech
 url ,  https://www.sports-reference.com/cbb/schools/gonzaga/2010.html
Team name  gonzaga
 url ,  https://www.sports-reference.com/cbb/schools/maryland/2010.html
Team name  maryland
 url ,  https://www.sports-reference.com/cbb/schools/michigan-state/2010.html
Team name  michigan-state
 url ,  https://www.sports-reference.com/cbb/schools/missouri/2010.html
Team name  missouri
 url ,  https://www.sports-reference.com/cbb/schools/ohio-state/2010.html
Team name  ohio-state
 url ,  https://www.sports-reference.com/cbb/schools/pittsburgh/2010.html
Team name  pittsburgh
 url ,  https://www.sports-reference.com/cbb/schools/purdue/2010.html
Team name  purdue
 url ,  https://www.sports-reference.com/cbb/schools/syracuse/2010.html
Team name  syracuse
 url ,  https://www.sports-reference.com/cbb/schools/texas-am/2010.html
Team name  texas-am
 url ,  https://www.sports-reference.com/cbb/schools/west-

 url ,  https://www.sports-reference.com/cbb/schools/wofford/2011.html
Team name  wofford
 url ,  https://www.sports-reference.com/cbb/schools/missouri/2011.html
Team name  missouri
 url ,  https://www.sports-reference.com/cbb/schools/bucknell/2011.html
Team name  bucknell
 url ,  https://www.sports-reference.com/cbb/schools/california-santa-barbara/2011.html
Team name  california-santa-barbara
 url ,  https://www.sports-reference.com/cbb/schools/st-johns-ny/2011.html
Team name  st-johns-ny
 url ,  https://www.sports-reference.com/cbb/schools/utah-state/2011.html
Team name  utah-state
 url ,  https://www.sports-reference.com/cbb/schools/princeton/2011.html
Team name  princeton
 url ,  https://www.sports-reference.com/cbb/schools/louisville/2011.html
Team name  louisville
 url ,  https://www.sports-reference.com/cbb/schools/vanderbilt/2011.html
Team name  vanderbilt
 url ,  https://www.sports-reference.com/cbb/schools/northern-colorado/2011.html
Team name  northern-colorado
 url ,  http

 url ,  https://www.sports-reference.com/cbb/schools/st-bonaventure/2012.html
Team name  st-bonaventure
 url ,  https://www.sports-reference.com/cbb/schools/belmont/2012.html
Team name  belmont
 url ,  https://www.sports-reference.com/cbb/schools/detroit-mercy/2012.html
Team name  detroit-mercy
 url ,  https://www.sports-reference.com/cbb/schools/duke/2012.html
Team name  duke
 url ,  https://www.sports-reference.com/cbb/schools/long-island-university/2012.html
Team name  long-island-university
 url ,  https://www.sports-reference.com/cbb/schools/san-diego-state/2012.html
Team name  san-diego-state
 url ,  https://www.sports-reference.com/cbb/schools/missouri/2012.html
Team name  missouri
 url ,  https://www.sports-reference.com/cbb/schools/michigan/2012.html
Team name  michigan
 url ,  https://www.sports-reference.com/cbb/schools/saint-marys-ca/2012.html
Team name  saint-marys-ca
 url ,  https://www.sports-reference.com/cbb/schools/temple/2012.html
Team name  temple
 url ,  https://ww

 url ,  https://www.sports-reference.com/cbb/schools/dayton/2014.html
Team name  dayton
 url ,  https://www.sports-reference.com/cbb/schools/florida/2014.html
Team name  florida
 url ,  https://www.sports-reference.com/cbb/schools/harvard/2014.html
Team name  harvard
 url ,  https://www.sports-reference.com/cbb/schools/louisville/2014.html
Team name  louisville
 url ,  https://www.sports-reference.com/cbb/schools/michigan/2014.html
Team name  michigan
 url ,  https://www.sports-reference.com/cbb/schools/michigan-state/2014.html
Team name  michigan-state
 url ,  https://www.sports-reference.com/cbb/schools/north-dakota-state/2014.html
Team name  north-dakota-state
 url ,  https://www.sports-reference.com/cbb/schools/oregon/2014.html
Team name  oregon
 url ,  https://www.sports-reference.com/cbb/schools/pittsburgh/2014.html
Team name  pittsburgh
 url ,  https://www.sports-reference.com/cbb/schools/san-diego-state/2014.html
Team name  san-diego-state
 url ,  https://www.sports-reference.c

 url ,  https://www.sports-reference.com/cbb/schools/kansas/2015.html
Team name  kansas
 url ,  https://www.sports-reference.com/cbb/schools/louisville/2015.html
Team name  louisville
 url ,  https://www.sports-reference.com/cbb/schools/maryland/2015.html
Team name  maryland
 url ,  https://www.sports-reference.com/cbb/schools/michigan-state/2015.html
Team name  michigan-state
 url ,  https://www.sports-reference.com/cbb/schools/northern-iowa/2015.html
Team name  northern-iowa
 url ,  https://www.sports-reference.com/cbb/schools/oklahoma/2015.html
Team name  oklahoma
 url ,  https://www.sports-reference.com/cbb/schools/oregon/2015.html
Team name  oregon
 url ,  https://www.sports-reference.com/cbb/schools/san-diego-state/2015.html
Team name  san-diego-state
 url ,  https://www.sports-reference.com/cbb/schools/virginia/2015.html
Team name  virginia
 url ,  https://www.sports-reference.com/cbb/schools/west-virginia/2015.html
Team name  west-virginia
 url ,  https://www.sports-reference.c

 url ,  https://www.sports-reference.com/cbb/schools/texas-tech/2016.html
Team name  texas-tech
 url ,  https://www.sports-reference.com/cbb/schools/colorado/2016.html
Team name  colorado
 url ,  https://www.sports-reference.com/cbb/schools/north-carolina-wilmington/2016.html
Team name  north-carolina-wilmington
 url ,  https://www.sports-reference.com/cbb/schools/seton-hall/2016.html
Team name  seton-hall
 url ,  https://www.sports-reference.com/cbb/schools/chattanooga/2016.html
Team name  chattanooga
 url ,  https://www.sports-reference.com/cbb/schools/iona/2016.html
Team name  iona
 url ,  https://www.sports-reference.com/cbb/schools/austin-peay/2016.html
Team name  austin-peay
 url ,  https://www.sports-reference.com/cbb/schools/stony-brook/2016.html
Team name  stony-brook
 url ,  https://www.sports-reference.com/cbb/schools/buffalo/2016.html
Team name  buffalo
 url ,  https://www.sports-reference.com/cbb/schools/southern-california/2016.html
Team name  southern-california
 url ,  

 url ,  https://www.sports-reference.com/cbb/schools/troy/2017.html
Team name  troy
 url ,  https://www.sports-reference.com/cbb/schools/northern-kentucky/2017.html
Team name  northern-kentucky
 url ,  https://www.sports-reference.com/cbb/schools/jacksonville-state/2017.html
Team name  jacksonville-state
 url ,  https://www.sports-reference.com/cbb/schools/oklahoma-state/2017.html
Team name  oklahoma-state
 url ,  https://www.sports-reference.com/cbb/schools/miami-fl/2017.html
Team name  miami-fl
 url ,  https://www.sports-reference.com/cbb/schools/texas-southern/2017.html
Team name  texas-southern
 url ,  https://www.sports-reference.com/cbb/schools/iona/2017.html
Team name  iona
 url ,  https://www.sports-reference.com/cbb/schools/creighton/2017.html
Team name  creighton
 url ,  https://www.sports-reference.com/cbb/schools/marquette/2017.html
Team name  marquette
 url ,  https://www.sports-reference.com/cbb/schools/kent-state/2017.html
Team name  kent-state
 url ,  https://www.sports

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [235]:
print("file finished running")

file finished running
