In [6]:
from bs4 import BeautifulSoup
import pandas as pd
import requests as req
import collections
from collections import OrderedDict

In [7]:
def combine_stats(urls):
    """
    Add up the stats of every player in a squad into a one-row DataFrame.

    Each URL is handed to ``get_player_stats`` and the resulting dicts are
    summed key by key (every stat is summed, including averages and rates).

    Parameters:
    urls (list of str): Cricinfo player-page URLs, one per player

    Returns:
    DataFrame: a single row whose columns are the stat names and whose values
    are the totals over all players

    """
    # Fetch every player first, then accumulate; column order follows the
    # order in which stat names are first encountered.
    per_player = [get_player_stats(url) for url in urls]

    totals = collections.Counter()
    for player_stats in per_player:
        totals.update(player_stats)

    # Wrap each total in a one-element list so from_dict yields exactly one row.
    single_row = {name: [total] for name, total in totals.items()}
    return pd.DataFrame.from_dict(single_row)

In [8]:
# Stat names, in the order the numeric cells are collected from the 'ODIs' row
# of the batting/fielding table and of the bowling table respectively.
bat_stats = ['Matches played', 'Innings batted', 'Not outs', 'Runs scored', 'Highest inns score', 
                'Batting average', 'Balls faced', 'Batting strike rate', 'Hundreds scored', 
                'Fifties scored', 'Boundary fours', 'Boundary sixes', 'Catches taken', 'Stumpings made']
bowl_stats = ['Matches played', 'Innings bowled', 'Balls bowled', 'Runs conceded', 'Wickets taken',
                'Bowling average', 'Economy rate','Bowling strike rate', '4 wickets in inning', 
                '5 wickets in inning', '10 wickets in match']


def _odi_row_values(table, skip_dashes=False):
    """Collect the numeric cells of the 'ODIs' row(s) of a stats table as floats.

    The row label itself and any cell containing '/' are left out; a '*'
    marker is stripped before conversion. When ``skip_dashes`` is true, cells
    containing '-' are left out as well.
    """
    values = []
    for row in table.find_all('tr'):
        first_cell = row.find('td')
        if first_cell is None or first_cell.get_text() != 'ODIs':
            continue
        for cell in row.find_all('td'):
            text = cell.get_text()
            if text == 'ODIs' or '/' in text:
                continue
            if skip_dashes and '-' in text:
                continue
            values.append(float(text.replace('*', '')))
    return values


def get_player_stats(url):
    """
    Given the URL of the Cricinfo stats page of a player, outputs their ODI stats as a dict.
    
    If they are listed as a batsman, only their batting/fielding stats will be included.
    If they are listed as a bowler, only their bowling stats will be included.
    If they are listed as an allrounder, both will be included.
    Best innings bowling and best match bowling will not be included.
    
    Parameters:
    url (str): the Cricinfo URL of the player
    
    Returns:
    dict: the relevant ODI stats of the player (stat name -> float). An empty
    dict is returned when a table required for the player's role is missing or
    has no 'ODIs' row; if no recognised playing role is found the result is
    empty as well.
    
    """
    # Name the parser explicitly: without it BeautifulSoup warns and picks
    # whichever parser happens to be installed, so results can differ
    # between machines.
    bs = BeautifulSoup(req.get(url).text, 'lxml')

    # The last 'Playing role' paragraph on the page wins.
    role = ''
    for par in bs.find_all('p', {'class': 'ciPlayerinformationtxt'}):
        label = par.b.get_text() if par.b is not None else ''
        if 'Playing role' in label and par.span is not None:
            role = par.span.get_text().lower()

    # First engineTable is read as batting/fielding, second as bowling.
    tables = bs.find_all('table', {'class': 'engineTable'}, limit=2)
    stats = OrderedDict()

    if 'batsman' in role or 'allrounder' in role:
        if len(tables) < 1:
            return dict()
        bat_stat_vals = _odi_row_values(tables[0])
        if len(bat_stat_vals) == 0:
            return dict()
        stats.update(dict(zip(bat_stats, bat_stat_vals)))

    if 'bowler' in role or 'allrounder' in role:
        if len(tables) < 2:
            return dict()
        # NOTE(review): every cell containing '-' is dropped here, not only
        # the best-bowling figures; a '-' placeholder in any other column
        # would shift later values onto the wrong stat names. Confirm
        # against the page layout.
        bowl_stat_vals = _odi_row_values(tables[1], skip_dashes=True)
        if len(bowl_stat_vals) == 0:
            # As before, this discards any batting stats already collected.
            return dict()
        stats.update(dict(zip(bowl_stats, bowl_stat_vals)))

    return stats

In [9]:
# Every player page follows the same URL pattern; only the team slug and the
# numeric player id vary.
PLAYER_URL = 'http://www.espncricinfo.com/{slug}/content/player/{player_id}.html'

# (display name, URL slug, player ids) for each squad, in output row order.
SQUADS = [
    ('India', 'india',
     [253802, 34102, 28235, 28081, 290716, 625371, 326016, 559235,
      430246, 625383, 481896, 477021, 30045, 422108, 234675]),
    ('Pakistan', 'pakistan',
     [227760, 1158100, 259551, 512191, 568276, 39950, 348144, 42657,
      41434, 318788, 922943, 227758, 681305, 681117, 1072470]),
    ('England', 'england',
     [24598, 8917, 297433, 308967, 662973, 12454, 249866, 19264,
      244497, 303669, 298438, 311158, 308251, 247235, 351588]),
    ('Afghanistan', 'afghanistan',
     [318340, 793457, 533956, 320652, 440970, 524049, 318339, 25913,
      793463, 516561, 440963, 311427, 974109, 352048, 419873]),
    ('Australia', 'australia',
     [5334, 272477, 326434, 261354, 489889, 215155, 272279, 6683,
      325026, 774223, 267192, 311592, 325012, 219889, 379504]),
    ('Bangladesh', 'bangladesh',
     [373538, 56143, 269237, 550133, 629070, 629063, 300619, 330902,
      410763, 56025, 56007, 56194, 536936, 436677, 56029]),
    ('New Zealand', 'newzealand',
     [277906, 559066, 232364, 38699, 55395, 493773, 226492, 506612,
      388802, 232359, 355269, 539511, 502714, 277912, 440516]),
    ('South Africa', 'southafrica',
     [44828, 379143, 321777, 44932, 43906, 600498, 337790, 327830,
      540316, 550215, 47492, 542023, 481979, 40618, 379145]),
    ('Sri Lanka', 'srilanka',
     [227772, 49619, 49758, 784369, 300631, 465793, 629074, 328026,
      49764, 233514, 222354, 49700, 301236, 370040, 324358]),
    ('West Indies', 'westindies',
     [431901, 391485, 315594, 604302, 914567, 581379, 670025, 670013,
      495551, 446101, 230553, 277472, 51880, 276298, 457249]),
]

# Scrape and total each squad in turn (one HTTP request per player).
team_names = []
combined_stats = []
for team_name, slug, player_ids in SQUADS:
    urls = [PLAYER_URL.format(slug=slug, player_id=player_id) for player_id in player_ids]
    team_names.append(team_name)
    combined_stats.append(combine_stats(urls))

# Keep the per-team frames available under their original names.
(india_stats, pakistan_stats, england_stats, afghanistan_stats, australia_stats,
 bangladesh_stats, newzealand_stats, southafrica_stats, srilanka_stats,
 westindies_stats) = combined_stats

# One row per team, labelled with the team's display name.
df = pd.concat(combined_stats)
df.index = team_names
display(df)



 BeautifulSoup([your markup])

to this:

 BeautifulSoup([your markup], "lxml")

  markup_type=markup_type))


Unnamed: 0,Matches played,Innings batted,Not outs,Runs scored,Highest inns score,Batting average,Balls faced,Batting strike rate,Hundreds scored,Fifties scored,...,Innings bowled,Balls bowled,Runs conceded,Wickets taken,Bowling average,Economy rate,Bowling strike rate,4 wickets in inning,5 wickets in inning,10 wickets in match
India,1573.0,1100.0,231.0,40894.0,1288.0,403.27,45613.0,916.67,92.0,218.0,...,532.0,26101.0,21825.0,722.0,334.58,45.99,385.0,25.0,6.0,0.0
Pakistan,1002.0,762.0,119.0,24243.0,1261.0,453.34,29384.0,942.65,44.0,134.0,...,406.0,18693.0,14858.0,453.0,287.63,42.04,329.5,11.0,5.0,0.0
England,1225.0,818.0,122.0,27730.0,1261.0,375.98,28787.0,1008.53,61.0,151.0,...,487.0,23076.0,21820.0,613.0,307.97,47.39,313.8,28.0,6.0,0.0
Afghanistan,837.0,531.0,50.0,14222.0,940.0,265.52,19019.0,700.63,13.0,85.0,...,448.0,19863.0,15240.0,586.0,248.55,42.87,309.5,17.0,7.0,0.0
Australia,814.0,541.0,42.0,19903.0,1054.0,312.92,21686.0,734.52,46.0,108.0,...,315.0,14755.0,12992.0,442.0,266.67,42.79,299.6,20.0,6.0,0.0
Bangladesh,1343.0,904.0,133.0,26231.0,1126.0,341.46,32741.0,892.05,31.0,166.0,...,718.0,33253.0,26885.0,841.0,265.42,40.86,316.4,26.0,7.0,0.0
New Zealand,1157.0,776.0,117.0,26874.0,1126.0,319.68,31179.0,832.25,52.0,155.0,...,442.0,21502.0,19228.0,620.0,264.84,43.41,294.4,19.0,12.0,0.0
South Africa,1143.0,712.0,111.0,26457.0,971.0,336.01,29364.0,622.79,61.0,135.0,...,514.0,22552.0,18781.0,671.0,257.76,45.7,302.5,23.0,7.0,0.0
Sri Lanka,1106.0,647.0,98.0,16551.0,1044.0,309.6,19482.0,978.32,12.0,102.0,...,728.0,29415.0,26293.0,812.0,442.19,55.3,473.4,19.0,13.0,0.0
West Indies,878.0,643.0,67.0,20088.0,1053.0,263.13,23563.0,736.22,40.0,97.0,...,551.0,24067.0,21175.0,612.0,508.65,56.93,532.8,18.0,9.0,0.0


In [10]:
# Write the combined team stats frame to Team_data.csv in the working directory.
df.to_csv('Team_data.csv')

In [None]:
# encode = {'team1': {'India':1, 'Pakistan':2, 'England':3, 'Afghanistan':4, 'Australia':5, 'Bangladesh':6, 'New Zealand':7, 'South Africa':8, 'Sri Lanka':9, 'West Indies':10}, 
#          'team2': {'India':1, 'Pakistan':2, 'England':3, 'Afghanistan':4, 'Australia':5, 'Bangladesh':6, 'New Zealand':7, 'South Africa':8, 'Sri Lanka':9, 'West Indies':10},
# 'toss_winner': {'India':1, 'Pakistan':2, 'England':3, 'Afghanistan':4, 'Australia':5, 'Bangladesh':6, 'New Zealand':7, 'South Africa':8, 'Sri Lanka':9, 'West Indies':10},
# 'winner': {'India':1, 'Pakistan':2, 'England':3, 'Afghanistan':4, 'Australia':5, 'Bangladesh':6, 'New Zealand':7, 'South Africa':8, 'Sri Lanka':9, 'West Indies':10}
# matches.replace(encode, inplace=True)

In [129]:
filename = 'ContinousDataset.csv'

# Read inside a context manager so the file handle is closed as soon as the
# CSV has been parsed (the bare open() call previously left it open).
with open(filename) as file:
    df2 = pd.read_csv(
        filepath_or_buffer = file,
#     names = ['Scorecard', 'Team1', 'Team2', 'Ground', 'Match Date', 'Winner', 'Venue_Team1', 'Venue_Team2'],
    )

In [130]:
# Rename the two team columns to identifiers without spaces; index labels are
# converted to str, as before. Rebinding instead of inplace=True keeps the
# step free of hidden in-place mutation.
df2 = df2.rename(index=str, columns={"Team 1": "Team1", "Team 2": "Team2"})

# Keep only the last four characters of each date string (the year, judging
# by the displayed output), vectorised instead of a Python loop.
df2['Match Date'] = df2['Match Date'].str[-4:]

# Discard the first 2358 rows.
# NOTE(review): presumably this keeps matches from 2007 onwards - confirm the cut-off.
df2 = df2[2358:]
df2.head(10)



Unnamed: 0.1,Unnamed: 0,Scorecard,Team1,Team2,Margin,Ground,Match Date,Winner,Host_Country,Venue_Team1,Venue_Team2,Innings_Team1,Innings_Team2
2358,2363,ODI # 2470,New Zealand,Sri Lanka,Winner2ndInning,Christchurch,2007,New Zealand,New Zealand,Home,Away,Second,First
2359,2364,ODI # 2471,New Zealand,Sri Lanka,Winner1stInning,Auckland,2007,Sri Lanka,New Zealand,Home,Away,Second,First
2360,2365,ODI # 2473,Australia,England,Winner2ndInning,Melbourne,2007,Australia,Australia,Home,Away,Second,First
2361,2366,ODI # 2474,Australia,New Zealand,Winner1stInning,Hobart,2007,Australia,Australia,Home,Away,First,Second
2362,2367,ODI # 2475,England,New Zealand,Winner2ndInning,Hobart,2007,England,Australia,Neutral,Neutral,Second,First
2363,2368,ODI # 2476,Kenya,Scotland,Winner1stInning,Mombasa,2007,Kenya,Kenya,Home,Away,First,Second
2364,2369,ODI # 2477,Canada,Scotland,Winner2ndInning,Mombasa,2007,Scotland,Kenya,Neutral,Neutral,First,Second
2365,2370,ODI # 2478,Australia,England,Winner2ndInning,Brisbane,2007,Australia,Australia,Home,Away,Second,First
2366,2371,ODI # 2479,Australia,New Zealand,Winner2ndInning,Sydney,2007,Australia,Australia,Home,Away,Second,First
2367,2372,ODI # 2480,India,West Indies,Winner1stInning,Nagpur,2007,India,India,Home,Away,First,Second


In [131]:
# Boolean masks replace the row-by-row .iloc loop; the counts are identical.
india_played = (df2['Team1'] == 'India') | (df2['Team2'] == 'India')
india_at_home = india_played & (df2['Host_Country'] == 'India')

# Matches hosted in India in which India was one of the two sides ...
count_match = int(india_at_home.sum())
# ... and how many of those India won.
count = int((india_at_home & (df2['Winner'] == 'India')).sum())
        

In [132]:
count/count_match  # fraction (0-1, not a percentage) of India's home matches that India won

0.6301020408163265

In [133]:
# encode = {'Team1': {'India':1, 'Pakistan':2, 'England':3, 'Afghanistan':4, 'Australia':5, 'Bangladesh':6, 
#                      'New Zealand':7, 'South Africa':8, 'Sri Lanka':9, 'West Indies':10, 'Kenya':0, 'Scotland':0, 
#                      'P.N.G':0, 'Zimbabwe':0, 'Canada':0, 'Hong Kong':0, 'U.A.E.':0, 'Ireland':0, 'Namibia':0, 
#                      'East Africa':0, 'Bermuda':0, 'Netherlands':0, 'U.S.A':0}, 
#           'Team2': {'India':1, 'Pakistan':2, 'England':3, 'Afghanistan':4, 'Australia':5, 'Bangladesh':6, 
#                      'New Zealand':7, 'South Africa':8, 'Sri Lanka':9, 'West Indies':10, 'Kenya':0, 'Scotland':0, 
#                      'P.N.G':0, 'Zimbabwe':0, 'Canada':0, 'Hong Kong':0, 'U.A.E.':0, 'Ireland':0, 'Namibia':0, 
#                      'East Africa':0, 'Bermuda':0, 'Netherlands':0, 'U.S.A':0},
#           'Winner': {'India':1, 'Pakistan':2, 'England':3, 'Afghanistan':4, 'Australia':5, 'Bangladesh':6, 
#                      'New Zealand':7, 'South Africa':8, 'Sri Lanka':9, 'West Indies':10, 'Kenya':0, 'Scotland':0, 
#                      'P.N.G':0, 'Zimbabwe':0, 'Canada':0, 'Hong Kong':0, 'U.A.E.':0, 'Ireland':0, 'Namibia':0, 
#                      'East Africa':0, 'Bermuda':0, 'Netherlands':0, 'U.S.A':0},
#           'Host_Country': {'India':1, 'Pakistan':2, 'England':3, 'Afghanistan':4, 'Australia':5, 'Bangladesh':6, 
#                      'New Zealand':7, 'South Africa':8, 'Sri Lanka':9, 'West Indies':10, 'Kenya':0, 'Scotland':0, 
#                      'P.N.G':0, 'Zimbabwe':0, 'Canada':0, 'Hong Kong':0, 'U.A.E.':0, 'Ireland':0, 'Namibia':0, 
#                      'East Africa':0, 'Bermuda':0, 'Netherlands':0, 'U.S.A':0}}
# df2.replace(encode, inplace=True)
          
          

In [134]:
# Keep only rows where none of Team1, Team2 and Host_Country equals the
# integer 0.
# NOTE(review): presumably a leftover from the commented-out numeric encoding,
# where 0 marked the other teams; with plain team-name strings no row is
# dropped - confirm.
keep_rows = (df2['Team1'] != 0) & (df2['Team2'] != 0) & (df2['Host_Country'] != 0)
df2 = df2[keep_rows]
team1 = list(df2['Team1'])
# pd.get_dummies(team1)

Unnamed: 0,Afghanistan,Australia,Bangladesh,Bermuda,Canada,East Africa,England,Hong Kong,India,Ireland,...,New Zealand,P.N.G.,Pakistan,Scotland,South Africa,Sri Lanka,U.A.E.,U.S.A.,West Indies,Zimbabwe
0,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# FIXME: filename is empty, so open() below fails until a real CSV path is
# filled in. Note that this cell also replaces the existing df2.
filename = ''

# Context manager so the handle is closed once the CSV has been parsed.
with open(filename) as file:
    df2 = pd.read_csv(
        filepath_or_buffer = file,
#     names = ['Scorecard', 'Team1', 'Team2', 'Ground', 'Match Date', 'Winner', 'Venue_Team1', 'Venue_Team2'],
    )

In [117]:
# Inputs and target both come from the first 1186 rows of df2.
# NOTE(review): 'Winner' is kept as an input column and is also the target
# below - confirm this is intended.
feature_columns = ['Team1', 'Team2', 'Host_Country', 'Winner', 'Match Date']

# .copy() gives x_data its own data, so the column assignment below does not
# write into a frame derived from a slice of df2 (SettingWithCopyWarning).
x_data = df2.iloc[:1186][feature_columns].copy()
x_data['Match Date'] = x_data['Match Date'].astype('int32')
y_data = df2['Winner'].iloc[:1186]
