# Connecting datasets

### Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing

### Premier League table data

In [2]:
# Load the final league tables; the file's own header row is replaced by the
# notebook's upper-case column names.
table_data = pd.read_csv(
    'data/table_data.csv',
    header=0,
    names=['FINAL_POSITION', 'TEAM', 'FINAL_POINTS', 'SEASON'],
)

In [3]:
table_data.head()

Unnamed: 0,FINAL_POSITION,TEAM,FINAL_POINTS,SEASON
0,1,Manchester United,80,2000-01
1,2,Arsenal,70,2000-01
2,3,Liverpool,69,2000-01
3,4,Leeds United,68,2000-01
4,5,Ipswich Town,66,2000-01


In [4]:
table_data.tail()

Unnamed: 0,FINAL_POSITION,TEAM,FINAL_POINTS,SEASON
335,16,Burnley,40,2016-17
336,17,Watford,40,2016-17
337,18,Hull City,34,2016-17
338,19,Middlesbrough,28,2016-17
339,20,Sunderland,24,2016-17


### Wages data

In [5]:
wages_data = pd.read_csv('data/combined_wages_scaled.csv')

In [6]:
wages_data.head()

Unnamed: 0,TEAM,WAGE,WAGE_SCALED,SEASON
0,Chelsea,50000000.0,1.83277,2000-01
1,Manchester United,50000000.0,1.83277,2000-01
2,Liverpool,49000000.0,1.74886,2000-01
3,Leeds United,43000000.0,1.2454,2000-01
4,Arsenal,41000000.0,1.07758,2000-01


In [7]:
wages_data.tail()

Unnamed: 0,TEAM,WAGE,WAGE_SCALED,SEASON
343,Watford,41000000.0,-0.826545,2016-17
344,Bournemouth,34000000.0,-0.930447,2016-17
345,Middlesbrough,34000000.0,-0.930447,2016-17
346,Burnley,33000000.0,-0.94529,2016-17
347,Hull City,25000000.0,-1.064036,2016-17


In [8]:
wages_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 348 entries, 0 to 347
Data columns (total 4 columns):
TEAM           348 non-null object
WAGE           348 non-null float64
WAGE_SCALED    348 non-null float64
SEASON         348 non-null object
dtypes: float64(2), object(2)
memory usage: 11.0+ KB


In [9]:
table_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 340 entries, 0 to 339
Data columns (total 4 columns):
FINAL_POSITION    340 non-null int64
TEAM              340 non-null object
FINAL_POINTS      340 non-null int64
SEASON            340 non-null object
dtypes: int64(2), object(2)
memory usage: 10.7+ KB


In [10]:
# Attach wages to every league-table row. A left join keeps all table rows, so
# team-seasons without a matching wage record get NaN in WAGE / WAGE_SCALED
# (7 of the 340 rows in the run shown below).
total_data = table_data.merge(wages_data, how='left', on=['TEAM','SEASON'])

# Guard: wages_data has more rows than table_data; a duplicated (TEAM, SEASON)
# key on the wages side would silently duplicate league-table rows.
assert len(total_data) == len(table_data), 'wages merge changed the number of rows'

In [11]:
total_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 340 entries, 0 to 339
Data columns (total 6 columns):
FINAL_POSITION    340 non-null int64
TEAM              340 non-null object
FINAL_POINTS      340 non-null int64
SEASON            340 non-null object
WAGE              333 non-null float64
WAGE_SCALED       333 non-null float64
dtypes: float64(2), int64(2), object(2)
memory usage: 18.6+ KB


In [12]:
total_data.head()

Unnamed: 0,FINAL_POSITION,TEAM,FINAL_POINTS,SEASON,WAGE,WAGE_SCALED
0,1,Manchester United,80,2000-01,50000000.0,1.83277
1,2,Arsenal,70,2000-01,41000000.0,1.07758
2,3,Liverpool,69,2000-01,49000000.0,1.74886
3,4,Leeds United,68,2000-01,43000000.0,1.2454
4,5,Ipswich Town,66,2000-01,18000000.0,-0.852348


In [13]:
total_data.tail()

Unnamed: 0,FINAL_POSITION,TEAM,FINAL_POINTS,SEASON,WAGE,WAGE_SCALED
335,16,Burnley,40,2016-17,33000000.0,-0.94529
336,17,Watford,40,2016-17,41000000.0,-0.826545
337,18,Hull City,34,2016-17,25000000.0,-1.064036
338,19,Middlesbrough,28,2016-17,34000000.0,-0.930447
339,20,Sunderland,24,2016-17,68300000.0,-0.421325


### Transfer spend data

In [14]:
transfer_data = pd.read_csv('data/combined_transfer_data.csv')

In [15]:
# Rename the columns positionally to the notebook's upper-case naming scheme.
# NOTE(review): assumes the CSV columns arrive in exactly this order — confirm against the file.
transfer_data.columns = ['TEAM','NET_TRANSFER_SPEND','NET_TRANSFER_SPEND_SCALED','SEASON']
transfer_data.head()

Unnamed: 0,TEAM,NET_TRANSFER_SPEND,NET_TRANSFER_SPEND_SCALED,SEASON
0,Blackburn Rovers,9813000.0,3.405195,1992-93
1,Sheffield Wednesday,3060000.0,1.061846,1992-93
2,Oldham Athletic,2384000.0,0.827268,1992-93
3,Wimbledon,2143000.0,0.743639,1992-93
4,Aston Villa,1956000.0,0.678749,1992-93


In [16]:
transfer_data.tail()

Unnamed: 0,TEAM,NET_TRANSFER_SPEND,NET_TRANSFER_SPEND_SCALED,SEASON
501,Watford,11100000.0,0.236934,2016-17
502,Swansea City,9600000.0,0.204916,2016-17
503,Hull City,8800000.0,0.187839,2016-17
504,Liverpool,-2700000.0,-0.057633,2016-17
505,Southampton,-16150000.0,-0.344728,2016-17


In [17]:
transfer_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 4 columns):
TEAM                         506 non-null object
NET_TRANSFER_SPEND           506 non-null float64
NET_TRANSFER_SPEND_SCALED    506 non-null float64
SEASON                       506 non-null object
dtypes: float64(2), object(2)
memory usage: 15.9+ KB


In [18]:
# Left-join net transfer spend onto the running table, keyed by club and season.
total_data = pd.merge(total_data, transfer_data, on=['TEAM', 'SEASON'], how='left')

In [19]:
total_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 340 entries, 0 to 339
Data columns (total 8 columns):
FINAL_POSITION               340 non-null int64
TEAM                         340 non-null object
FINAL_POINTS                 340 non-null int64
SEASON                       340 non-null object
WAGE                         333 non-null float64
WAGE_SCALED                  333 non-null float64
NET_TRANSFER_SPEND           340 non-null float64
NET_TRANSFER_SPEND_SCALED    340 non-null float64
dtypes: float64(4), int64(2), object(2)
memory usage: 23.9+ KB


In [20]:
total_data.head()

Unnamed: 0,FINAL_POSITION,TEAM,FINAL_POINTS,SEASON,WAGE,WAGE_SCALED,NET_TRANSFER_SPEND,NET_TRANSFER_SPEND_SCALED
0,1,Manchester United,80,2000-01,50000000.0,1.83277,1820000.0,0.130414
1,2,Arsenal,70,2000-01,41000000.0,1.07758,-5175000.0,-0.37082
2,3,Liverpool,69,2000-01,49000000.0,1.74886,17870000.0,1.280492
3,4,Leeds United,68,2000-01,43000000.0,1.2454,41025000.0,2.939685
4,5,Ipswich Town,66,2000-01,18000000.0,-0.852348,5100000.0,0.365445


In [21]:
total_data.tail()

Unnamed: 0,FINAL_POSITION,TEAM,FINAL_POINTS,SEASON,WAGE,WAGE_SCALED,NET_TRANSFER_SPEND,NET_TRANSFER_SPEND_SCALED
335,16,Burnley,40,2016-17,33000000.0,-0.94529,44400000.0,0.947735
336,17,Watford,40,2016-17,41000000.0,-0.826545,11100000.0,0.236934
337,18,Hull City,34,2016-17,25000000.0,-1.064036,8800000.0,0.187839
338,19,Middlesbrough,28,2016-17,34000000.0,-0.930447,39150000.0,0.835671
339,20,Sunderland,24,2016-17,68300000.0,-0.421325,15830000.0,0.337897


### Squad info

In [22]:
team_info = pd.read_csv('data/team_info.csv')

In [23]:
team_info.head()

Unnamed: 0,TEAM,SQUAD_SIZE,AVG_AGE,NUM_FOREIGN_PLAYERS,TEAM_MARKET_VALUE,SEASON
0,Arsenal,33,25.0,18,,1999-00
1,Derby County,35,24.7,19,,1999-00
2,Everton,38,24.4,17,,1999-00
3,Liverpool,33,24.2,23,,1999-00
4,Tottenham Hotspur,34,24.9,16,,1999-00


In [24]:
team_info.tail()

Unnamed: 0,TEAM,SQUAD_SIZE,AVG_AGE,NUM_FOREIGN_PLAYERS,TEAM_MARKET_VALUE,SEASON
355,Bournemouth,26,26.3,10,106000000.0,2016-17
356,Sunderland,28,27.1,20,102000000.0,2016-17
357,Middlesbrough,25,27.9,15,99500000.0,2016-17
358,West Bromwich Albion,20,28.6,14,95600000.0,2016-17
359,Burnley,23,28.2,9,85000000.0,2016-17


In [25]:
total_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 340 entries, 0 to 339
Data columns (total 8 columns):
FINAL_POSITION               340 non-null int64
TEAM                         340 non-null object
FINAL_POINTS                 340 non-null int64
SEASON                       340 non-null object
WAGE                         333 non-null float64
WAGE_SCALED                  333 non-null float64
NET_TRANSFER_SPEND           340 non-null float64
NET_TRANSFER_SPEND_SCALED    340 non-null float64
dtypes: float64(4), int64(2), object(2)
memory usage: 23.9+ KB


In [26]:
# Left-join the squad information onto the running table, keyed by club and season.
total_data = pd.merge(total_data, team_info, on=['TEAM', 'SEASON'], how='left')

In [27]:
total_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 340 entries, 0 to 339
Data columns (total 12 columns):
FINAL_POSITION               340 non-null int64
TEAM                         340 non-null object
FINAL_POINTS                 340 non-null int64
SEASON                       340 non-null object
WAGE                         333 non-null float64
WAGE_SCALED                  333 non-null float64
NET_TRANSFER_SPEND           340 non-null float64
NET_TRANSFER_SPEND_SCALED    340 non-null float64
SQUAD_SIZE                   340 non-null int64
AVG_AGE                      340 non-null float64
NUM_FOREIGN_PLAYERS          340 non-null int64
TEAM_MARKET_VALUE            241 non-null float64
dtypes: float64(6), int64(4), object(2)
memory usage: 34.5+ KB


### Estimated market values

In [28]:
estimated_team_values = pd.read_csv('data/estimated_team_values.csv', index_col=0)
estimated_team_values.tail()

Unnamed: 0,TEAM,SEASON,EST_TEAM_MARKET_VALUE
335,Bournemouth,2016-17,
336,Hull City,2016-17,
337,Swansea City,2016-17,
338,Middlesbrough,2016-17,
339,Sunderland,2016-17,


The NaNs are the team-seasons for which I already have real Transfermarkt market values, so no estimate is needed for them.  
I'm also going to add an `IS_ESTIMATED_MARKET_VAL` flag column so that I can tell which values I generated myself.

In [29]:
# Keep only the rows that carry an estimate and flag every one of them as estimated.
estimated_team_values = (
    estimated_team_values
    .dropna(axis=0, how='any')
    .assign(IS_ESTIMATED_MARKET_VAL=1)
)
estimated_team_values.tail()

Unnamed: 0,TEAM,SEASON,EST_TEAM_MARKET_VALUE,IS_ESTIMATED_MARKET_VAL
4,Ipswich Town,2000-01,19354000.0,1
8,Charlton Athletic,2000-01,9876000.0,1
98,Norwich City,2004-05,8493000.0,1
12,Leicester City,2000-01,-5679000.0,1
58,West Bromwich Albion,2002-03,-22552000.0,1


In [30]:
def negative_to_zero(input_val):
    """Return 0 for values below zero; return any other value unchanged."""
    return 0 if input_val < 0 else input_val

# Some estimates come out negative (see the tail shown above); clamp those to zero.
estimated_team_values['EST_TEAM_MARKET_VALUE'] = estimated_team_values['EST_TEAM_MARKET_VALUE'].map(negative_to_zero)

In [31]:
estimated_team_values.sample(5)

Unnamed: 0,TEAM,SEASON,EST_TEAM_MARKET_VALUE,IS_ESTIMATED_MARKET_VAL
80,Chelsea,2004-05,360542000.0,1
35,Bolton Wanderers,2001-02,49386000.0,1
81,Arsenal,2004-05,181973000.0,1
91,Birmingham City,2004-05,77784000.0,1
64,Newcastle United,2003-04,65333000.0,1


In [32]:
# Left-join the estimates; rows without an estimate get NaN in both
# EST_TEAM_MARKET_VALUE and IS_ESTIMATED_MARKET_VAL.
total_data = pd.merge(total_data, estimated_team_values, on=['TEAM', 'SEASON'], how='left')

In [33]:
total_data.head()

Unnamed: 0,FINAL_POSITION,TEAM,FINAL_POINTS,SEASON,WAGE,WAGE_SCALED,NET_TRANSFER_SPEND,NET_TRANSFER_SPEND_SCALED,SQUAD_SIZE,AVG_AGE,NUM_FOREIGN_PLAYERS,TEAM_MARKET_VALUE,EST_TEAM_MARKET_VALUE,IS_ESTIMATED_MARKET_VAL
0,1,Manchester United,80,2000-01,50000000.0,1.83277,1820000.0,0.130414,34,24.9,18,,170072000.0,1.0
1,2,Arsenal,70,2000-01,41000000.0,1.07758,-5175000.0,-0.37082,33,25.3,22,,109338000.0,1.0
2,3,Liverpool,69,2000-01,49000000.0,1.74886,17870000.0,1.280492,30,24.5,21,,139748000.0,1.0
3,4,Leeds United,68,2000-01,43000000.0,1.2454,41025000.0,2.939685,36,22.5,22,,104249000.0,1.0
4,5,Ipswich Town,66,2000-01,18000000.0,-0.852348,5100000.0,0.365445,27,24.8,11,,19354000.0,1.0


In [34]:
# Prefer the existing market value; fall back to the estimate only where it is missing.
total_data['TEAM_MARKET_VALUE'] = total_data['TEAM_MARKET_VALUE'].combine_first(
    total_data['EST_TEAM_MARKET_VALUE']
)

In [35]:
total_data.sample(5)

Unnamed: 0,FINAL_POSITION,TEAM,FINAL_POINTS,SEASON,WAGE,WAGE_SCALED,NET_TRANSFER_SPEND,NET_TRANSFER_SPEND_SCALED,SQUAD_SIZE,AVG_AGE,NUM_FOREIGN_PLAYERS,TEAM_MARKET_VALUE,EST_TEAM_MARKET_VALUE,IS_ESTIMATED_MARKET_VAL
254,15,Aston Villa,41,2012-13,72000000.0,-0.162781,24630000.0,0.928159,31,24.1,19,101030000.0,,
24,5,Leeds United,66,2001-02,54000000.0,1.139156,41725000.0,2.799861,26,24.1,13,92629000.0,92629000.0,1.0
189,10,Blackburn Rovers,50,2009-10,47000000.0,-0.417002,-23370000.0,-0.635856,35,25.3,29,90300000.0,,
34,15,Everton,43,2001-02,29000000.0,-0.530617,5575000.0,0.374098,31,26.3,19,110502000.0,110502000.0,1.0
320,1,Chelsea,93,2016-17,218000000.0,1.800705,33900000.0,0.723608,25,26.6,20,515000000.0,,


In [36]:
# Rows that had no estimate merged in were left with NaN in the flag; mark them as 0.
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: the chained in-place form warns on recent pandas and does not
# update the frame at all once Copy-on-Write is enabled.
total_data['IS_ESTIMATED_MARKET_VAL'] = total_data['IS_ESTIMATED_MARKET_VAL'].fillna(0)

# The estimate has already been folded into TEAM_MARKET_VALUE, so drop the helper column.
total_data = total_data.drop('EST_TEAM_MARKET_VALUE', axis=1)

In [37]:
# Any market value still missing at this point is replaced by 0.
# NOTE(review): a zero here is treated as a real value by the scaling step — confirm that is intended.
total_data = total_data.fillna({'TEAM_MARKET_VALUE': 0})

Scale the market values: each value is divided by the column's standard deviation, without centring on the mean.

In [38]:
# with_mean=False: the values are only divided by their standard deviation, not centred.
# NOTE(review): the pre-computed WAGE_SCALED column contains negative values, so it
# appears to be centred while this column is not — confirm the difference is intended.
total_data = total_data.assign(
    TEAM_MARKET_VALUE_SCALED=preprocessing.scale(total_data['TEAM_MARKET_VALUE'], with_mean=False)
)

In [39]:
total_data.sample(3)

Unnamed: 0,FINAL_POSITION,TEAM,FINAL_POINTS,SEASON,WAGE,WAGE_SCALED,NET_TRANSFER_SPEND,NET_TRANSFER_SPEND_SCALED,SQUAD_SIZE,AVG_AGE,NUM_FOREIGN_PLAYERS,TEAM_MARKET_VALUE,IS_ESTIMATED_MARKET_VAL,TEAM_MARKET_VALUE_SCALED
75,16,Manchester City,41,2003-04,38000000.0,-0.114428,8937000.0,0.236941,33,25.7,24,135209000.0,1.0,1.098447
79,20,Wolverhampton Wanderers,33,2003-04,19000000.0,-0.934852,8725000.0,0.231321,34,26.2,22,44591000.0,1.0,0.36226
331,12,Leicester City,44,2016-17,66000000.0,-0.455464,26050000.0,0.556047,25,27.7,18,205300000.0,0.0,1.667871


### Team continuity data

In [40]:
continuity = pd.read_csv('data/team_continuity.csv')
continuity.sample(5)

Unnamed: 0,TEAM,SEASON,AVG_TIME
122,Birmingham City,2005-06,2.2
335,Swansea City,2015-16,2.9
223,Blackburn Rovers,2010-11,2.5
252,QPR,2011-12,2.1
280,Arsenal,2013-14,3.7


In [41]:
# Left-join the team continuity figures (AVG_TIME), keyed by club and season.
total_data = pd.merge(total_data, continuity, on=['TEAM', 'SEASON'], how='left')

In [42]:
total_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 340 entries, 0 to 339
Data columns (total 15 columns):
FINAL_POSITION               340 non-null int64
TEAM                         340 non-null object
FINAL_POINTS                 340 non-null int64
SEASON                       340 non-null object
WAGE                         333 non-null float64
WAGE_SCALED                  333 non-null float64
NET_TRANSFER_SPEND           340 non-null float64
NET_TRANSFER_SPEND_SCALED    340 non-null float64
SQUAD_SIZE                   340 non-null int64
AVG_AGE                      340 non-null float64
NUM_FOREIGN_PLAYERS          340 non-null int64
TEAM_MARKET_VALUE            340 non-null float64
IS_ESTIMATED_MARKET_VAL      340 non-null float64
TEAM_MARKET_VALUE_SCALED     340 non-null float64
AVG_TIME                     340 non-null float64
dtypes: float64(9), int64(4), object(2)
memory usage: 42.5+ KB


### Number of transfers in/out

In [43]:
num_transfers = pd.read_csv('data/num_transfers.csv')

In [44]:
# Left-join the number of transfers in/out, keyed by club and season.
total_data = pd.merge(total_data, num_transfers, on=['TEAM', 'SEASON'], how='left')

In [45]:
total_data.sample(3)

Unnamed: 0,FINAL_POSITION,TEAM,FINAL_POINTS,SEASON,WAGE,WAGE_SCALED,NET_TRANSFER_SPEND,NET_TRANSFER_SPEND_SCALED,SQUAD_SIZE,AVG_AGE,NUM_FOREIGN_PLAYERS,TEAM_MARKET_VALUE,IS_ESTIMATED_MARKET_VAL,TEAM_MARKET_VALUE_SCALED,AVG_TIME,NUM_TRANSFERS_IN,NUM_TRANSFERS_OUT
304,5,Manchester United,66,2015-16,203000000.0,1.819297,44650000.0,1.237479,44,22.5,23,374150000.0,0.0,3.03962,2.9,6,14
50,11,Middlesbrough,49,2002-03,29000000.0,-0.618545,37410000.0,1.506136,32,24.0,15,90525000.0,1.0,0.735431,2.2,8,14
200,1,Manchester United,80,2010-11,153000000.0,1.6778,12000000.0,0.307158,43,24.1,28,384150000.0,0.0,3.12086,5.1,4,11


### Club size via stadium capacity

In [46]:
def change_to_float(input_val):
    """Convert a value such as '75,643' to a float, ignoring comma separators."""
    return float(str(input_val).replace(',', ''))
    
# Load the capacities, discard every column that contains a missing value, and
# turn the comma-formatted capacity strings into floats.
# NOTE(review): axis=1 removes whole columns, not rows — presumably meant to
# strip empty spill-over columns from the CSV; confirm.
stadium = (
    pd.read_csv('data/stadium_capacity.csv')
    .dropna(axis=1, how='any')
    .assign(STADIUM_CAPACITY=lambda frame: frame['STADIUM_CAPACITY'].map(change_to_float))
)

In [47]:
# Keep only the scaled capacity (divided by its standard deviation, not centred);
# the raw capacity column is not carried into the merged table.
stadium = stadium.assign(
    STADIUM_CAPACITY_SCALED=preprocessing.scale(stadium['STADIUM_CAPACITY'], with_mean=False)
).drop('STADIUM_CAPACITY', axis=1)

In [48]:
# Left-join the scaled stadium capacity, keyed by club and season.
total_data = pd.merge(total_data, stadium, on=['TEAM', 'SEASON'], how='left')
total_data.head()

Unnamed: 0,FINAL_POSITION,TEAM,FINAL_POINTS,SEASON,WAGE,WAGE_SCALED,NET_TRANSFER_SPEND,NET_TRANSFER_SPEND_SCALED,SQUAD_SIZE,AVG_AGE,NUM_FOREIGN_PLAYERS,TEAM_MARKET_VALUE,IS_ESTIMATED_MARKET_VAL,TEAM_MARKET_VALUE_SCALED,AVG_TIME,NUM_TRANSFERS_IN,NUM_TRANSFERS_OUT,STADIUM_CAPACITY_SCALED
0,1,Manchester United,80,2000-01,50000000.0,1.83277,1820000.0,0.130414,34,24.9,18,170072000.0,1.0,1.381676,4.5,1,8,6.350441
1,2,Arsenal,70,2000-01,41000000.0,1.07758,-5175000.0,-0.37082,33,25.3,22,109338000.0,1.0,0.888269,4.1,8,12,3.17522
2,3,Liverpool,69,2000-01,49000000.0,1.74886,17870000.0,1.280492,30,24.5,21,139748000.0,1.0,1.135322,2.4,10,10,4.595714
3,4,Leeds United,68,2000-01,43000000.0,1.2454,41025000.0,2.939685,36,22.5,22,104249000.0,1.0,0.846926,2.9,8,8,3.091662
4,5,Ipswich Town,66,2000-01,18000000.0,-0.852348,5100000.0,0.365445,27,24.8,11,19354000.0,1.0,0.157233,2.3,6,7,2.506753


### Number of years in league

In [49]:
# Reshape from wide to long: every column other than TEAM becomes a SEASON value,
# and the cell contents become YEARS_IN_LEAGUE.
years_in_league = pd.melt(
    pd.read_csv('data/years_in_league.csv'),
    id_vars=["TEAM"],
    var_name="SEASON",
    value_name="YEARS_IN_LEAGUE",
)

In [50]:
# Left-join the years-in-league counts, keyed by club and season.
total_data = pd.merge(total_data, years_in_league, on=['TEAM', 'SEASON'], how='left')
total_data.head()

Unnamed: 0,FINAL_POSITION,TEAM,FINAL_POINTS,SEASON,WAGE,WAGE_SCALED,NET_TRANSFER_SPEND,NET_TRANSFER_SPEND_SCALED,SQUAD_SIZE,AVG_AGE,NUM_FOREIGN_PLAYERS,TEAM_MARKET_VALUE,IS_ESTIMATED_MARKET_VAL,TEAM_MARKET_VALUE_SCALED,AVG_TIME,NUM_TRANSFERS_IN,NUM_TRANSFERS_OUT,STADIUM_CAPACITY_SCALED,YEARS_IN_LEAGUE
0,1,Manchester United,80,2000-01,50000000.0,1.83277,1820000.0,0.130414,34,24.9,18,170072000.0,1.0,1.381676,4.5,1,8,6.350441,9
1,2,Arsenal,70,2000-01,41000000.0,1.07758,-5175000.0,-0.37082,33,25.3,22,109338000.0,1.0,0.888269,4.1,8,12,3.17522,9
2,3,Liverpool,69,2000-01,49000000.0,1.74886,17870000.0,1.280492,30,24.5,21,139748000.0,1.0,1.135322,2.4,10,10,4.595714,9
3,4,Leeds United,68,2000-01,43000000.0,1.2454,41025000.0,2.939685,36,22.5,22,104249000.0,1.0,0.846926,2.9,8,8,3.091662,9
4,5,Ipswich Town,66,2000-01,18000000.0,-0.852348,5100000.0,0.365445,27,24.8,11,19354000.0,1.0,0.157233,2.3,6,7,2.506753,1


### The big six

In [51]:
# Clubs flagged as the "big six". The names must be spelled exactly as in the
# TEAM column, because membership is tested with an exact match.
big_six_list = ['Arsenal', 'Chelsea', 'Liverpool', 'Manchester United', 'Manchester City', 'Tottenham Hotspur']

In [52]:
# 1 when the club is in big_six_list, otherwise 0.
total_data = total_data.assign(
    BIG_SIX=total_data['TEAM'].isin(big_six_list).astype(int)
)

In [53]:
total_data.sample(5)

Unnamed: 0,FINAL_POSITION,TEAM,FINAL_POINTS,SEASON,WAGE,WAGE_SCALED,NET_TRANSFER_SPEND,NET_TRANSFER_SPEND_SCALED,SQUAD_SIZE,AVG_AGE,NUM_FOREIGN_PLAYERS,TEAM_MARKET_VALUE,IS_ESTIMATED_MARKET_VAL,TEAM_MARKET_VALUE_SCALED,AVG_TIME,NUM_TRANSFERS_IN,NUM_TRANSFERS_OUT,STADIUM_CAPACITY_SCALED,YEARS_IN_LEAGUE,BIG_SIX
88,9,Tottenham Hotspur,52,2004-05,33000000.0,-0.326542,34225000.0,0.922463,36,23.3,20,167770000.0,1.0,1.362975,2.0,17,20,3.008104,13,1
114,15,Manchester City,43,2005-06,34000000.0,-0.360422,-20025000.0,-1.309864,28,24.7,14,70400000.0,0.0,0.571934,2.4,5,7,3.927246,4,1
333,14,Crystal Palace,41,2016-17,55000000.0,-0.618739,46800000.0,0.998964,27,28.8,18,186500000.0,0.0,1.515139,2.4,9,10,2.172519,4,0
33,14,Charlton Athletic,44,2001-02,22000000.0,-0.998153,12125000.0,0.813621,29,26.3,14,21426000.0,1.0,0.174066,3.3,5,7,2.256078,2,0
322,3,Manchester City,78,2016-17,225000000.0,1.904607,180000000.0,3.842168,25,28.2,21,525250000.0,0.0,4.267166,3.2,12,6,4.595714,15,1


### Export data

In [54]:
total_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 340 entries, 0 to 339
Data columns (total 20 columns):
FINAL_POSITION               340 non-null int64
TEAM                         340 non-null object
FINAL_POINTS                 340 non-null int64
SEASON                       340 non-null object
WAGE                         333 non-null float64
WAGE_SCALED                  333 non-null float64
NET_TRANSFER_SPEND           340 non-null float64
NET_TRANSFER_SPEND_SCALED    340 non-null float64
SQUAD_SIZE                   340 non-null int64
AVG_AGE                      340 non-null float64
NUM_FOREIGN_PLAYERS          340 non-null int64
TEAM_MARKET_VALUE            340 non-null float64
IS_ESTIMATED_MARKET_VAL      340 non-null float64
TEAM_MARKET_VALUE_SCALED     340 non-null float64
AVG_TIME                     340 non-null float64
NUM_TRANSFERS_IN             340 non-null int64
NUM_TRANSFERS_OUT            340 non-null int64
STADIUM_CAPACITY_SCALED      340 non-null float64
YEARS_IN_

In [55]:
# Write the combined table without the row index. `index` is documented as a
# bool, so pass False explicitly rather than relying on None being falsy.
total_data.to_csv('data/complete_data.csv', index=False)