# Try to understand whether the match location affects the game results

Target Functions:
1. Analyse which city is the preferred home-game venue for each team
2. Analyse team performance related to city
3. Is the continent a factor that affects home/away team performance?

Features to be added in the future
1. relate City with Altitude
2. relate City with Season (Latitude and month)
3. ...

In [55]:
#import necessary libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [56]:
# Load the raw match results into a DataFrame; the path is relative to the
# current working directory.
# Later cells read the columns 'date', 'home_team', 'away_team', 'home_score',
# 'away_score', 'neutral' and 'city' from this frame.
data_df=pd.read_csv("01_data/soccer_results.csv")

In [57]:
# define a function that splits the date column into year/month/day and appends them to the dataframe
def convert_date(df, col='date'):
    """Split a date column into separate year, month and day columns.

    The dataframe is modified in place and also returned, so both
    ``convert_date(df)`` and ``df = convert_date(df)`` work.

    INPUT:
    df  - a dataframe whose column ``col`` holds dates that
          ``pd.to_datetime`` can parse (e.g. 'yyyy-mm-dd' strings)
    col - name of the date column (default 'date')

    OUTPUT:
    the same dataframe with 'year', 'month' and 'day' columns appended
    """
    # Parse the column once and reuse the result for all three parts;
    # string-to-datetime parsing is the expensive step here.
    parsed = pd.to_datetime(df[col])

    df['year'] = parsed.dt.year
    df['month'] = parsed.dt.month
    df['day'] = parsed.dt.day

    return df

In [58]:
# define a function to determine the winning team
def home_win(home_away_list, flag):
    """Classify one match result by comparing home_score with away_score.

    INPUT:
    home_away_list - sequence (list, tuple or dataframe row) holding
                     home_team, away_team, home_score, away_score,
                     in that order
    flag           - selects which piece of information is returned

    OUTPUT:

    if flag = 0
    2 - home win
    1 - tie
    0 - home lose

    if flag = 1
    name of the winning team ("no_team" for a tie)

    if flag = 2
    name of the losing team ("no_team" for a tie)

    NOTE: a NaN score fails both the "> 0" and the "== 0" test, so such a
    match falls through to the last branch and is reported as an away win.
    """
    # Unpack by iteration instead of integer indexing: a dataframe row is a
    # Series labelled with column names, and looking up [0]..[3] on it relies
    # on positional fallback. Iterating yields the values in column order for
    # rows, lists and tuples alike.
    home_team, away_team, home_score, away_score = tuple(home_away_list)[:4]

    margin = home_score - away_score

    if margin > 0:          # home team wins
        outcome = (2, home_team, away_team)
    elif margin == 0:       # tie
        outcome = (1, "no_team", "no_team")
    else:                   # away team wins
        outcome = (0, away_team, home_team)

    return outcome[flag]

In [59]:
# define a function to add the home-win flag and the winning/losing team columns

def determine_win_team(df):
    """Append the match outcome and the winning/losing team to every row.

    Applies the same rules as ``home_win`` (home win / tie / away win, with
    "no_team" as winner and loser of a tie), but on whole columns at once
    instead of calling a Python function per row. This also works on an
    empty dataframe.

    The dataframe is modified in place and also returned.

    INPUT:
    a dataframe with 'home_team', 'away_team', 'home_score', and 'away_score'

    OUTPUT:
    the same dataframe with these columns appended:
    'home_win_flag' - 2 home win, 1 tie, 0 home lose
    'win_team'      - name of the winning team, "no_team" for a tie
    'lose_team'     - name of the losing team, "no_team" for a tie

    NOTE: a row with a NaN score is neither a home win nor a tie, so it is
    reported as an away win (flag 0).
    """
    margin = df['home_score'] - df['away_score']
    home_won = (margin > 0).to_numpy()
    tied = (margin == 0).to_numpy()

    home_team = df['home_team'].to_numpy()
    away_team = df['away_team'].to_numpy()

    # Nested np.where reads as: home win -> tie -> otherwise away win.
    df['home_win_flag'] = np.where(home_won, 2, np.where(tied, 1, 0)).astype('int64')
    df['win_team'] = np.where(home_won, home_team, np.where(tied, "no_team", away_team))
    df['lose_team'] = np.where(home_won, away_team, np.where(tied, "no_team", home_team))

    return df

In [60]:
# Append 'year', 'month' and 'day' columns derived from the default 'date' column.
data_df=convert_date(data_df)

In [61]:
# Append 'home_win_flag', 'win_team' and 'lose_team'.
# NOTE: determine_win_team adds the columns to data_df itself and returns it,
# so data_df_complete is the same object as data_df, not a copy.
data_df_complete = determine_win_team(data_df)

In [62]:
# Count England's home games per city, keeping only rows where 'neutral' is
# False (presumably: matches not played at a neutral venue - confirm against the data).

data_df_complete.query('home_team == "England" and neutral == False')['city'].value_counts()

London            336
Manchester         24
Liverpool          21
Birmingham         10
Sunderland          7
Newcastle           7
Sheffield           7
Blackburn           5
Middlesbrough       4
Wolverhampton       4
Leeds               3
Stoke-on-Trent      3
Derby               3
Nottingham          2
West Bromwich       2
Southampton         2
Bristol             2
Huddersfield        1
Bradford            1
Portsmouth          1
Ipswich             1
Crewe               1
Burnley             1
Blackpool           1
Leicester           1
Richmond            1
Name: city, dtype: int64