In [16]:
import pandas as pd

In [8]:
def log_score(team, pts, team_game_map):
    '''Record the cumulative points of `team` after its latest game.

    team_game_map maps each team to a list of running point totals, one
    entry per game played so far.  The list for `team` is extended in place
    with the previous total plus `pts` (or just `pts` for the first game).
    Nothing is returned.
    '''
    history = team_game_map[team]
    # First game: there is no previous total to build on.
    history.append(pts + history[-1] if history else pts)

In [9]:
def relegation_stat(tgm, games_in_season=38, pts_per_win=3):
    '''Calculate closeness to relegation for the points tgm from team_game_map[someteam]

    The statistic after game i is the share of the season's maximum points
    that the team can no longer reach:

        1 - (points so far + maximum points still available) / season maximum

    Parameters
    ----------
    tgm : sequence of numbers
        Running (cumulative) point totals, one entry per game played.
    games_in_season : int, optional
        Number of games each team plays in a season (default 38).
    pts_per_win : int, optional
        Points awarded for a win (default 3).

    Returns
    -------
    list
        len(tgm) + 1 values.  Entry 0 is always 0 (nothing dropped before the
        first game); entry i is the statistic after i games.

    Raises
    ------
    ValueError
        If the season maximum (games_in_season * pts_per_win) is not positive.
    '''
    # float() keeps the ratio fractional even where `/` on two ints would
    # truncate (Python 2), which would collapse every value to 0 or 1.
    max_pts = float(pts_per_win * games_in_season)
    if max_pts <= 0:
        raise ValueError("games_in_season * pts_per_win must be positive")

    out = [0]
    for games_played, pts in enumerate(tgm, start=1):
        # Best case from here on: win every remaining game.
        theo_pts = max_pts - games_played * pts_per_win
        out.append(1 - ((pts + theo_pts) / max_pts))

    return out

In [13]:
def add_releg_status(df, teams, reg_stats):
    '''Fill the HomeTeamCR / AwayTeamCR columns of df in place.

    For every match (row) the value stored is each side's relegation
    statistic *before* that match, i.e. reg_stats[team][k] where k is the
    number of games the team has already played in earlier rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Matches in chronological row order; must have 'HomeTeam' and
        'AwayTeam' columns.  The columns 'HomeTeamCR' and 'AwayTeamCR' are
        created or overwritten.
    teams : iterable
        Every team name that appears in df.
    reg_stats : dict
        Maps team -> sequence of statistics indexed by games played.

    Returns
    -------
    None
    '''
    games_played = {team: 0 for team in teams}
    home_stats = []
    away_stats = []

    for home, away in zip(df['HomeTeam'], df['AwayTeam']):
        home_stats.append(reg_stats[home][games_played[home]])
        away_stats.append(reg_stats[away][games_played[away]])
        games_played[home] += 1
        games_played[away] += 1

    # Assign whole columns positionally rather than cell-by-cell by label:
    # DataFrame.set_value no longer exists in current pandas, and label-based
    # writes hit every row sharing an index label (e.g. several matches on
    # the same date), overwriting earlier rows with later values.
    df['HomeTeamCR'] = home_stats
    df['AwayTeamCR'] = away_stats

In [14]:
def strip_and_append_relegation(filename):
    '''Reduce a season results CSV to its core columns and add relegation stats.

    Reads `filename` with pandas (second CSV column as index, parsed
    day-first as dates), keeps only HomeTeam, AwayTeam and FTR, and writes
    two files next to the input:

    * filename + "_stripped.csv"            -- the three kept columns
    * filename + "_stripped_relegation.csv" -- the same plus HomeTeamCR and
      AwayTeamCR, each side's relegation statistic going into the match

    Rows are assumed to be in chronological order -- TODO confirm for the
    input files used.  Returns None.
    '''
    data = pd.read_csv(filename, parse_dates=True, dayfirst=True, index_col=[1])
    df = data[["HomeTeam", "AwayTeam", "FTR"]].copy()
    df.to_csv(filename + "_stripped.csv")

    teams = df.HomeTeam.unique()
    # One independent running-points list per team.
    team_game_map = {team: [] for team in teams}

    for home, away, result in zip(df.HomeTeam, df.AwayTeam, df.FTR):
        if result == "D":
            home_pts, away_pts = 1, 1
        elif result == "H":
            home_pts, away_pts = 3, 0
        else:
            # Any result other than draw / home win is scored as an away win.
            home_pts, away_pts = 0, 3
        log_score(home, home_pts, team_game_map)
        log_score(away, away_pts, team_game_map)

    # Create the columns up front so they exist before being filled.
    df['HomeTeamCR'] = None
    df['AwayTeamCR'] = None

    reg_stats = {team: relegation_stat(team_game_map[team]) for team in teams}
    add_releg_status(df, teams, reg_stats)

    # Write only after the relegation columns have been filled; writing
    # earlier would save the file with both columns empty.
    df.to_csv(filename + "_stripped_relegation.csv")

In [15]:
# Run the pipeline on one season file; the outputs are written next to it as
# '<filename>_stripped.csv' and '<filename>_stripped_relegation.csv'.
filename = '15-16.csv'
strip_and_append_relegation(filename)