In [52]:
import praw
import pandas as pd
import numpy as np
import time
import datetime
import itertools as it



def winners(filename):

    '''Parse baseball data and determine the winner of each game.

    The file is loaded with ``pandas.read_csv``, the ``yyyymmdd`` column is
    converted to datetimes (format ``%Y%m%d``) and a ``winner`` column is
    appended.

    Parameters
    ----------
    filename : anything ``pandas.read_csv`` accepts
        Game data containing the columns ``yyyymmdd``, ``visiting Team``,
        ``home team``, `` Visiting team score`` (looked up with a leading
        space) and ``home team score``.

    Returns
    -------
    pandas.DataFrame
        The loaded data with ``yyyymmdd`` as datetimes plus a ``winner``
        column holding, per game:

        * the visiting team when its score is higher,
        * the home team when its score is higher,
        * ``'tie'`` when both scores are equal,
        * ``'err'`` when none of the comparisons holds (for example when a
          score is missing, since comparisons against NaN are all False).
    '''

    df = pd.read_csv(filename)

    # Convert the whole column in one call rather than once per row.
    df['yyyymmdd'] = pd.to_datetime(df['yyyymmdd'], format='%Y%m%d')

    # NOTE(review): the visiting score key starts with a space; presumably
    # this mirrors the CSV header exactly -- confirm against the data file.
    visiting_score = df[' Visiting team score']
    home_score = df['home team score']

    # The three conditions are mutually exclusive; rows matching none of
    # them fall through to the 'err' default. The team columns are cast to
    # object so that team labels and the literal markers share one dtype.
    outcomes = np.select(
        [
            visiting_score > home_score,
            visiting_score < home_score,
            visiting_score == home_score,
        ],
        [
            df['visiting Team'].astype(object),
            df['home team'].astype(object),
            'tie',
        ],
        default='err',
    )
    df['winner'] = outcomes
    return df

def bin_winners(df,filename):

    '''Create a dataframe of the count of wins per week, per team.

    Weeks are consecutive 7-day bins anchored at the earliest date found in
    ``df['yyyymmdd']``. Every bin from the first game up to and including
    the one holding the latest game gets a column, even if nothing was
    played in it; no bin is created past the last game.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime column ``yyyymmdd`` and a ``winner`` column.
        Rows need not be sorted by date. Rows with a missing date are
        ignored because they cannot be assigned to a week.
    filename : path or buffer accepted by ``DataFrame.to_csv``
        Destination the resulting table is written to.

    Returns
    -------
    pandas.DataFrame
        One row per distinct value of ``winner`` (this includes any
        non-team markers present in that column), one column per week
        labelled with the first day of that week, integer win counts as
        values. An input without dated rows yields an empty DataFrame.

    Side effects
    ------------
    Writes the table to ``filename`` and prints the list of row labels.
    '''
    week_delta = datetime.timedelta(days=7)

    # Rows without a date cannot be placed in any week.
    played = df.dropna(subset=['yyyymmdd'])

    if played.empty:
        results = pd.DataFrame()
    else:
        game_dates = played['yyyymmdd']

        # Use min/max rather than the first/last row so that the result
        # does not depend on the rows being sorted chronologically.
        first_date = game_dates.min()
        last_date = game_dates.max()

        # Zero-based index of the 7-day bin each game falls into.
        week_number = (game_dates - first_date).dt.days // 7
        n_weeks = (last_date - first_date).days // 7 + 1

        # Count wins per (winner, week) in a single pass over the data.
        results = pd.crosstab(played['winner'], week_number)

        # Add all-zero columns for weeks without games, then label every
        # column with the first day of its week.
        results = results.reindex(columns=range(n_weeks), fill_value=0)
        results = results.astype(int)
        results.columns = [first_date + week_delta * k for k in range(n_weeks)]
        results.index.name = None

    results.to_csv(filename)
    print(results.index.tolist())
    return results
    

# Run the pipeline on the 2015 data: load 'baseball_data_2015.csv' and derive
# the winner of each game, then write the per-team weekly win counts to
# 'team_stats_2015.csv'. The second call is left as a bare expression, so its
# return value is what the notebook cell displays.
df = winners('baseball_data_2015.csv')
bin_winners(df,'team_stats_2015.csv')




['ANA', 'ARI', 'ATL', 'BAL', 'BOS', 'CHA', 'CHN', 'CIN', 'CLE', 'COL', 'DET', 'HOU', 'KCA', 'LAN', 'MIA', 'MIL', 'MIN', 'NYA', 'NYN', 'OAK', 'PHI', 'PIT', 'SDN', 'SEA', 'SFN', 'SLN', 'TBA', 'TEX', 'TOR', 'WAS']


Unnamed: 0,2015-04-05 00:00:00,2015-04-12 00:00:00,2015-04-19 00:00:00,2015-04-26 00:00:00,2015-05-03 00:00:00,2015-05-10 00:00:00,2015-05-17 00:00:00,2015-05-24 00:00:00,2015-05-31 00:00:00,2015-06-07 00:00:00,...,2015-08-02 00:00:00,2015-08-09 00:00:00,2015-08-16 00:00:00,2015-08-23 00:00:00,2015-08-30 00:00:00,2015-09-06 00:00:00,2015-09-13 00:00:00,2015-09-20 00:00:00,2015-09-27 00:00:00,2015-10-04 00:00:00
ANA,2,3,4,2,3,5,3,4,2,4,...,3,2,3,2,3,4,4,4,5,0
ARI,3,3,2,2,3,2,5,3,4,2,...,3,4,4,2,2,3,3,3,4,1
ATL,5,2,2,2,3,3,4,3,3,3,...,5,1,1,1,0,2,3,3,3,2
BAL,3,3,2,3,2,2,4,4,2,6,...,3,3,3,1,2,3,5,3,4,1
BOS,4,3,3,2,1,5,2,2,4,1,...,2,3,4,4,3,4,3,4,4,0
CHA,1,3,3,1,3,5,3,3,3,3,...,1,3,4,2,4,3,3,3,3,0
CHN,2,4,3,4,2,6,3,1,4,4,...,5,6,3,3,4,5,5,2,7,1
CIN,4,1,3,4,3,3,0,3,2,5,...,2,2,0,2,2,5,3,0,1,0
CLE,2,2,2,2,2,4,5,4,4,2,...,2,4,3,5,3,4,4,4,3,1
COL,4,3,3,1,0,2,3,5,4,2,...,2,1,2,2,5,3,4,2,2,1
