In [50]:
import pandas as pd
import numpy as np

def load_and_process(path):
    """Load the championship results CSV and return a cleaned DataFrame.

    The file is read with ``pd.read_csv``, its columns are renamed to
    display-friendly titles, rows are ordered by Sport and then Year, a
    "Win Streak" column is appended, and the Level values are capitalized.

    "Win Streak" is the number of consecutive rows (in Sport/Year order)
    won by the same "First Place" team *within the same sport*; the counter
    restarts at 1 whenever the winner or the sport changes.

    Args:
        path: Anything ``pd.read_csv`` accepts as its first argument
            (for example a file path or an open text buffer).

    Returns:
        A new ``pandas.DataFrame`` with a fresh 0..n-1 index.
    """

    # Method Chain 1 (Load data, rename columns, sort rows by Sport then Year so that we can add win streaks, fix index)

    df1 = (
        pd.read_csv(path)
        .rename(columns={
            'year': 'Year',
            'level': 'Level',
            'sport': 'Sport',
            'winner': 'First Place',
            'winner_metro': 'First Place Hometown',
            'runner_up': 'Second Place',
            'runner_up_metro': 'Second Place Hometown',
            'final_four3': 'Third Place',
            'final_four3_metro': 'Third Place Hometown',
            'final_four4': 'Fourth Place',
            'final_four4_metro': 'Fourth Place Hometown',
        })
        .sort_values(by=['Sport', 'Year'])
        # reset_index() stores the pre-sort row labels in a new column.
        # pandas names that column 'level_0' when an 'index' column already
        # exists (as it does when the CSV ships its own 'index' column) and
        # 'index' otherwise. Dropping 'index' and renaming 'level_0' keeps
        # the pre-sort row labels as 'Index' in the first case and simply
        # discards them in the second.
        .reset_index()
        .drop(columns='index')
        .rename(columns={'level_0': 'Index'})
    )

    # Create list of win streaks to add to dataset.
    # The sport is tracked alongside the winner so that a streak never
    # carries over from the last row of one sport into the first row of
    # the next one.

    streak = []
    prev_sport = None
    prev_team = None
    run = 0
    for sport, team in zip(df1['Sport'], df1['First Place']):
        if team == prev_team and sport == prev_sport:
            run += 1
        else:
            run = 1
        streak.append(run)
        prev_sport, prev_team = sport, team
    # Explicit index/dtype keep the Series aligned with df1 and integer-typed
    # even when the input has no rows.
    winstreak = pd.Series(streak, index=df1.index, dtype='int64')

    # Method Chain 2 (add Win Streak column to dataset, capitalize Level column)

    df2 = (
        df1
        .assign(**{'Win Streak': winstreak})
        .assign(Level=df1['Level'].str.capitalize())
    )

    return df2
    
# Run the pipeline on the raw dataset. The result is not assigned, so as the
# final expression of the notebook cell the returned DataFrame is displayed.
load_and_process('../data/raw/dataset.csv')

Unnamed: 0,Index,Year,Level,Sport,First Place,First Place Hometown,Second Place,Second Place Hometown,Third Place,Third Place Hometown,Fourth Place,Fourth Place Hometown,Win Streak
0,238,1947,College,Baseball (M),California Golden Bears,"San Francisco Bay Area, CA",Yale Bulldogs,"New Haven, CT",New York University Violets,New York Metro Area,Texas Longhorns,"Austin, TX",1
1,246,1948,College,Baseball (M),USC Trojans,"Greater Los Angeles, CA",Yale Bulldogs,"New Haven, CT",Lafayette Leopards,"Lehigh Valley, NJ-PA",Baylor Bears,"Waco, TX",1
2,254,1949,College,Baseball (M),Texas Longhorns,"Austin, TX",Wake Forest Demon Deacons,"Winston-Salem, NC",Wake Forest Demon Deacons,"Winston-Salem, NC",USC Trojans,"Greater Los Angeles, CA",1
3,262,1950,College,Baseball (M),Texas Longhorns,"Austin, TX",Washington State Cougars,"Pullman, WA",Rutgers Scarlet Knights,"New Brunswick, NJ",Wisconsin Badgers,"Madison, WI",2
4,270,1951,College,Baseball (M),Oklahoma Sooners,"Oklahoma City, OK",Tennessee Volunteers,"Knoxville, TN",USC Trojans,"Greater Los Angeles, CA",Utah Utes,"Salt Lake City, UT",1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
991,932,2013,College,Volleyball (W),Penn State Nittany Lions,"State College, PA",Wisconsin Badgers,"Madison, WI",Texas Longhorns,"Austin, TX",Washington Huskies,"Seattle,WA",1
992,947,2014,College,Volleyball (W),Penn State Nittany Lions,"State College, PA",Brigham Young Cougars,"Provo, UT",Stanford Cardinal,"San Francisco Bay Area, CA",Texas Longhorns,"Austin, TX",2
993,960,2015,College,Volleyball (W),Nebraska Cornhuskers,"Lincoln, NE",Texas Longhorns,"Austin, TX",Kansas Jayhawks,"Lawrence, KS",Minnesota Golden Gophers,"Twin Cities, MN",1
994,971,2016,College,Volleyball (W),Stanford Cardinal,"San Francisco Bay Area, CA",Texas Longhorns,"Austin, TX",Minnesota Golden Gophers,"Twin Cities, MN",Nebraska Cornhuskers,"Lincoln, NE",1
