### Import packages and dataset

In [1]:
import pandas as pd
import numpy as np
import json
import ast

# Load the raw match dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('LOL/LeagueofLegends.csv')

# DataFrame.info() writes its summary to stdout and returns None, so it is
# called directly rather than wrapped in print(), which would emit a stray "None".
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7620 entries, 0 to 7619
Data columns (total 57 columns):
League              7620 non-null object
Year                7620 non-null int64
Season              7620 non-null object
Type                7620 non-null object
blueTeamTag         7582 non-null object
bResult             7620 non-null int64
rResult             7620 non-null int64
redTeamTag          7583 non-null object
gamelength          7620 non-null int64
golddiff            7620 non-null object
goldblue            7620 non-null object
bKills              7620 non-null object
bTowers             7620 non-null object
bInhibs             7620 non-null object
bDragons            7620 non-null object
bBarons             7620 non-null object
bHeralds            7620 non-null object
goldred             7620 non-null object
rKills              7620 non-null object
rTowers             7620 non-null object
rInhibs             7620 non-null object
rDragons            7620 non-null ob

### Explore sample row

In [2]:
# Show the value stored under index label 10 for every column of the raw frame.
sample_template = '\nDatapoint #10 in {}: \n==>\t{}'
for column_name in df.columns:
    sample_value = df[column_name][10]
    print(sample_template.format(column_name, sample_value))


Datapoint #10 in League: 
==>	NALCS

Datapoint #10 in Year: 
==>	2015

Datapoint #10 in Season: 
==>	Spring

Datapoint #10 in Type: 
==>	Season

Datapoint #10 in blueTeamTag: 
==>	GV

Datapoint #10 in bResult: 
==>	1

Datapoint #10 in rResult: 
==>	0

Datapoint #10 in redTeamTag: 
==>	DIG

Datapoint #10 in gamelength: 
==>	52

Datapoint #10 in golddiff: 
==>	[0, -10, -15, -274, -154, -49, 134, 122, 391, 423, 534, 462, 611, 704, 1383, 989, 1711, 2633, 2806, 2978, 3074, 4779, 4782, 4901, 4710, 5156, 4819, 4936, 6265, 6198, 6537, 6664, 6800, 6810, 6492, 6755, 7299, 7451, 7574, 7368, 7063, 7176, 7524, 7707, 7880, 7067, 6844, 6963, 8097, 9963, 10528, 12212]

Datapoint #10 in goldblue: 
==>	[2415, 2415, 2716, 3843, 5190, 6687, 8109, 9421, 10854, 12389, 13931, 15242, 16590, 18201, 19936, 21125, 23381, 25536, 27097, 28535, 29669, 32282, 33522, 35206, 36835, 38388, 39422, 40741, 43478, 44944, 46608, 47794, 49448, 50513, 51790, 53010, 54998, 56562, 57763, 58917, 59861, 61993, 63552, 64987, 6620

### Define the columns to transform, then parse their string-encoded values into lists

In [3]:
# Columns whose cells hold string-encoded lists. 'goldred' is kept in this list
# (as in the original definition) but is parsed as a team gold timeline below,
# not as an event log.
count_cols = ['goldred', 'bKills', 'rKills', 'bTowers', 'rTowers', 'bInhibs', 'rInhibs',
            'bDragons', 'rDragons', 'bBarons', 'rBarons', 'bHeralds', 'rHeralds']

champ_gold_cols = ['goldblueTop', 'goldblueMiddle', 'goldblueJungle', 'goldblueSupport', 'goldblueADC',
                    'goldredTop', 'goldredMiddle', 'goldredJungle', 'goldredSupport', 'goldredADC']

# Team-level gold timelines, decoded with json.loads. Previously 'goldblue' was
# named in the branch condition but never iterated over, so it stayed a raw string.
team_gold_cols = ['goldred', 'goldblue']

# Cutoff used for the "*_pre15" event counts and the "golddiff_min15" snapshot.
EARLY_GAME_MINUTE = 15

# Column-wise Series.map is used for single-column parsing: it applies the same
# function to each cell as the row-wise DataFrame.apply did, without building a
# full row object per record.
for col in team_gold_cols:
    df[col] = df[col].map(json.loads)

for col in count_cols:
    if col in team_gold_cols:
        continue  # already decoded above
    df[col] = df[col].map(ast.literal_eval)
    # Count events whose first field (presumably the event time in minutes) is
    # within the cutoff. int() truncates, so a value such as 15.9 still counts.
    # NOTE(review): confirm that including 15.x timestamps is intended.
    df[col + '_pre15'] = df[col].map(
        lambda events: sum(1 for event in events if int(event[0]) <= EARLY_GAME_MINUTE)
    )

for col in champ_gold_cols:
    df[col] = df[col].map(json.loads)

players = ['TopgoldDiff', 'MidgoldDiff', 'JunglegoldDiff', 'SupportgoldDiff', 'ADCgoldDiff']
blue_players_gold = ['goldblueTop', 'goldblueMiddle', 'goldblueJungle', 'goldblueSupport', 'goldblueADC']
red_players_gold = ['goldredTop', 'goldredMiddle', 'goldredJungle', 'goldredSupport', 'goldredADC']

# Per-role gold difference, element-wise red minus blue, stored as one array per match.
# Only the two columns involved are passed to the row-wise apply.
for red_col, blue_col, diff_col in zip(red_players_gold, blue_players_gold, players):
    df[diff_col] = df[[red_col, blue_col]].apply(
        lambda row: np.subtract(row[red_col], row[blue_col]), axis=1
    )

# Decode 'golddiff' once and reuse it; the 'golddiff' column itself is left as the
# original string. Indexing at the cutoff raises IndexError for a timeline with
# fewer than EARLY_GAME_MINUTE + 1 entries.
golddiff_parsed = df['golddiff'].map(json.loads)
df['golddiff_min15'] = golddiff_parsed.map(lambda diffs: diffs[EARLY_GAME_MINUTE])
df['golddiff_final'] = golddiff_parsed.map(lambda diffs: diffs[-1])

### Explore modified dataset

In [4]:
# Repeat the index-label-10 dump so the parsed and derived columns can be inspected.
for column_label in df.columns:
    header_and_value = '\nDatapoint #10 in {}: \n==>\t{}'.format(
        column_label,
        df[column_label][10],
    )
    print(header_and_value)


Datapoint #10 in League: 
==>	NALCS

Datapoint #10 in Year: 
==>	2015

Datapoint #10 in Season: 
==>	Spring

Datapoint #10 in Type: 
==>	Season

Datapoint #10 in blueTeamTag: 
==>	GV

Datapoint #10 in bResult: 
==>	1

Datapoint #10 in rResult: 
==>	0

Datapoint #10 in redTeamTag: 
==>	DIG

Datapoint #10 in gamelength: 
==>	52

Datapoint #10 in golddiff: 
==>	[0, -10, -15, -274, -154, -49, 134, 122, 391, 423, 534, 462, 611, 704, 1383, 989, 1711, 2633, 2806, 2978, 3074, 4779, 4782, 4901, 4710, 5156, 4819, 4936, 6265, 6198, 6537, 6664, 6800, 6810, 6492, 6755, 7299, 7451, 7574, 7368, 7063, 7176, 7524, 7707, 7880, 7067, 6844, 6963, 8097, 9963, 10528, 12212]

Datapoint #10 in goldblue: 
==>	[2415, 2415, 2716, 3843, 5190, 6687, 8109, 9421, 10854, 12389, 13931, 15242, 16590, 18201, 19936, 21125, 23381, 25536, 27097, 28535, 29669, 32282, 33522, 35206, 36835, 38388, 39422, 40741, 43478, 44944, 46608, 47794, 49448, 50513, 51790, 53010, 54998, 56562, 57763, 58917, 59861, 61993, 63552, 64987, 6620

### Save to new csv file

In [5]:
# Write the transformed frame to a new CSV so the raw file is left untouched.
clean_csv_path = 'LOL/Clean_LeagueofLegends.csv'
df.to_csv(clean_csv_path)