In [2]:
import numpy as np
import pandas as pd 
import pickle


In [2]:
# Load the raw event table. Later cells read these columns from it: match_id,
# home_team_id, away_team_id, team_id, player_id, type, outcome and min.
Data = pd.read_csv("epl.csv")

In [10]:
# Per-player event counters kept alongside the player-to-player pass counts.
_PLAYER_EVENTS = ('Shot', 'Gain', 'Loss', 'Goal')

# Event types counted as a shot without a goal (original note: miss, post, attempt saved).
_SHOT_TYPES = (13, 14, 15)

# Event type counted as both a shot and a goal.
_GOAL_TYPE = 16


def _team_players(game_data, team):
    """Return the unique, non-NaN player ids found on rows whose team_id is ``team``."""
    players = game_data[game_data.team_id == team].player_id.unique()
    return players[~np.isnan(players)]


def _empty_team_tables(players, max_time):
    """Return (counts, spans) for one team: zeroed counters and [0, max_time] intervals."""
    counts = {}
    spans = {}
    for i in players:
        for j in players:
            if i != j:
                counts[(i, j)] = 0
                spans[(i, j)] = [0, max_time]
            else:
                spans[(i, 'Self')] = [0, max_time]
        for event in _PLAYER_EVENTS:
            counts[(i, event)] = 0
    return counts, spans


def _find_pass_recipient(game_data, index, team, passer, lookahead=3):
    """Return the first team-mate other than ``passer`` in rows index+1..index+lookahead, or None.

    Rows are looked up by index label, so an integer index is assumed. Labels
    that are not part of this match (for example past its last row) are skipped
    instead of raising KeyError.
    """
    for offset in range(1, lookahead + 1):
        label = index + offset
        if label not in game_data.index:
            continue
        following = game_data.loc[label]
        candidate = following['player_id']
        if following['team_id'] == team and candidate != passer and not np.isnan(candidate):
            return candidate
    return None


def _is_gain(event_type, outcome):
    """Gain events (original note: ball recovery, out of bounds, keeper pick up, corner awarded)."""
    return (event_type == 49 or event_type == 52
            or (event_type in (5, 6) and outcome == 1))


def _is_loss(event_type, outcome):
    """Loss events (original note: bad pass, out of bounds, bad take on, dispossessed)."""
    return ((event_type in (1, 3, 5) and outcome == 0)
            or (event_type == 50 and outcome == 1))


def _count_events(game_data, teams, counts_by_team):
    """Walk every event row and increment the matching team's pass/gain/loss/shot/goal counters."""
    for index, row in game_data.iterrows():
        player = row['player_id']
        # Rows without a player cannot be attributed to anyone. The original
        # guarded gain/loss this way but raised KeyError on shots and goals.
        if np.isnan(player):
            continue
        event_type = row['type']
        outcome = row['outcome']
        for team, counts in zip(teams, counts_by_team):
            if row['team_id'] != team:
                continue
            # Successful pass: credit it to the next different team-mate seen.
            if event_type == 1 and outcome == 1:
                recipient = _find_pass_recipient(game_data, index, team, player)
                if recipient is not None:
                    counts[(player, recipient)] += 1
            if _is_gain(event_type, outcome):
                counts[(player, 'Gain')] += 1
            if _is_loss(event_type, outcome):
                counts[(player, 'Loss')] += 1
            if event_type in _SHOT_TYPES or event_type == _GOAL_TYPE:
                counts[(player, 'Shot')] += 1
            if event_type == _GOAL_TYPE:
                counts[(player, 'Goal')] += 1


def _apply_substitutions(game_data, teams, spans_by_team):
    """Overwrite a player's interval start (type 19) or end (type 18) with the event minute."""
    sub_rows = game_data[(game_data.type == 19) | (game_data.type == 18)]
    for _, row in sub_rows.iterrows():
        player = row['player_id']
        if np.isnan(player):
            continue
        slot = 0 if row['type'] == 19 else 1
        for team, spans in zip(teams, spans_by_team):
            if row['team_id'] == team:
                spans[(player, 'Self')][slot] = row['min']


def _fill_shared_time(players, spans):
    """Set each pair's interval to the overlap of the two players' own intervals.

    An interval whose start is not before its end is replaced by [0, inf], so
    any rate computed over it is 0 rather than a division by zero.

    NOTE(review): a player's degenerate 'Self' interval is reset only after that
    player's own pairs are computed, so pairs (later, earlier) see the reset
    value while (earlier, later) do not. Kept as in the original; confirm intent.
    """
    for i in players:
        for j in players:
            if i != j:
                start = max(spans[(i, 'Self')][0], spans[(j, 'Self')][0])
                end = min(spans[(i, 'Self')][1], spans[(j, 'Self')][1])
                if start >= end:
                    spans[(i, j)] = [0, float('inf')]
                else:
                    spans[(i, j)] = [start, end]
        if spans[(i, 'Self')][0] >= spans[(i, 'Self')][1]:
            spans[(i, 'Self')] = [0, float('inf')]


def _event_rates(players, counts, spans):
    """Return counts divided by interval length: pair interval for passes, own interval otherwise."""
    rates = {}
    for i in players:
        for j in players:
            if i != j:
                start, end = spans[(i, j)]
                rates[(i, j)] = float(counts[(i, j)]) / (end - start)
        start, end = spans[(i, 'Self')]
        for event in _PLAYER_EVENTS:
            rates[(i, event)] = float(counts[(i, event)]) / (end - start)
    return rates


def data_process_game(data, match_id):
    """Summarise one match of the event table into per-team counts, times and rates.

    Parameters
    ----------
    data : pandas.DataFrame
        Event rows with columns match_id, home_team_id, away_team_id, team_id,
        player_id, type, outcome and min. It is not modified.
    match_id
        Value of ``match_id`` selecting the rows to process.

    Returns
    -------
    list
        ``[teams, home_passes, away_passes, home_time, away_time, home_rates,
        away_rates, home_players, away_players]`` where

        * ``teams`` is ``(home_team_id, away_team_id)`` (the mode of each column);
        * ``*_passes`` map ``(player, team_mate)`` to a pass count and
          ``(player, 'Shot'|'Gain'|'Loss'|'Goal')`` to an event count;
        * ``*_time`` map ``(player, team_mate)`` and ``(player, 'Self')`` to a
          ``[start, end]`` interval taken from the ``min`` column;
        * ``*_rates`` hold the counts divided by the matching interval length;
        * ``*_players`` are numpy arrays of the non-NaN player ids per team.
    """
    # Boolean-mask selection already yields a new frame, and nothing below
    # mutates it, so the whole table no longer needs copying on every call.
    game_data = data[data.match_id == match_id]

    # tuple of home and away team
    teams = (game_data.home_team_id.mode()[0], game_data.away_team_id.mode()[0])

    home_players = _team_players(game_data, teams[0])
    away_players = _team_players(game_data, teams[1])

    max_time = game_data['min'].max()
    home_passes, home_time = _empty_team_tables(home_players, max_time)
    away_passes, away_time = _empty_team_tables(away_players, max_time)

    _count_events(game_data, teams, (home_passes, away_passes))

    # Get Time Data
    _apply_substitutions(game_data, teams, (home_time, away_time))
    _fill_shared_time(home_players, home_time)
    _fill_shared_time(away_players, away_time)

    # Get Time Weighted Event Rates
    # This is number of events divided by time shared between players or total player time
    home_rates = _event_rates(home_players, home_passes, home_time)
    away_rates = _event_rates(away_players, away_passes, away_time)

    return [teams, home_passes, away_passes, home_time, away_time, home_rates, away_rates, home_players, away_players]
                    
                



                

In [12]:
# Run data_process_game once per match id and collect the results in `games`,
# keyed by match id.

games = {}

matches = Data.match_id.unique()
matches = matches[np.logical_not(np.isnan(matches))]  # drop the missing match id, if any
n = len(matches)

ind = 1
for match_id in matches:
    print('match {0}/{1}'.format(ind, n))
    L = data_process_game(Data, match_id)
    games[match_id] = L
    ind += 1

    
    
    


match 1/380
match 2/380
match 3/380
match 4/380
match 5/380
match 6/380
match 7/380
match 8/380
match 9/380
match 10/380
match 11/380
match 12/380
match 13/380
match 14/380
match 15/380
match 16/380
match 17/380
match 18/380
match 19/380
match 20/380
match 21/380
match 22/380
match 23/380
match 24/380
match 25/380
match 26/380
match 27/380
match 28/380
match 29/380
match 30/380
match 31/380
match 32/380
match 33/380
match 34/380
match 35/380
match 36/380
match 37/380
match 38/380
match 39/380
match 40/380
match 41/380
match 42/380
match 43/380
match 44/380
match 45/380
match 46/380
match 47/380
match 48/380
match 49/380
match 50/380
match 51/380
match 52/380
match 53/380
match 54/380
match 55/380
match 56/380
match 57/380
match 58/380
match 59/380
match 60/380
match 61/380
match 62/380
match 63/380
match 64/380
match 65/380
match 66/380
match 67/380
match 68/380
match 69/380
match 70/380
match 71/380
match 72/380
match 73/380
match 74/380
match 75/380
match 76/380
match 77/380
match 78

In [14]:
# Save the per-match results. The `with` block closes the file even if
# pickling fails part-way.
with open('EPL_Games.pkl', 'wb') as output:
    pickle.dump(games, output)

In [3]:
# Reload the per-match results written by the dump cell above.
# NOTE: this rebinds `Data` from the raw event DataFrame to the dict of
# per-match results; the cells below expect the dict.
# SECURITY: pickle.load can execute arbitrary code. Only load a file that this
# notebook produced itself.
with open('EPL_Games.pkl', 'rb') as pkl_file:
    Data = pickle.load(pkl_file)


In [150]:
def _smallest_nonzero(values):
    """Return the smallest non-zero entry of ``values``, or 0 if there is none."""
    arr = np.array(values)
    nonzero = arr[arr != 0]
    if len(nonzero) == 0:
        return 0
    return min(nonzero)


def _team_summary(events, rates, players, mean):
    """Aggregate one team's per-player rates.

    Returns ``([shots, gains, losses, pass_rate, max_pass, min_pass, n_players], goals)``.
    The six rate figures are sums over players, divided by the number of
    players when ``mean`` is true. A player with no team-mates contributes 0 to
    the three pass figures, and an empty team yields all zeros, where the
    original raised ZeroDivisionError / ValueError.
    """
    n_players = len(players)
    goals = 0
    shots = gains = losses = 0
    pass_rate = max_pass = min_pass = 0

    for i in players:
        goals += events[(i, 'Goal')]
        shots += rates[(i, 'Shot')]
        gains += rates[(i, 'Gain')]
        losses += rates[(i, 'Loss')]

        # Pass rates from player i to each team-mate (players is a numpy array).
        to_mates = [rates[(i, j)] for j in players[players != i]]
        if to_mates:
            pass_rate += sum(to_mates) / float(len(to_mates))
            max_pass += max(to_mates)
            min_pass += _smallest_nonzero(to_mates)

    if mean and n_players > 0:
        count = float(n_players)
        shots, gains, losses = shots / count, gains / count, losses / count
        pass_rate, max_pass, min_pass = pass_rate / count, max_pass / count, min_pass / count

    return [shots, gains, losses, pass_rate, max_pass, min_pass, n_players], goals


def simple_features(data, match_id, mean=True):
    """Build team-level and game-level feature vectors for one match.

    Parameters
    ----------
    data : dict
        Maps match id to the list returned by ``data_process_game``. Items 0,
        1, 2, 5, 6, 7 and 8 are read (teams, home/away event counts, home/away
        rates, home/away player arrays).
    match_id
        Key into ``data``.
    mean : bool, default True
        If true, the per-team rate features are averaged over the team's
        players; otherwise they are plain sums over players.

    Returns
    -------
    (team_features, game_features)
        ``team_features`` is a 2 x 12 array (home row, then away row) with
        columns match_id, team_id, shot, gain, loss, pass_rate, max_pass,
        min_pass, number of players, home flag (1/0), goals, result.
        ``game_features`` is a length-19 array: match_id, the seven home
        features, the seven away features, home team id, away team id, goal
        difference (home minus away), result.
        ``result`` is 1 for a win, -1 for a loss and 0 for a draw. It is seen
        from the home side in ``game_features`` and from each team's own side
        in ``team_features``.
    """
    game_data = data[match_id]
    teams = game_data[0]

    home_stats, goals_home = _team_summary(game_data[1], game_data[5], game_data[7], mean)
    away_stats, goals_away = _team_summary(game_data[2], game_data[6], game_data[8], mean)

    GD = goals_home - goals_away
    if goals_home > goals_away:
        winner = 1
    elif goals_home < goals_away:
        winner = -1
    else:
        winner = 0

    feature_home = np.array([match_id, teams[0]] + home_stats + [1, goals_home, winner])
    feature_away = np.array([match_id, teams[1]] + away_stats + [0, goals_away, -winner])

    # Positions 2..8 hold the seven per-team features (shot .. number of players).
    feature_game = np.hstack([match_id, feature_home[2:9], feature_away[2:9],
                              teams[0], teams[1], GD, winner])

    return np.vstack([feature_home, feature_away]), feature_game
    

In [159]:
# Iterate through every game to output feature dataframe
#
# `Data` is the dict of per-match results loaded from EPL_Games.pkl. The
# original iterated over `data.keys()`, a name no visible cell defines, so the
# cell failed on a fresh kernel.

matches = Data.keys()
n = len(matches)

team_blocks = []   # one 2 x 12 array (home row, away row) per match
game_rows = []     # one length-19 vector per match

for ind, match_id in enumerate(matches, 1):
    print('match ' + str(ind) + '/' + str(n))
    F, F_game = simple_features(Data, match_id)
    team_blocks.append(F)
    game_rows.append(F_game)

# Stack once instead of re-stacking inside the loop. The lists are reversed
# because the original prepended each new match, so the last match processed
# ends up first. Stacking also keeps sf_game 2-D when there is a single match.
sf = np.vstack(team_blocks[::-1]) if team_blocks else []
sf_game = np.vstack(game_rows[::-1]) if game_rows else []

df_sf = pd.DataFrame(data=sf, columns=['match_id', 'team_id', 'shot_rate', 'gain_rate', 'loss_rate', 'pass_rate', \
                                       'max_pass', 'min_pass', 'number_players', 'home', 'goals', 'result']) 

df_sf_game = pd.DataFrame(data=sf_game, columns=['match_id', 'home_shot_rate', 'home_gain_rate', 'home_loss_rate', \
                                                 'home_pass_rate', 'home_max_pass', 'home_min_pass', 'home_number_players',\
                                                 'away_shot_rate', 'away_gain_rate', 'away_loss_rate', 'away_pass_rate', 'away_max_pass', \
                                                 'away_min_pass','away_number_players','home_team_id', 'away_team_id', \
                                                 'goal_dif', 'result']) 

    

match 1/380
match 2/380
match 3/380
match 4/380
match 5/380
match 6/380
match 7/380
match 8/380
match 9/380
match 10/380
match 11/380
match 12/380
match 13/380
match 14/380
match 15/380
match 16/380
match 17/380
match 18/380
match 19/380
match 20/380
match 21/380
match 22/380
match 23/380
match 24/380
match 25/380
match 26/380
match 27/380
match 28/380
match 29/380
match 30/380
match 31/380
match 32/380
match 33/380
match 34/380
match 35/380
match 36/380
match 37/380
match 38/380
match 39/380
match 40/380
match 41/380
match 42/380
match 43/380
match 44/380
match 45/380
match 46/380
match 47/380
match 48/380
match 49/380
match 50/380
match 51/380
match 52/380
match 53/380
match 54/380
match 55/380
match 56/380
match 57/380
match 58/380
match 59/380
match 60/380
match 61/380
match 62/380
match 63/380
match 64/380
match 65/380
match 66/380
match 67/380
match 68/380
match 69/380
match 70/380
match 71/380
match 72/380
match 73/380
match 74/380
match 75/380
match 76/380
match 77/380
match 78

In [160]:
# Display the per-team feature table built above (two rows per match: home, then away).
df_sf

Unnamed: 0,match_id,team_id,shot_rate,gain_rate,loss_rate,pass_rate,max_pass,min_pass,number_players,home,goals,result
0,442367.0,110.0,0.005679,0.063082,0.139298,0.017340,0.073505,0.021773,14.0,1.0,1.0,-1.0
1,442367.0,6.0,0.021390,0.072041,0.129562,0.043597,0.146472,0.024469,14.0,0.0,2.0,1.0
2,442366.0,108.0,0.010458,0.069583,0.122125,0.027191,0.083790,0.018421,13.0,1.0,0.0,-1.0
3,442366.0,43.0,0.030875,0.089764,0.125116,0.052057,0.167208,0.028343,14.0,0.0,2.0,1.0
4,442365.0,52.0,0.009306,0.088736,0.204603,0.037657,0.130821,0.020391,14.0,1.0,1.0,-1.0
5,442365.0,4.0,0.009063,0.090023,0.198498,0.022258,0.099503,0.035140,14.0,0.0,2.0,1.0
6,442364.0,45.0,0.009681,0.070259,0.135022,0.025609,0.123994,0.035989,15.0,1.0,3.0,1.0
7,442364.0,35.0,0.010141,0.071944,0.144806,0.040617,0.147698,0.019575,14.0,0.0,1.0,-1.0
8,442363.0,1.0,0.016155,0.088670,0.122843,0.044293,0.133947,0.021515,14.0,1.0,2.0,1.0
9,442363.0,80.0,0.007248,0.069204,0.111009,0.044408,0.155709,0.029282,14.0,0.0,1.0,-1.0


In [161]:
# Display the per-game feature table built above (one row per match).
df_sf_game

Unnamed: 0,match_id,home_shot_rate,home_gain_rate,home_loss_rate,home_pass_rate,home_max_pass,home_min_pass,home_number_players,away_shot_rate,away_gain_rate,away_loss_rate,away_pass_rate,away_max_pass,away_min_pass,away_number_players,home_team_id,away_team_id,goal_dif,result
0,442367.0,0.005679,0.063082,0.139298,0.017340,0.073505,0.021773,14.0,0.021390,0.072041,0.129562,0.043597,0.146472,0.024469,14.0,110.0,6.0,-1.0,-1.0
1,442366.0,0.010458,0.069583,0.122125,0.027191,0.083790,0.018421,13.0,0.030875,0.089764,0.125116,0.052057,0.167208,0.028343,14.0,108.0,43.0,-2.0,-1.0
2,442365.0,0.009306,0.088736,0.204603,0.037657,0.130821,0.020391,14.0,0.009063,0.090023,0.198498,0.022258,0.099503,0.035140,14.0,52.0,4.0,-1.0,-1.0
3,442364.0,0.009681,0.070259,0.135022,0.025609,0.123994,0.035989,15.0,0.010141,0.071944,0.144806,0.040617,0.147698,0.019575,14.0,45.0,35.0,2.0,1.0
4,442363.0,0.016155,0.088670,0.122843,0.044293,0.133947,0.021515,14.0,0.007248,0.069204,0.111009,0.044408,0.155709,0.029282,14.0,1.0,80.0,1.0,1.0
5,442362.0,0.014014,0.091162,0.119178,0.042802,0.150494,0.026689,14.0,0.023638,0.091871,0.120183,0.048466,0.180748,0.034783,14.0,54.0,14.0,-2.0,-1.0
6,442361.0,0.022815,0.087887,0.184775,0.047142,0.258125,0.105018,14.0,0.008872,0.081167,0.140806,0.021152,0.075173,0.017023,14.0,11.0,21.0,2.0,1.0
7,442360.0,0.013915,0.086271,0.141929,0.030056,0.084416,0.012987,11.0,0.012447,0.091974,0.151612,0.036018,0.119682,0.025544,14.0,7.0,8.0,-1.0,-1.0
8,442359.0,0.017078,0.099946,0.163998,0.039116,0.182020,0.059193,14.0,0.011966,0.087510,0.154947,0.045272,0.150503,0.026525,14.0,3.0,111.0,3.0,1.0
9,442358.0,0.008171,0.077615,0.155296,0.020241,0.093713,0.022416,14.0,0.016559,0.069455,0.144247,0.026013,0.081505,0.017429,13.0,21.0,4.0,0.0,0.0


In [162]:
# Save the per-team feature table. The `with` block closes the file even if
# pickling fails.
with open('simple_features_by_team_game.pkl', 'wb') as output:
    pickle.dump(df_sf, output)

In [163]:
# Save the per-game feature table. The `with` block closes the file even if
# pickling fails.
with open('simple_features_by_game.pkl', 'wb') as output:
    pickle.dump(df_sf_game, output)

In [118]:
# List the match ids used as keys of `data`.
# NOTE(review): lowercase `data` is not assigned in any cell visible here (the
# pickle is loaded into `Data`); this relies on leftover kernel state.
data.keys()

[442368,
 442369,
 442370,
 442371,
 442372,
 442373,
 442374,
 442375,
 442376,
 442377,
 442378,
 441999,
 442000,
 442001,
 442002,
 442003,
 442004,
 442005,
 442006,
 442007,
 442008,
 442009,
 442010,
 442011,
 442012,
 442013,
 442014,
 442015,
 442016,
 442017,
 442018,
 442019,
 442020,
 442021,
 442022,
 442023,
 442024,
 442025,
 442026,
 442027,
 442028,
 442029,
 442030,
 442031,
 442032,
 442033,
 442034,
 442035,
 442036,
 442037,
 442038,
 442039,
 442040,
 442041,
 442042,
 442043,
 442044,
 442045,
 442046,
 442047,
 442048,
 442049,
 442050,
 442051,
 442052,
 442053,
 442054,
 442055,
 442056,
 442057,
 442058,
 442059,
 442060,
 442061,
 442062,
 442063,
 442064,
 442065,
 442066,
 442067,
 442068,
 442069,
 442070,
 442071,
 442072,
 442073,
 442074,
 442075,
 442076,
 442077,
 442078,
 442079,
 442080,
 442081,
 442082,
 442083,
 442084,
 442085,
 442086,
 442087,
 442088,
 442089,
 442090,
 442091,
 442092,
 442093,
 442094,
 442095,
 442096,
 442097,
 442098,
 

In [111]:
# Spot-check simple_features on a single match.
# NOTE(review): the recorded output below is one array with 11 values per row,
# while the current definition returns a 12-column array plus a per-game
# vector. The output looks stale; re-run to refresh it. `data` is also not
# assigned in any cell visible here.
simple_features(data, 442156)

array([[4.42156000e+05, 8.00000000e+01, 2.37235647e-02, 7.86901094e-02,
        1.36855765e-01, 5.45562854e-02, 1.52271670e-01, 1.48603222e-02,
        1.20000000e+01, 1.00000000e+00, 0.00000000e+00],
       [4.42156000e+05, 4.50000000e+01, 7.51170908e-03, 6.96597257e-02,
        1.37407783e-01, 2.51065736e-02, 1.08408090e-01, 1.72222658e-02,
        1.30000000e+01, 0.00000000e+00, 1.00000000e+00]])

In [31]:
# Repeat listing of the match ids in `data` (same call as an earlier cell).
# NOTE(review): `data` is not assigned in any cell visible here.
data.keys()

[442368,
 442369,
 442370,
 442371,
 442372,
 442373,
 442374,
 442375,
 442376,
 442377,
 442378,
 441999,
 442000,
 442001,
 442002,
 442003,
 442004,
 442005,
 442006,
 442007,
 442008,
 442009,
 442010,
 442011,
 442012,
 442013,
 442014,
 442015,
 442016,
 442017,
 442018,
 442019,
 442020,
 442021,
 442022,
 442023,
 442024,
 442025,
 442026,
 442027,
 442028,
 442029,
 442030,
 442031,
 442032,
 442033,
 442034,
 442035,
 442036,
 442037,
 442038,
 442039,
 442040,
 442041,
 442042,
 442043,
 442044,
 442045,
 442046,
 442047,
 442048,
 442049,
 442050,
 442051,
 442052,
 442053,
 442054,
 442055,
 442056,
 442057,
 442058,
 442059,
 442060,
 442061,
 442062,
 442063,
 442064,
 442065,
 442066,
 442067,
 442068,
 442069,
 442070,
 442071,
 442072,
 442073,
 442074,
 442075,
 442076,
 442077,
 442078,
 442079,
 442080,
 442081,
 442082,
 442083,
 442084,
 442085,
 442086,
 442087,
 442088,
 442089,
 442090,
 442091,
 442092,
 442093,
 442094,
 442095,
 442096,
 442097,
 442098,
 

In [20]:
# Scratch: show the current value of `x`.
# NOTE(review): no visible cell assigns the array shown below to `x`.
x

array([2, 5, 4])

In [21]:
# Scratch: bind `y` to a plain int. The indexing cells below need an array,
# so they only work after `y` is rebound.
y = 2

In [143]:
# Scratch: pick positions 0 and 2 with a list index.
# NOTE(review): the recorded result matches y = np.array([2,5,3]), which is
# assigned in a later cell; run top to bottom, `y` is the int 2 here and this fails.
y[[0,2]]

array([2, 3])

In [122]:
# Scratch: small array used by the indexing / np.append experiments around this cell.
y = np.array([2,5,3])

In [144]:
# Scratch: append a scalar after a list-indexed selection
# (simple_features uses the same np.append(array, scalar) form).
np.append(y[[0,2]], 2)

array([2, 3, 2])

In [149]:
# Scratch: attempt to put scalars on both sides of the selection.
# NOTE(review): the third positional argument of np.append is `axis`, not
# another value, which is why the recorded result is a ValueError. This cell
# still raises when run.
np.append(2, y[[0,2]], 2)

ValueError: zero-dimensional arrays cannot be concatenated

In [76]:
# Scratch: boolean-mask filter that drops zeros (same expression as in min2).
# NOTE(review): the recorded output is a 14-element float array that no visible cell assigns to `y`.
y[y!=0]

array([ 78091.,  42774., 106760.,  39155.,  57328.,  19197.,  11037.,
        80447., 102884.,  38580.,  40145.,  44683.,  17339.,  15976.])

In [102]:
# Scratch: sum the away-team shot rates of match 442156 by hand.
# Item 8 of a match entry is the away player array and item 6 the away rates
# (see the return list of data_process_game).
# NOTE(review): `data` is not assigned in any cell visible here.
x = 0
for i in data[442156][8]:
    x += data[442156][6][(i, 'Shot')]

In [103]:
# Scratch: divide the summed shot rate by 13 players. The result matches the
# away shot-rate value in the recorded simple_features(data, 442156) output.
x/float(13)

0.007511709080508729

In [54]:
# Scratch: built-in min applied to the array `y`.
min(y)

11037.0

In [57]:
# Scratch: wrap `y` in np.array (the recorded result is the same 14 values).
np.array(y)

array([ 78091.,  42774., 106760.,  39155.,  57328.,  19197.,  11037.,
        80447., 102884.,  38580.,  40145.,  44683.,  17339.,  15976.])

In [113]:
# Scratch: show the current value of `y`.
y

array([ 78091.,  42774., 106760.,  39155.,  57328.,  19197.,  11037.,
        80447., 102884.,  38580.,  40145.,  44683.,  17339.,  15976.])

In [115]:
# Scratch: list-index selection of positions 2, 4 and 7 (the form later used on feature vectors).
y[[2,4,7]]

array([106760.,  57328.,  80447.])

In [82]:
# Scratch: length of `z`.
# NOTE(review): `z` is not assigned in any cell visible here.
len(z)

1

In [66]:
# Scratch: length of `z` after conversion to an array.
# NOTE(review): recorded as 0 while len(z) above is recorded as 1, so `z`
# must have held different values at the two runs.
len(np.array(z))

0

In [75]:
# Scratch: len of a 2-D array counts rows, so a 1 x 3 array has length 1.
len(np.array([[3,4,3]]))

1