In [None]:
# Getting required libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

In [None]:
def read_and_process(id, date, team, venue):
  """Build the feature vector for one player ahead of one match.

  Reads ``players/<id>.csv`` (relative to the current working directory),
  keeps only the rows whose ``Start Date`` is strictly before ``date`` and
  averages the statistic columns over four subsets of that history:

  1. every earlier match,
  2. earlier matches against ``team``,
  3. earlier matches at ``venue``,
  4. earlier matches against ``team`` at ``venue``.

  An empty subset, or a column with no values, contributes 0.

  Parameters
  ----------
  id : value formatted into the CSV file name.
  date : cutoff compared against the ``Start Date`` index.
  team : value matched against the ``Opposition`` column.
  venue : value matched against the ``Ground`` column.

  Returns
  -------
  numpy.ndarray
      1-D array of ``4 * len(cols)`` values (64), ordered as listed above.
  """
  player = pd.read_csv(r'players/{}.csv'.format(id), index_col=['Start Date'])
  player.index = pd.DatetimeIndex(player.index)
  cols = ['Dis', 'Ct', 'St', 'Ct Wk', 'Ct Fi',
       'Runs_scored', 'Mins', 'BF', '4s', '6s', 'SR', 'Overs', 'Mdns',
       'Runs_concieved', 'Wkts', 'Econ']

  # Every feature must be computed from matches before the cutoff only;
  # using later rows would leak information about the match being predicted.
  history = player[player.index < date]

  # Masks are built on `history` itself so they always align with the frame
  # they index (no implicit reindexing of a mask from a different frame).
  team_mask = (history.Opposition == team)
  venue_mask = (history.Ground == venue)

  def _mean_stats(frame):
    # Column-wise mean; NaN (no rows / no values) becomes 0.
    return frame[cols].mean().fillna(0).values

  overall = _mean_stats(history)
  against_team = _mean_stats(history[team_mask])
  on_venue = _mean_stats(history[venue_mask])
  both = _mean_stats(history[team_mask & venue_mask])

  features = np.hstack((overall, against_team, on_venue, both))
  return features
# Broadcasting wrapper: four scalar inputs per call -> one 1-D feature row,
# so array inputs produce a 2-D matrix with one row per input element.
vread_and_process = np.vectorize(
    read_and_process,
    signature='(),(),(),()->(n)',
)
def data_prep(df, scores=None):
  """Turn a frame of (player, match) rows into model inputs and labels.

  Parameters
  ----------
  df : pandas.DataFrame
      Must provide the ``player_id``, ``date``, ``opposition``, ``venue``
      and ``played`` columns.
  scores : optional
      Not used by this function. Kept so existing two-argument calls keep
      working; the default is ``None`` because a default that refers to a
      module-level ``scores`` fails at definition time when that name does
      not exist.

  Returns
  -------
  (X, y)
      ``X`` is the output of ``vread_and_process`` for the rows of ``df``;
      ``y`` is the ``played`` column as floats.
  """
  player_ids = df['player_id'].values.astype(int)
  dates = df['date']
  teams = df.opposition.values
  venues = df.venue.values

  X = vread_and_process(player_ids, dates, teams, venues)

  y = df.played.values.astype(float)

  return X, y

In [None]:
# Directory holding the CSV inputs (Google Drive mount path).
path = '/content/drive/My Drive/Freelancer/Player Prediction/Excel Files'
os.chdir(path) # Changing current working directory so relative CSV paths resolve
players = pd.read_csv('Players_with_not_played_in_matches.csv') # Loading from disk
players.head()

Unnamed: 0,date,match,series,venue,opposition,player_id,player_name,playing_role,won,played
0,2006-08-30,225245,14612,Cardiff,England,42639,Shahid Afridi,Allrounder,1.0,1
1,2006-08-30,225245,14612,Cardiff,England,40570,Inzamam-ul-Haq,Batsman,1.0,1
2,2006-08-30,225245,14612,Cardiff,England,43650,Mohammad Yousuf,Batsman,1.0,1
3,2006-08-30,225245,14612,Cardiff,England,43652,Younis Khan,Batsman,1.0,1
4,2006-08-30,225245,14612,Cardiff,England,41434,Mohammad Hafeez,Allrounder,1.0,1


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
players_train, players_test = train_test_split(
    players, test_size=0.2, random_state=0
)
print("training samples:", len(players_train))
print("testing samples:", len(players_test))

training samples: 2978
testing samples: 745


In [None]:
# `data_prep` never reads its second argument, and no `scores` variable is
# defined in this notebook, so the frames are passed on their own.
X_train, y_train = data_prep(players_train)
X_test, y_test = data_prep(players_test)



In [None]:
# Learn the per-feature min/max on the training matrix only, then apply the
# same mapping to both splits.
scaler = MinMaxScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# One hidden layer of 64 units; early stopping and a fixed seed as before.
clf = MLPClassifier(
    hidden_layer_sizes=(64,),
    early_stopping=True,
    random_state=0,
).fit(X_train_scaled, y_train)
# Mean accuracy on the held-out, scaled test features.
clf.score(X_test_scaled, y_test)

0.8026845637583893

In [None]:
# The classifier was fitted on min-max scaled features, so it must be given
# the scaled test matrix; column 1 is the probability of the second class.
pred = clf.predict_proba(X_test_scaled)[:, 1].round(2)

In [None]:
# Build a new frame instead of writing a column onto the slice returned by
# train_test_split, which is what raises SettingWithCopyWarning.
players_test = players_test.assign(Prediction=pred)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


# Overall results

In [None]:
# Show the first 15 held-out rows, including the `Prediction` column.
players_test.head(15)

Unnamed: 0,date,match,series,venue,opposition,player_id,player_name,playing_role,won,played,Prediction
2645,2014-12-14,742623,11580,Sharjah,New Zealand,41434,Mohammad Hafeez,Allrounder,1.0,1,1.0
1503,2011-11-11,530427,12704,Dubai,Sri Lanka,43590,Wahab Riaz,Bowler,1.0,0,0.0
1791,2012-06-13,562441,12506,Colombo,Sri Lanka,41434,Mohammad Hafeez,Allrounder,1.0,1,1.0
3539,2019-05-08,1152841,18664,London,England,512191,Fakhar Zaman,Batsman,1.0,1,1.0
489,2008-01-30,325803,14132,Faisalab,Zimbabwe,43235,Samiullah Khan,Bowler,1.0,0,0.0
534,2008-06-26,335351,13982,Karachi,India,42657,Shoaib Malik,Allrounder,0.0,1,1.0
1380,2011-02-05,473928,13008,Auckland,New Zealand,41378,Misbah-ul-Haq,Batsman,0.0,0,0.0
3523,2019-05-08,1152841,18664,London,England,227758,Imad Wasim,Allrounder,1.0,1,1.0
2366,2013-12-25,657643,11916,Abu Dhab,Sri Lanka,429981,Mohammad Irfan,Bowler,1.0,0,0.0
3325,2017-04-09,1077950,10927,Providen,West Indies,348144,Babar Azam,Batsman,1.0,1,1.0


## Team recommendation for the 2014 series against Australia played in the UAE
## Selecting 4 batsmen, 1 wicketkeeper, 2 all-rounders, 4 bowlers


In [None]:
# Explicit copy: a column is added below, and writing to a filtered slice of
# `players` raises SettingWithCopyWarning.
series = players[players.series == 13008].copy()
X, y = data_prep(series)
X = scaler.transform(X)
# From here on `y` holds the predicted probability (column 1), not the labels.
y = clf.predict_proba(X)[:, 1]
series['recommended'] = y

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [None]:
match = 473928
# Rows of the scored series that belong to the chosen match.
match_rows = series[series.match == match]


def _top_for_role(role, count):
  """Return the `count` rows of `match_rows` with this role and the highest `recommended`."""
  candidates = match_rows[match_rows.playing_role == role]
  return candidates.sort_values(by=['recommended'], ascending=False).head(count)


batsman = _top_for_role('Batsman', 4)
keeper = _top_for_role('Wicketkeeper', 1)
allrounder = _top_for_role('Allrounder', 2)
bowler = _top_for_role('Bowler', 4)
recommended = pd.concat((batsman, keeper, allrounder, bowler))
print("Chances of win:", recommended.recommended.mean())

# Compare the recommended eleven and the eleven that actually played against
# every player name appearing in the series.
squad = series.player_name.unique()
recommended_players = recommended.player_name.tolist()
left_out = [player for player in squad if player not in recommended_players]
actual_team = match_rows[match_rows.played == 1].player_name.tolist()
left_out_actual = [player for player in squad if player not in actual_team]
print("Players that were not recommended:")
print(left_out)
print()
print("Players not in actual team")
print(left_out_actual)

recommended[['player_name', 'playing_role', 'played', 'recommended']]

Chances of win: 0.8739407769499546
Players that were not recommended:
['Misbah-ul-Haq', 'Abdul Razzaq', 'Abdur Rehman']

Players not in actual team
['Misbah-ul-Haq', 'Abdur Rehman', 'Wahab Riaz']


Unnamed: 0,player_name,playing_role,played,recommended
1372,Ahmed Shehzad,Batsman,1,0.895689
1371,Umar Akmal,Batsman,1,0.80698
1373,Younis Khan,Batsman,1,0.785746
1379,Asad Shafiq,Batsman,1,0.718611
1374,Kamran Akmal,Wicketkeeper,1,0.998641
1376,Mohammad Hafeez,Allrounder,1,0.970124
1370,Shahid Afridi,Allrounder,1,0.969911
1377,Wahab Riaz,Bowler,0,0.878264
1382,Sohail Tanvir,Bowler,1,0.873656
1375,Shoaib Akhtar,Bowler,1,0.8647


In [None]:
def recommend_team(squad, date, opposition, venue, clf):
  """Pick 4 batsmen, 1 wicketkeeper, 2 all-rounders and 4 bowlers from a squad.

  Relies on the module-level ``players`` frame (name -> id lookup), the
  fitted ``scaler`` and ``vread_and_process``.

  Parameters
  ----------
  squad : pandas.DataFrame
      Needs ``player_name`` and ``playing_role`` columns. It is not
      modified; scoring happens on a new frame.
  date, opposition, venue :
      Forwarded to ``vread_and_process`` together with the player ids.
  clf :
      Fitted classifier exposing ``predict_proba``.

  Returns
  -------
  pandas.DataFrame
      The selected players with ``player_name``, ``playing_role`` and
      ``recommended`` (column 1 of ``predict_proba``), grouped by role in
      the order batsmen, wicketkeeper, all-rounders, bowlers.

  Also prints the squad and a "Chances of win" percentage.
  """
  # First id recorded in `players` for each squad member's name.
  ids = [players.player_id[players.player_name == name].iloc[0]
         for name in squad.player_name]
  X = scaler.transform(vread_and_process(ids, date, opposition, venue))

  # `assign` returns a new frame, so the caller's `squad` is left untouched
  # and no SettingWithCopyWarning can be raised.
  scored = squad.assign(recommended=clf.predict_proba(X)[:, 1])

  # (role, number of players to pick), in output order.
  quotas = (('Batsman', 4), ('Wicketkeeper', 1), ('Allrounder', 2), ('Bowler', 4))
  picks = [
      scored[scored.playing_role == role]
      .sort_values(by=['recommended'], ascending=False)
      .head(count)
      for role, count in quotas
  ]
  recommended = pd.concat(picks)

  print()
  print("Recommending from following players:")
  print(scored.player_name.tolist())
  print()
  # NOTE(review): the figure is the mean probability over all squad members
  # scoring above this cutoff, not over the selected eleven -- confirm intent.
  min_probability = 0.3
  likely = scored.recommended[scored.recommended > min_probability]
  print("Chances of win: {:.1f}%".format(likely.mean() * 100))

  return recommended[['player_name', 'playing_role', 'recommended']].reset_index(drop=True)


In [None]:
# Every row recorded for series 11291.
squad = players[players.series == 11291]
# Keep the first row per player name, then reduce to name and role.
first_rows = squad.groupby('player_name').head(1)
squad = first_rows.reset_index()[['player_name', 'playing_role']]

# Match context handed to the recommender.
date = np.asarray(['2011-04-05'], dtype=object)
opposition = 'Sri Lanka'
venue = 'Sharjah'
recommend_team(squad, date, opposition, venue, clf)




Recommending from following players:
['Mohammad Irfan', 'Yasir Shah', 'Wahab Riaz', 'Aamer Yamin', 'Imad Wasim', 'Mohammad Rizwan', 'Shoaib Malik', 'Mohammad Hafeez', 'Ahmed Shehzad', 'Azhar Ali', 'Bilal Asif', 'Asad Shafiq', 'Sarfaraz Ahmed']

Chances of win: 68.3%


Unnamed: 0,player_name,playing_role,recommended
0,Ahmed Shehzad,Batsman,0.932872
1,Asad Shafiq,Batsman,0.370063
2,Azhar Ali,Batsman,0.236308
3,Mohammad Rizwan,Batsman,0.129117
4,Sarfaraz Ahmed,Wicketkeeper,0.898961
5,Mohammad Hafeez,Allrounder,0.983371
6,Shoaib Malik,Allrounder,0.915999
7,Wahab Riaz,Bowler,0.113374
8,Mohammad Irfan,Bowler,0.072742
9,Yasir Shah,Bowler,0.066174
