### Task: Explore the datasets and segregate the forwards from the other players based on position

### Import the libraries

In [1]:
import pandas as pd
from pandas import DataFrame
from pymongo import MongoClient
import matplotlib.pyplot as plt

### Set up the MongoDB connection in Python

In [2]:
# Connect to the MongoDB server on this machine (default port) and
# pick the 'Wyscout' database that the rest of the notebook queries.
client = MongoClient(host='localhost', port=27017)
db = client.get_database('Wyscout')

### Select only forwards (position codes `lwf` and `rwf`) from player_advance_stats and write them to a DataFrame

In [3]:
# Build one row per (player, position) pair, keeping wing forwards only.
#
# Stage order is $unwind -> $match -> $project: documents are filtered before
# they are reshaped, so the projection only runs on the rows that are kept.
# A single `$in` replaces the two-branch `$or` on the same field; the set of
# matched documents is unchanged.
forward_position_codes = ["lwf", "rwf"]
forward_pipeline = [
    # One output document per element of the `positions` array.
    {'$unwind': "$positions"},
    # Keep only left / right wing forward entries.
    {'$match': {"positions.position.code": {'$in': forward_position_codes}}},
    # Keep the identifying keys plus the position code and name.
    {'$project': {
        "_id": 0,
        "playerId": 1,
        "competitionId": 1,
        "seasonId": 1,
        "positions.position.code": 1,
        "positions.position.name": 1,
    }},
]
forward_details = db.player_advance_stats.aggregate(forward_pipeline)
# Materialise the cursor explicitly so the DataFrame is built from a plain list.
forward_details_df = pd.DataFrame(list(forward_details))

In [4]:
# Preview the first five rows (the default for head()).
forward_details_df.head()

Unnamed: 0,playerId,competitionId,seasonId,positions
0,247671,364,185727,"{'position': {'name': 'Right Wing Forward', 'c..."
1,3360,364,185727,"{'position': {'name': 'Left Wing Forward', 'co..."
2,105333,364,185727,"{'position': {'name': 'Right Wing Forward', 'c..."
3,105333,364,185727,"{'position': {'name': 'Left Wing Forward', 'co..."
4,391530,364,185727,"{'position': {'name': 'Left Wing Forward', 'co..."


### consider only the required columns

In [5]:
# Keep only the identifying key columns; the nested `positions` column is dropped.
forward_details_df1 = forward_details_df.loc[
    :, ['playerId', 'competitionId', 'seasonId']
]

In [6]:
# Row count before de-duplication.
forward_details_df1.shape[0]

43

### Remove duplicate records & compare the number of records

In [7]:
# Collapse fully identical rows, keeping the first occurrence of each.
forward_details_df1 = forward_details_df1.drop_duplicates(keep='first')

In [8]:
# Row count after de-duplication.
forward_details_df1.shape[0]

38

#### While merging, pandas raised the error 'trying to merge on object and int64 columns', so I tried converting all the relevant columns to 'int32'.
#### That did not work, so as a workaround the DataFrame is written to CSV and read back in as a DataFrame. Reference: https://stackoverflow.com/questions/50649853/trying-to-merge-2-dataframes-but-get-valueerror

In [9]:
# Persist the de-duplicated forwards (index included) to the working directory.
forward_details_df1.to_csv(path_or_buf='forward_details_df1.csv')

In [10]:
# Reload the forwards from CSV; the first CSV column is restored as the index.
forward_details_df2 = pd.read_csv(
    filepath_or_buffer='forward_details_df1.csv',
    index_col=0,
)

In [12]:
# Disabled attempt at converting playerId to an integer dtype.
# NOTE(review): Series.astype returns a new Series; as written the result is
# not assigned back, so enabling this line unchanged would leave
# forward_details_df1 unmodified.
# forward_details_df1.playerId.astype(int)

#### Read 'matches_events' into a DataFrame and, as mentioned above, write it to CSV and read it back into a DataFrame

In [5]:
# Fetch every document of the events collection, leaving out MongoDB's `_id`
# field, load the result into a DataFrame and preview the first five rows.
event_details = db.matches_events_scaling_v02.find(
    filter={},
    projection={"_id": 0},
)
event_details_df = pd.DataFrame(event_details)
event_details_df.head()

Unnamed: 0,index,id,playerId,teamId,matchId,matchPeriod,eventSec,eventId,eventName,subEventId,...,y start,x stop,y stop,tags,match_id,team_id,x start length (meters),y start length (meters),x stop length (meters),y stop length(meters)
0,0,613566210,346158,1609,2829960,1H,0.811034,8,Pass,85,...,51,28.0,63.0,1801.0,2829960,1609,52.5,34.68,29.4,42.84
1,1,613566221,20612,1609,2829960,1H,3.969.135,8,Pass,83,...,63,72.0,6.0,1801.0,2829960,1609,29.4,42.84,75.6,4.08
2,2,613566223,25867,1609,2829960,1H,7.277.111,7,Others on the ball,72,...,6,77.0,0.0,1302.0,2829960,1609,75.6,4.08,80.85,0.0
3,3,613565944,0,1625,2829960,1H,9.668.171,5,Interruption,50,...,100,,,,2829960,1609,24.15,68.0,0.0,0.0
4,4,613565945,8277,1625,2829960,1H,16.161.527,3,Free Kick,36,...,100,3.0,50.0,1801.0,2829960,1609,17.85,68.0,3.15,34.0


In [14]:
# Persist the events table (index included) to the working directory.
event_details_df.to_csv(path_or_buf='event_details_df.csv')

In [15]:
# Reload the events from CSV, rebinding the same name; the first CSV column
# is restored as the index.
event_details_df = pd.read_csv(
    filepath_or_buffer='event_details_df.csv',
    index_col=0,
)

In [16]:
# Preview the first two rows of the reloaded events table.
event_details_df.iloc[:2]

Unnamed: 0,index,id,playerId,teamId,matchId,matchPeriod,eventSec,eventId,eventName,subEventId,...,y start,x stop,y stop,tags,match_id,team_id,x start length (meters),y start length (meters),x stop length (meters),y stop length(meters)
0,0,613566210,346158,1609,2829960,1H,0.811034,8,Pass,85,...,51,28.0,63.0,1801,2829960,1609,52.5,34.68,29.4,42.84
1,1,613566221,20612,1609,2829960,1H,3.969.135,8,Pass,83,...,63,72.0,6.0,1801,2829960,1609,29.4,42.84,75.6,4.08


### Merge both DataFrames on 'playerId': final result

In [18]:
# Left join: every forward row is kept and paired with each of that player's
# events; forwards without any event still appear, with missing event columns.
ForwardEventDetails = pd.merge(
    forward_details_df2,
    event_details_df,
    how='left',
    on='playerId',
)

In [19]:
# Preview the first five rows (the default for head()) of the merged result.
ForwardEventDetails.head()

Unnamed: 0,playerId,competitionId,seasonId,index,id,teamId,matchId,matchPeriod,eventSec,eventId,...,y start,x stop,y stop,tags,match_id,team_id,x start length (meters),y start length (meters),x stop length (meters),y stop length(meters)
0,247671,364,185727,71.0,480360196.0,1609.0,2829966.0,1H,28.554.867,8.0,...,95.0,79.0,94.0,1801.0,2829966.0,1609.0,92.4,64.6,82.95,63.92
1,247671,364,185727,78.0,480360201.0,1609.0,2829966.0,1H,310.010.836,8.0,...,92.0,74.0,92.0,1801.0,2829966.0,1609.0,88.2,62.56,77.7,62.56
2,247671,364,185727,100.0,480360221.0,1609.0,2829966.0,1H,371.108.008,7.0,...,86.0,72.0,82.0,,2829966.0,1609.0,61.95,58.48,75.6,55.76
3,247671,364,185727,101.0,480360222.0,1609.0,2829966.0,1H,373.852.781,1.0,...,82.0,82.0,93.0,5047011802.0,2829966.0,1609.0,75.6,55.76,86.1,63.24
4,247671,364,185727,127.0,480360240.0,1609.0,2829966.0,1H,469.608.681,1.0,...,82.0,56.0,82.0,7031801.0,2829966.0,1609.0,58.8,55.76,58.8,55.76
