In [3]:
import pandas as pd
from itertools import islice
import matplotlib.pyplot as plt

In [4]:
#Classe degli indicatori che ci permette di inizializzare il dataframe data una squadra in input
class Indicators:

    def __init__(self,DataFrame,TeamChosen):
        self.Team = TeamChosen
        self.TeamMatches = DataFrame.loc[((DataFrame.home_team == self.Team) | (DataFrame.away_team == self.Team)) & (DataFrame.tournament == "FIFA World Cup")].copy()
        self.wins = len(self.TeamMatches[((self.TeamMatches.home_team == self.Team) & (self.TeamMatches.home_score > self.TeamMatches.away_score)) | ((self.TeamMatches.away_team == self.Team) & (self.TeamMatches.home_score < self.TeamMatches.away_score))].index)
        self.loses = len(self.TeamMatches[((self.TeamMatches.home_team == self.Team) & (self.TeamMatches.home_score < self.TeamMatches.away_score)) | ((self.TeamMatches.away_team == self.Team) & (self.TeamMatches.home_score > self.TeamMatches.away_score))].index)
        self.drafts = len(self.TeamMatches[self.TeamMatches.home_score == self.TeamMatches.away_score].index)
        goals_home = self.TeamMatches[['date','home_score','away_score']][(self.TeamMatches.home_team == self.Team)]
        goals_away = self.TeamMatches[['date','away_score','home_score']][(self.TeamMatches.away_team == self.Team)]

        goals_home.columns = ['date','goals_scored','goals_conceded']
        goals_away.columns = ['date','goals_scored','goals_conceded']
        
        frames_goals = [goals_home,goals_away]
        self.goals= pd.concat(frames_goals).sort_index()
        self.goals['date'] = pd.to_datetime(self.goals['date'])
        self.meanScores = self.goals['goals_scored'].mean()
        self.meanConceded = self.goals['goals_conceded'].mean()

    def printData(self):
        print("Wins : " + str(self.wins))
        print("Loses : " + str(self.loses))
        print("Drafts : " + str(self.drafts))
        print("Sum of W+L+D : " + str(self.wins+self.loses+self.drafts))
        print("The mean of the scores made by the team : "+str(self.meanScores))
        print("The mean of the scores conceded by the team : "+str(self.meanConceded))
        print("Print the indicators per match:")
        print(self.goals)

    def plotData(self):
        self.goals['goals_difference']=self.goals['goals_scored']-self.goals['goals_conceded']
        perYearPlot = self.goals.groupby(self.goals.date.dt.year).sum()
        print(perYearPlot)
        #self.goals.plot(kind = "line", x="date", y="goals_scored", label="Goal Scored")
        #self.goals.plot(kind = "line", x="date", y="goals_conceded", label="Goal Conceded")
        #self.goals.plot(kind = "line", x="date", y="goals_difference", label="Goal Difference")
        perYearPlot.reset_index().plot(kind = "line", x="date", y="goals_scored", label="Goal Scored")
        perYearPlot.reset_index().plot(kind = "line", x="date", y="goals_conceded", label="Goal Conceded")
        perYearPlot.reset_index().plot(kind = "line", x="date", y="goals_difference", label="Goal Difference")
        plt.show()
    
    def printDataFrameGoals(self):
        print(self.goals['date'])
        print(self.goals.dtypes)

    def getData(self):
        return self.wins,self.loses,self.drafts,self.meanScores,self.meanConceded
    
    def getPercentuale(self):
        total = self.wins+self.loses+self.drafts
        return (self.wins*100)/total,(self.loses*100)/total,(self.drafts*100)/total

    def isBetter(self, otherSquadIndicators):
        if(self.wins > otherSquadIndicators[0]):
            return True
        if(self.wins == otherSquadIndicators[0]):
            if(self.drafts >= otherSquadIndicators[2] and self.loses < otherSquadIndicators[1]):
                return True
        if(self.wins < otherSquadIndicators[0]):
            if(self.loses > otherSquadIndicators[1]):
                return self.wins + self.drafts > otherSquadIndicators[0] + otherSquadIndicators[1]  
        if(self.meanScores > otherSquadIndicators[3]):
            return True
        if(self.meanConceded > otherSquadIndicators[4]):
            return True
        if(self.wins == otherSquadIndicators[0] and self.loses == otherSquadIndicators[1] and self.drafts == otherSquadIndicators[2] and self.meanScores == otherSquadIndicators[3] and self.meanConceded == otherSquadIndicators[4]):
            return True
        return False
    pass



In [6]:
worldFootball = pd.read_csv("results.csv")

worldFootball = worldFootball[worldFootball.tournament == "FIFA World Cup"]

In [7]:
Team = "Italy"

TeamObj = Indicators(worldFootball,Team)

TeamObj.printData()

Wins : 45
Loses : 17
Drafts : 21
Sum of W+L+D : 83
The mean of the scores made by the team : 1.5421686746987953
The mean of the scores conceded by the team : 0.927710843373494
Print the indicators per match:
            date  goals_scored  goals_conceded
1694  1934-05-27             7               1
1699  1934-05-31             1               1
1700  1934-06-01             1               0
1702  1934-06-03             1               0
1705  1934-06-10             2               1
...          ...           ...             ...
32174 2010-06-20             1               1
32192 2010-06-24             2               3
36104 2014-06-14             2               1
36124 2014-06-20             0               1
36140 2014-06-24             0               1

[83 rows x 3 columns]


In [8]:
#Prelevo dal dataframe tutti i team#
#prelevo da sia home_team che away_team perché potrebbero esserci squadre che han giocato solo una volta#

allTeams_home = worldFootball[['home_team']].drop_duplicates()
allTeams_away = worldFootball[['away_team']].drop_duplicates()

allTeams_away.columns = ['team']
allTeams_home.columns = ['team']

allTeams = pd.concat([allTeams_away, allTeams_home]).drop_duplicates()

In [9]:
#associo ad ogni team le proprie statistiche (indicatori)#

allTeams['indicators'] = (allTeams['team'].map(lambda x: Indicators(worldFootball, x).getData()))


In [10]:
allTeams['Pts'] =  (allTeams['indicators'].map(lambda x: x[0]*3 + x[2])) 

TeamPts = TeamObj.getData()
TeamPts = TeamPts[0]*3 + TeamPts[2]

BetterTeamsByPts = allTeams[['team', 'Pts']][allTeams.Pts > TeamPts]


#print(BetterTeamsByPts)


allTeams['isBetter'] = (allTeams['indicators'].map(lambda x: not TeamObj.isBetter(x))) 

betterTeams = allTeams[allTeams.isBetter == True]



print("Better Teams")
#print(TeamObj.getData())
print(betterTeams)


Better Teams
         team                                         indicators  Pts  \
1327   Brazil  (73, 18, 18, 2.1009174311926606, 0.96330275229...  237   
1690  Germany  (67, 22, 20, 2.073394495412844, 1.146788990825...  221   

      isBetter  
1327      True  
1690      True  
