In [1]:
library(tidyverse)
library(reshape2)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.2     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.0.4     [32m✔[39m [34mdplyr  [39m 1.0.2
[32m✔[39m [34mtidyr  [39m 1.1.2     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.0

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()


Attaching package: ‘reshape2’


The following object is masked from ‘package:tidyr’:

    smiths




In [2]:
library("RSQLite")

## connect to db
con <- dbConnect(drv=RSQLite::SQLite(), dbname="../input/soccer/database.sqlite")

## list all tables
tables <- dbListTables(con)

## exclude sqlite_sequence (contains table information)
tables <- tables[tables != "sqlite_sequence"]

lDataFrames <- vector("list", length=length(tables))

## create a data.frame for each table
for (i in seq(along=tables)) {
  lDataFrames[[i]] <- dbGetQuery(conn=con, statement=paste("SELECT * FROM '", tables[[i]], "'", sep=""))
}



In [None]:
match <- data.frame(lDataFrames[3])
team <- data.frame(lDataFrames[6])
# head(match,2)
# head(team,2)


In [None]:
match <- match %>% select(match_api_id,date,home_team_api_id,away_team_api_id,home_team_goal, away_team_goal)
team <- team %>% select(team_api_id,team_long_name)


In [None]:
match$home_score <- (match$home_team_goal - match$away_team_goal)
match$away_score <- - match$home_score
match$home_win_rate <- as.integer(as.logical(match$home_score>0))
match$away_win_rate <- as.integer(as.logical(match$away_score>0))
head(match,6)


In [None]:
match_home <- match %>% select(match_api_id,date,home_team_api_id,home_score,home_win_rate) %>% 
  rename(
    team_api_id = home_team_api_id,
    score = home_score,
    win_rate = home_win_rate
    )
match_away <- match %>% select(match_api_id,date,away_team_api_id,away_score, away_win_rate) %>% 
  rename(
    team_api_id = away_team_api_id,
    score = away_score,
    win_rate = away_win_rate
    )

In [None]:
total <- rbind(match_home, match_away)
total$date <- as.Date(total$date)
head(arrange(total,match_api_id))

In [None]:
match14 = with(total, total[(date >= "2014-01-01" & date < "2015-01-01"),])
match15 = with(total, total[(date >= "2015-01-01" & date < "2016-01-01"),])
                            

In [None]:
df14 = subset(match14, select=-c(date))
df15 = subset(match15, select=-c(date))

In [None]:
team_ranking14_byscore <- aggregate(df14$score, by=list(team_api_id=df14$team_api_id), FUN=mean) %>% rename(score=x)
team_ranking15_byscore <- aggregate(df15$score, by=list(team_api_id=df15$team_api_id), FUN=mean) %>% rename(score=x)

In [None]:
team_ranking14_bywin <- aggregate(df14$win_rate, by=list(team_api_id=df14$team_api_id), FUN=mean) %>% rename(win_rate=x)
team_ranking15_bywin <- aggregate(df15$win_rate, by=list(team_api_id=df15$team_api_id), FUN=mean) %>% rename(win_rate=x)

In [None]:
team_ranking14 <- merge(team,team_ranking14_byscore, by="team_api_id",all.team_ranking14_byscore = TRUE)
team_ranking15 <- merge(team,team_ranking15_byscore,by="team_api_id",all.team_ranking14_byscore = TRUE)

team_ranking14 <- arrange(merge(team_ranking14,team_ranking14_bywin, by="team_api_id"),-win_rate)
team_ranking15 <- arrange(merge(team_ranking15,team_ranking15_bywin,by="team_api_id"),-win_rate)




In [None]:
head(team_ranking14)
head(team_ranking15)

In [9]:
#Preprocessing Team & Team Attributes
team <- data.frame(lDataFrames[6])
team_attr <- data.frame(lDataFrames[7])

team <- team %>% select(team_api_id,team_long_name)
team_attr <- team_attr %>% select(-c(id, team_fifa_api_id))

team_attr <- subset(team_attr, grepl("2014",date)|grepl("2015",date))

team_attr[sapply(team_attr, is.character)] <- lapply(team_attr[sapply(team_attr, is.character)], as.factor)
team_attr[sapply(team_attr, is.numeric)] <- lapply(team_attr[sapply(team_attr, is.numeric)], function(x){replace(x, x <0,NA)})
team_attr[sapply(team_attr, is.numeric)] <- lapply(team_attr[sapply(team_attr, is.numeric)],scale)
team_attr[sapply(team_attr, is.numeric)] <- lapply(team_attr[sapply(team_attr, is.numeric)],function(x){replace(x, x>2 | x < (-2),NA)})
team_attr[sapply(team_attr, is.numeric)] <- lapply(team_attr[sapply(team_attr, is.numeric)], function(x) { attributes(x) <- NULL; x })
head(team_attr,10)

Unnamed: 0_level_0,team_api_id,date,buildUpPlaySpeed,buildUpPlaySpeedClass,buildUpPlayDribbling,buildUpPlayDribblingClass,buildUpPlayPassing,buildUpPlayPassingClass,buildUpPlayPositioningClass,chanceCreationPassing,⋯,chanceCreationShooting,chanceCreationShootingClass,chanceCreationPositioningClass,defencePressure,defencePressureClass,defenceAggression,defenceAggressionClass,defenceTeamWidth,defenceTeamWidthClass,defenceDefenderLineClass
Unnamed: 0_level_1,<dbl>,<fct>,<dbl>,<fct>,<dbl>,<fct>,<dbl>,<fct>,<fct>,<dbl>,⋯,<dbl>,<fct>,<fct>,<dbl>,<fct>,<dbl>,<fct>,<dbl>,<fct>,<fct>
2,-0.06560063,2014-09-19 00:00:00,-0.2144838,Balanced,-0.06275509,Normal,0.6452846,Mixed,Organised,0.1430057,⋯,1.24263265,Normal,Organised,0.1990173,Medium,-0.5665414,Press,0.3153963,Normal,Cover
3,-0.06560063,2015-09-10 00:00:00,-0.7251099,Balanced,-0.78602338,Normal,0.4505643,Mixed,Organised,0.1430057,⋯,1.24263265,Normal,Organised,0.1990173,Medium,-0.5665414,Press,0.3153963,Normal,Cover
8,-0.13472968,2014-09-19 00:00:00,0.3982674,Balanced,1.59042957,Normal,1.2294453,Mixed,Organised,0.345115,⋯,0.55602546,Normal,Organised,-0.4768123,Medium,-0.8188536,Press,1.1076675,Normal,Cover
9,-0.13472968,2015-09-10 00:00:00,0.5003927,Balanced,1.59042957,Normal,0.3532042,Mixed,Organised,-0.1601582,⋯,1.14454591,Normal,Free Form,0.4242938,Medium,-0.4403853,Press,1.5038031,Normal,Cover
14,-0.13037623,2014-09-19 00:00:00,0.5003927,Balanced,0.86716128,Normal,0.2558441,Mixed,Organised,-0.4633221,⋯,0.06559175,Normal,Organised,-0.8147271,Medium,-0.1880732,Press,0.1833511,Normal,Cover
15,-0.13037623,2015-09-10 00:00:00,0.5003927,Balanced,0.86716128,Normal,0.2558441,Mixed,Organised,-0.4633221,⋯,0.06559175,Normal,Organised,-0.8147271,Medium,-0.1880732,Press,0.1833511,Normal,Cover
20,-0.13095031,2014-09-19 00:00:00,-0.6229847,Balanced,,Lots,0.2558441,Mixed,Organised,1.3556614,⋯,,Lots,Organised,1.4380382,Medium,1.0734876,Press,-0.3448297,Normal,Cover
21,-0.13095031,2015-09-10 00:00:00,-0.6229847,Balanced,,Lots,0.2558441,Mixed,Organised,1.3556614,⋯,1.73306636,Lots,Organised,1.4380382,Medium,1.0734876,Press,-0.3448297,Normal,Cover
26,-0.05196617,2014-09-19 00:00:00,-0.2144838,Balanced,0.45386512,Normal,-0.5230369,Mixed,Organised,0.2440604,⋯,0.85028568,Normal,Organised,-0.7020888,Medium,-1.3234779,Press,1.2397127,Normal,Cover
27,-0.05196617,2015-09-10 00:00:00,-0.1123586,Balanced,0.45386512,Normal,-0.5230369,Mixed,Organised,0.2440604,⋯,0.85028568,Normal,Organised,-0.7020888,Medium,-1.3234779,Press,1.2397127,Normal,Cover
