# Reading Matches data into pandas


In [None]:
import pandas as pd

Read the matches CSV file into a DataFrame.

In [None]:
# Load the scraped match data; the first CSV column becomes the DataFrame index.
matches = pd.read_csv("matches_new.csv", index_col=0)

In [None]:
# Preview the first five rows to sanity-check the load.
matches.head()

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,match report,notes,sh,sot,dist,fk,pk,pkatt,season,team
0,2023-08-12,21:30,La Liga,Matchweek 1,Sat,Away,W,2.0,0.0,Athletic Club,...,Match Report,,14.0,8.0,16.0,2.0,0,0,2024,Real Madrid
1,2023-08-19,19:30,La Liga,Matchweek 2,Sat,Away,W,3.0,1.0,Almería,...,Match Report,,25.0,9.0,17.0,1.0,0,0,2024,Real Madrid
2,2023-08-25,21:30,La Liga,Matchweek 3,Fri,Away,W,1.0,0.0,Celta Vigo,...,Match Report,,9.0,2.0,19.4,0.0,0,1,2024,Real Madrid
3,2023-09-02,16:15,La Liga,Matchweek 4,Sat,Home,W,2.0,1.0,Getafe,...,Match Report,,26.0,12.0,17.7,0.0,0,0,2024,Real Madrid
4,2023-09-17,21:00,La Liga,Matchweek 5,Sun,Home,W,2.0,1.0,Real Sociedad,...,Match Report,,17.0,8.0,15.9,1.0,0,0,2024,Real Madrid


In [None]:
# (rows, columns) of the loaded frame.
matches.shape

(1230, 27)

In [None]:
# Number of match rows per team; uneven counts hint at missing or partial seasons.
matches["team"].value_counts()

Real Madrid        62
Villarreal         62
Cadiz              62
Real Betis         62
Real Sociedad      62
Valencia           62
Osasuna            62
Girona             62
Mallorca           61
Celta Vigo         61
Sevilla            61
Rayo Vallecano     61
Getafe             61
Almeria            61
Athletic Club      61
Atletico Madrid    61
Barcelona          61
Espanyol           38
Valladolid         38
Elche              38
Las Palmas         24
Alaves             24
Granada            23
Name: team, dtype: int64

In [None]:
# Number of match rows per matchweek; a lower count means fewer rows were collected for that round.
matches["round"].value_counts()

Matchweek 1     40
Matchweek 13    40
Matchweek 23    40
Matchweek 20    40
Matchweek 22    40
Matchweek 2     40
Matchweek 19    40
Matchweek 18    40
Matchweek 17    40
Matchweek 16    40
Matchweek 15    40
Matchweek 14    40
Matchweek 21    40
Matchweek 12    40
Matchweek 6     40
Matchweek 11    40
Matchweek 3     40
Matchweek 5     40
Matchweek 4     40
Matchweek 7     40
Matchweek 8     40
Matchweek 9     40
Matchweek 10    40
Matchweek 24    30
Matchweek 32    20
Matchweek 37    20
Matchweek 36    20
Matchweek 35    20
Matchweek 34    20
Matchweek 33    20
Matchweek 26    20
Matchweek 31    20
Matchweek 30    20
Matchweek 29    20
Matchweek 28    20
Matchweek 27    20
Matchweek 25    20
Matchweek 38    20
Name: round, dtype: int64

Some matchweeks have a lower count than others, so we know some data is missing.

# Cleaning Data for ML

In [None]:
# Inspect column dtypes: object (string) columns must be converted before they can be used as model inputs.
matches.dtypes

date             object
time             object
comp             object
round            object
day              object
venue            object
result           object
gf              float64
ga              float64
opponent         object
xg              float64
xga             float64
poss            float64
attendance      float64
captain          object
formation        object
referee          object
match report     object
notes           float64
sh              float64
sot             float64
dist            float64
fk              float64
pk                int64
pkatt             int64
season            int64
team             object
dtype: object

An ML model only works on numeric data (int, float, etc.), so if we want to use a column such as date, we first need to convert its dtype.

In [None]:
# Parse the date strings into datetime64 so they can be compared and decomposed later.
matches["date"] = matches["date"].pipe(pd.to_datetime)

In [None]:
# Re-check dtypes to confirm "date" is now datetime64[ns].
matches.dtypes

date            datetime64[ns]
time                    object
comp                    object
round                   object
day                     object
venue                   object
result                  object
gf                     float64
ga                     float64
opponent                object
xg                     float64
xga                    float64
poss                   float64
attendance             float64
captain                 object
formation               object
referee                 object
match report            object
notes                  float64
sh                     float64
sot                    float64
dist                   float64
fk                     float64
pk                       int64
pkatt                    int64
season                   int64
team                    object
dtype: object

# Creating Predictors for ML Model

In [None]:
# Encode the venue (Home/Away) as integer category codes.
matches["venue_code"] = pd.Categorical(matches["venue"]).codes

In short, we convert the values of the useful columns into numbers.

In [None]:
# Give every opponent an integer category code.
matches["opp_code"] = pd.Categorical(matches["opponent"]).codes

In [None]:
# Keep only the kick-off hour: the regex removes everything from the first ":" onward
# (e.g. "21:30" -> "21"), and the remainder is cast to int.
matches["hour"] = matches["time"].str.replace(":.+","",regex = True).astype("int") #only take hr from time

In [None]:
# Day of the week as an integer: Monday is 0 and Sunday is 6.
matches["day_code"] = matches["date"].dt.weekday

In [None]:
# Integer category codes for the referee and for the team captain.
matches["ref_code"] = pd.Categorical(matches["referee"]).codes
matches["capt_code"] = pd.Categorical(matches["captain"]).codes

Our target is whether the team won or not: a loss or a draw is encoded as 0, and a win is encoded as 1.

In [None]:
# Binary target: 1 when the result is a win ("W"), 0 for a draw or a loss.
matches["target"] = matches["result"].eq("W").astype("int")

In [None]:
matches

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,pkatt,season,team,venue_code,opp_code,hour,day_code,ref_code,capt_code,target
0,2023-08-12,21:30,La Liga,Matchweek 1,Sat,Away,W,2.0,0.0,Athletic Club,...,0,2024,Real Madrid,0,2,21,5,11,18,1
1,2023-08-19,19:30,La Liga,Matchweek 2,Sat,Away,W,3.0,1.0,Almería,...,0,2024,Real Madrid,0,1,19,5,14,18,1
2,2023-08-25,21:30,La Liga,Matchweek 3,Fri,Away,W,1.0,0.0,Celta Vigo,...,1,2024,Real Madrid,0,6,21,4,9,18,1
3,2023-09-02,16:15,La Liga,Matchweek 4,Sat,Home,W,2.0,1.0,Getafe,...,0,2024,Real Madrid,1,10,16,5,17,68,1
4,2023-09-17,21:00,La Liga,Matchweek 5,Sun,Home,W,2.0,1.0,Real Sociedad,...,0,2024,Real Madrid,1,18,21,6,6,18,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36,2023-05-14,16:15,La Liga,Matchweek 34,Sun,Home,W,1.0,0.0,Atlético Madrid,...,0,2023,Elche,1,3,16,6,2,27,1
37,2023-05-20,18:30,La Liga,Matchweek 35,Sat,Away,D,1.0,1.0,Getafe,...,0,2023,Elche,0,10,18,5,10,27,0
38,2023-05-24,19:30,La Liga,Matchweek 36,Wed,Home,D,1.0,1.0,Sevilla,...,0,2023,Elche,1,19,19,2,9,27,0
39,2023-05-28,19:00,La Liga,Matchweek 37,Sun,Away,W,1.0,0.0,Athletic Club,...,0,2023,Elche,0,2,19,6,22,27,1


# Creating Initial ML Model

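We are going to import the RandomForestClassifier model from sklearn. A random forest can pick up non-linear relationships: for example, opp_code = 18 is only a label, so the model will not treat that team as stronger than teams with a smaller opp_code value.

A random forest is an ensemble of multiple decision trees.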

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Baseline classifier: 50 trees, at least 10 samples required to split a node,
# and a fixed seed so repeated runs give the same forest.
rf = RandomForestClassifier(
    n_estimators=50,
    min_samples_split=10,
    random_state=1,
)

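n_estimators: the number of decision trees. The higher it is, the longer the code takes to run, and accuracy usually improves (up to a point).
min_samples_split: the minimum number of samples a node must have before it can be split. The higher it is, the lower the chance of overfitting.
random_state = 1: re-running the code on the same data gives the same results.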

In [None]:
# Training set: every match played before the 2024-01-01 cutoff.
train = matches.loc[matches["date"] < "2024-01-01"]

In [None]:
# Test set: every match played on or after the cutoff. Using ">=" (the training
# filter is "<") guarantees a match played exactly on 2024-01-01 is not silently
# dropped from both sets.
test = matches[matches["date"] >= "2024-01-01"]

In [None]:
# Encoded columns used as model inputs for the baseline model.
predictors = [
    "venue_code",
    "opp_code",
    "hour",
    "day_code",
    "ref_code",
    "capt_code",
]

Now that the dataset is split into training and testing sets, we work on the training set and fit our model, using the predictors to learn the "target" column.

In [None]:
# Fit the forest on the training rows: predictor columns as inputs, win/not-win as the label.
rf.fit(X=train[predictors], y=train["target"])

After training the model, we can make predictions on the test data.

In [None]:
# Predicted win (1) / not-win (0) for every test match.
preds = rf.predict(X=test[predictors])

Now we are going to check the accuracy of our model on the test data.

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Fraction of test matches whose predicted label equals the actual label.
acc = accuracy_score(y_true=test["target"], y_pred=preds)

In [None]:
acc

0.6181818181818182

We can look at the confusion matrix to see where our model performs well and where it performs badly.

In [None]:
# Put actual and predicted labels side by side, indexed like the test rows.
combined = pd.DataFrame({"actual": test["target"], "prediction": preds})

In [None]:
# Confusion table: rows are the actual labels, columns are the predicted labels.
pd.crosstab(combined["actual"], combined["prediction"])

prediction,0,1
actual,Unnamed: 1_level_1,Unnamed: 2_level_1
0,54,14
1,28,14


Now let's measure the precision: of all the matches where we predicted a win, the percentage in which the team actually won.

In [None]:
from sklearn.metrics import precision_score

In [None]:
# Precision of the baseline model: share of predicted wins that were real wins.
precision_score(y_true=test["target"], y_pred=preds)

0.5

# Improving Precision With rolling averages

We are going to take averages over the previous matchweeks and use them to predict what will happen in the next matchweek. The averages include data from multiple columns such as pk, fk, sot, etc.

In [None]:
# One group per team, so each team's matches can be processed separately.
grouped_teams = matches.groupby(by="team")

In [None]:
# Pull out a single team's matches to try the rolling-average logic on.
group = grouped_teams.get_group(name="Real Madrid")

In [None]:
group

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,pkatt,season,team,venue_code,opp_code,hour,day_code,ref_code,capt_code,target
0,2023-08-12,21:30,La Liga,Matchweek 1,Sat,Away,W,2.0,0.0,Athletic Club,...,0,2024,Real Madrid,0,2,21,5,11,18,1
1,2023-08-19,19:30,La Liga,Matchweek 2,Sat,Away,W,3.0,1.0,Almería,...,0,2024,Real Madrid,0,1,19,5,14,18,1
2,2023-08-25,21:30,La Liga,Matchweek 3,Fri,Away,W,1.0,0.0,Celta Vigo,...,1,2024,Real Madrid,0,6,21,4,9,18,1
3,2023-09-02,16:15,La Liga,Matchweek 4,Sat,Home,W,2.0,1.0,Getafe,...,0,2024,Real Madrid,1,10,16,5,17,68,1
4,2023-09-17,21:00,La Liga,Matchweek 5,Sun,Home,W,2.0,1.0,Real Sociedad,...,0,2024,Real Madrid,1,18,21,6,6,18,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53,2023-05-13,21:00,La Liga,Matchweek 34,Sat,Home,W,1.0,0.0,Getafe,...,0,2023,Real Madrid,1,10,21,5,15,78,1
55,2023-05-21,18:30,La Liga,Matchweek 35,Sun,Away,L,0.0,1.0,Valencia,...,0,2023,Real Madrid,0,20,18,6,21,60,0
56,2023-05-24,19:30,La Liga,Matchweek 36,Wed,Home,W,2.0,1.0,Rayo Vallecano,...,0,2023,Real Madrid,1,16,19,2,11,60,1
57,2023-05-27,19:00,La Liga,Matchweek 37,Sat,Away,W,2.0,1.0,Sevilla,...,0,2023,Real Madrid,0,19,19,5,6,68,1


In [None]:
def rolling_averages(group,cols,new_cols,window = 3):
  """Add rolling means of previous matches to one team's match rows.

  Parameters
  ----------
  group : DataFrame holding the matches of a single team; needs a "date"
      column plus every column named in ``cols``. It is not modified.
  cols : list of column names to average.
  new_cols : list of output column names, paired positionally with ``cols``.
  window : number of previous matches averaged (default 3).

  Returns
  -------
  A date-sorted copy of ``group`` with the ``new_cols`` columns added. Rows
  that do not have ``window`` earlier matches (or that have missing values
  inside the window) are dropped.

  Raises
  ------
  ValueError if ``cols`` and ``new_cols`` differ in length.
  """
  if len(cols) != len(new_cols):
    raise ValueError("cols and new_cols must have the same length")

  ordered = group.sort_values("date") # chronological order; returns a new frame
  # closed = 'left' leaves the current match out of its own window, so each
  # average only uses information available before kick-off
  rolling_stats = ordered[cols].rolling(window,closed = 'left').mean()
  ordered[new_cols] = rolling_stats # store the averages under the new column names
  ordered = ordered.dropna(subset = new_cols) # drop rows without a complete rolling window

  return ordered

In [None]:
# Per-match stat columns to average, and the names of the rolling columns built from them.
cols = [
    "gf", "ga",
    "sh", "sot", "dist",
    "fk", "pk", "pkatt",
    "attendance", "poss",
]
new_cols = [stat + "_rolling" for stat in cols]

In [None]:
new_cols

['gf_rolling',
 'ga_rolling',
 'sh_rolling',
 'sot_rolling',
 'dist_rolling',
 'fk_rolling',
 'pk_rolling',
 'pkatt_rolling',
 'attendance_rolling',
 'poss_rolling']

In [None]:
# Try the helper on the single-team frame before applying it to every team.
rolling_averages(group,cols,new_cols)

Now we are going to use this function to calculate rolling averages for each team.

In [None]:
# Run rolling_averages on each team's matches separately and stack the results.
matches_rolling = matches.groupby("team").apply(
    rolling_averages, cols=cols, new_cols=new_cols
)

In [None]:
matches_rolling

Unnamed: 0_level_0,Unnamed: 1_level_0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,gf_rolling,ga_rolling,sh_rolling,sot_rolling,dist_rolling,fk_rolling,pk_rolling,pkatt_rolling,attendance_rolling,poss_rolling
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Alaves,3,2023-09-02,18:30,La Liga,Matchweek 4,Sat,Home,W,1.0,0.0,Valencia,...,1.333333,1.666667,13.333333,3.333333,19.033333,0.333333,0.000000,0.000000,15212.333333,47.333333
Alaves,4,2023-09-15,21:00,La Liga,Matchweek 5,Fri,Away,L,0.0,2.0,Rayo Vallecano,...,1.666667,1.333333,12.000000,2.666667,16.966667,0.333333,0.000000,0.333333,14324.000000,42.333333
Alaves,5,2023-09-22,21:00,La Liga,Matchweek 6,Fri,Home,L,0.0,2.0,Athletic Club,...,0.333333,1.000000,10.333333,0.666667,16.966667,0.333333,0.000000,0.333333,13260.000000,47.333333
Alaves,6,2023-09-28,19:00,La Liga,Matchweek 7,Thu,Away,D,1.0,1.0,Celta Vigo,...,0.333333,1.333333,10.000000,2.000000,15.800000,0.333333,0.000000,0.333333,16059.333333,44.333333
Alaves,7,2023-10-01,16:15,La Liga,Matchweek 8,Sun,Home,L,0.0,2.0,Osasuna,...,0.333333,1.666667,15.000000,5.000000,15.466667,0.666667,0.000000,0.000000,16129.666667,49.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Villarreal,28,2024-01-13,13:00,La Liga,Matchweek 20,Sat,Away,L,0.0,3.0,Las Palmas,...,1.666667,3.000000,7.333333,4.000000,15.533333,0.000000,0.333333,0.333333,43386.666667,48.000000
Villarreal,29,2024-01-20,16:15,La Liga,Matchweek 21,Sat,Home,D,1.0,1.0,Mallorca,...,1.333333,2.666667,12.000000,4.666667,14.766667,0.666667,0.333333,0.333333,28048.333333,50.000000
Villarreal,30,2024-01-27,18:30,La Liga,Matchweek 22,Sat,Away,W,5.0,3.0,Barcelona,...,0.666667,2.333333,14.333333,4.000000,16.900000,1.333333,0.000000,0.000000,28645.666667,50.666667
Villarreal,31,2024-02-04,14:00,La Liga,Matchweek 23,Sun,Home,D,0.0,0.0,Cádiz,...,2.000000,2.333333,14.666667,4.666667,16.133333,1.333333,0.000000,0.000000,29064.333333,42.333333


We don't need the team name as an extra index level; it only makes the frame harder to work with for the ML model, so we drop it.

In [None]:
# Remove the "team" index level added by groupby-apply; the "team" column itself is kept.
matches_rolling = matches_rolling.reset_index(level="team", drop=True)

In [None]:
matches_rolling

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,gf_rolling,ga_rolling,sh_rolling,sot_rolling,dist_rolling,fk_rolling,pk_rolling,pkatt_rolling,attendance_rolling,poss_rolling
3,2023-09-02,18:30,La Liga,Matchweek 4,Sat,Home,W,1.0,0.0,Valencia,...,1.333333,1.666667,13.333333,3.333333,19.033333,0.333333,0.000000,0.000000,15212.333333,47.333333
4,2023-09-15,21:00,La Liga,Matchweek 5,Fri,Away,L,0.0,2.0,Rayo Vallecano,...,1.666667,1.333333,12.000000,2.666667,16.966667,0.333333,0.000000,0.333333,14324.000000,42.333333
5,2023-09-22,21:00,La Liga,Matchweek 6,Fri,Home,L,0.0,2.0,Athletic Club,...,0.333333,1.000000,10.333333,0.666667,16.966667,0.333333,0.000000,0.333333,13260.000000,47.333333
6,2023-09-28,19:00,La Liga,Matchweek 7,Thu,Away,D,1.0,1.0,Celta Vigo,...,0.333333,1.333333,10.000000,2.000000,15.800000,0.333333,0.000000,0.333333,16059.333333,44.333333
7,2023-10-01,16:15,La Liga,Matchweek 8,Sun,Home,L,0.0,2.0,Osasuna,...,0.333333,1.666667,15.000000,5.000000,15.466667,0.666667,0.000000,0.000000,16129.666667,49.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28,2024-01-13,13:00,La Liga,Matchweek 20,Sat,Away,L,0.0,3.0,Las Palmas,...,1.666667,3.000000,7.333333,4.000000,15.533333,0.000000,0.333333,0.333333,43386.666667,48.000000
29,2024-01-20,16:15,La Liga,Matchweek 21,Sat,Home,D,1.0,1.0,Mallorca,...,1.333333,2.666667,12.000000,4.666667,14.766667,0.666667,0.333333,0.333333,28048.333333,50.000000
30,2024-01-27,18:30,La Liga,Matchweek 22,Sat,Away,W,5.0,3.0,Barcelona,...,0.666667,2.333333,14.333333,4.000000,16.900000,1.333333,0.000000,0.000000,28645.666667,50.666667
31,2024-02-04,14:00,La Liga,Matchweek 23,Sun,Home,D,0.0,0.0,Cádiz,...,2.000000,2.333333,14.666667,4.666667,16.133333,1.333333,0.000000,0.000000,29064.333333,42.333333


We have well over a thousand rows, but the last index value shown is only 31, which means index values are repeated across teams. We don't want that; we need the index values to be unique.

In [None]:
# Replace the repeated per-team index with a unique 0..n-1 index.
matches_rolling.index = range(len(matches_rolling))

In [None]:
matches_rolling

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,gf_rolling,ga_rolling,sh_rolling,sot_rolling,dist_rolling,fk_rolling,pk_rolling,pkatt_rolling,attendance_rolling,poss_rolling
0,2023-09-02,18:30,La Liga,Matchweek 4,Sat,Home,W,1.0,0.0,Valencia,...,1.333333,1.666667,13.333333,3.333333,19.033333,0.333333,0.000000,0.000000,15212.333333,47.333333
1,2023-09-15,21:00,La Liga,Matchweek 5,Fri,Away,L,0.0,2.0,Rayo Vallecano,...,1.666667,1.333333,12.000000,2.666667,16.966667,0.333333,0.000000,0.333333,14324.000000,42.333333
2,2023-09-22,21:00,La Liga,Matchweek 6,Fri,Home,L,0.0,2.0,Athletic Club,...,0.333333,1.000000,10.333333,0.666667,16.966667,0.333333,0.000000,0.333333,13260.000000,47.333333
3,2023-09-28,19:00,La Liga,Matchweek 7,Thu,Away,D,1.0,1.0,Celta Vigo,...,0.333333,1.333333,10.000000,2.000000,15.800000,0.333333,0.000000,0.333333,16059.333333,44.333333
4,2023-10-01,16:15,La Liga,Matchweek 8,Sun,Home,L,0.0,2.0,Osasuna,...,0.333333,1.666667,15.000000,5.000000,15.466667,0.666667,0.000000,0.000000,16129.666667,49.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1156,2024-01-13,13:00,La Liga,Matchweek 20,Sat,Away,L,0.0,3.0,Las Palmas,...,1.666667,3.000000,7.333333,4.000000,15.533333,0.000000,0.333333,0.333333,43386.666667,48.000000
1157,2024-01-20,16:15,La Liga,Matchweek 21,Sat,Home,D,1.0,1.0,Mallorca,...,1.333333,2.666667,12.000000,4.666667,14.766667,0.666667,0.333333,0.333333,28048.333333,50.000000
1158,2024-01-27,18:30,La Liga,Matchweek 22,Sat,Away,W,5.0,3.0,Barcelona,...,0.666667,2.333333,14.333333,4.000000,16.900000,1.333333,0.000000,0.000000,28645.666667,50.666667
1159,2024-02-04,14:00,La Liga,Matchweek 23,Sun,Home,D,0.0,0.0,Cádiz,...,2.000000,2.333333,14.666667,4.666667,16.133333,1.333333,0.000000,0.000000,29064.333333,42.333333


# ReTraining Our Model

In [None]:
def make_predictions(data,predictors,cutoff = "2024-01-01",model = None):
  """Train on matches before ``cutoff`` and evaluate on matches from ``cutoff`` on.

  Parameters
  ----------
  data : DataFrame with a datetime "date" column, a binary "target" column
      and every column named in ``predictors``.
  predictors : list of column names used as model inputs.
  cutoff : date (string or timestamp) separating train from test; rows dated
      before it are training rows, rows on or after it are test rows.
  model : estimator with ``fit`` and ``predict``. Defaults to the
      notebook-level ``rf``. The estimator is (re)fitted in place.

  Returns
  -------
  combined : DataFrame indexed like the test rows, with "actual" and
      "prediction" columns.
  precision : precision of the predictions on the test rows.
  """
  if model is None:
    model = rf # fall back to the classifier defined earlier in the notebook

  train = data[data["date"] < cutoff]
  # ">=" so a match played exactly on the cutoff date is not dropped from both sets
  test = data[data["date"] >= cutoff]

  model.fit(train[predictors],train["target"])
  preds = model.predict(test[predictors])

  combined = pd.DataFrame(dict(actual = test["target"], prediction = preds),index = test.index)
  precision = precision_score(test["target"],preds)

  return combined,precision

In [None]:
# Retrain with the original predictors plus the rolling-average features.
combined, precision = make_predictions(
    data=matches_rolling,
    predictors=predictors + new_cols,
)

In [None]:
precision

0.4827586206896552

In [None]:
combined

Unnamed: 0,actual,prediction
15,0,0
16,1,0
17,1,0
18,1,0
19,0,0
...,...,...
1156,0,0
1157,0,0
1158,1,0
1159,0,0


In [None]:
# Attach match details to each prediction by joining on the shared row index.
match_info = matches_rolling[["date", "team", "opponent", "result"]]
combined = combined.merge(match_info, right_index=True, left_index=True)

In [None]:
combined

Unnamed: 0,actual,prediction,date,team,opponent,result
15,0,0,2024-01-02,Alaves,Real Sociedad,D
16,1,0,2024-01-12,Alaves,Sevilla,W
17,1,0,2024-01-19,Alaves,Cádiz,W
18,1,0,2024-01-26,Alaves,Almería,W
19,0,0,2024-02-03,Alaves,Barcelona,L
...,...,...,...,...,...,...
1156,0,0,2024-01-13,Villarreal,Las Palmas,L
1157,0,0,2024-01-20,Villarreal,Mallorca,D
1158,1,0,2024-01-27,Villarreal,Barcelona,W
1159,0,0,2024-02-04,Villarreal,Cádiz,D


If the same team is spelled differently in the team column and in the opponent column, we need to map those names to one common spelling before merging the data; otherwise the two sides of a match will not line up.

In [None]:
# Disabled example of a name mapping: a dict subclass whose __missing__ returns the
# key itself, so .map() leaves any team name without an entry unchanged.
# class MissingDict(dict):
#     __missing__ = lambda self, key: key
# map_values = {
#     "Real Madrid" : "Real Madrid",
#     "Barca" : "Barcelona",
#     "A Madrid" : "Atletico Madrid"
# }
# mapping = MissingDict(**map_values)

In [None]:
# mapping["Barca"]

In [None]:
# combined["new_team"] = combined["team"].map(mapping)
# NOTE(review): left disabled, but the merged output pairs opponent "Cádiz" with
# team "Cadiz", so team/opponent spellings do differ and some name normalisation
# is needed before the self-merge.

In [None]:
# combined

In [None]:
# Pair each team's row with its opponent's row for the same match.
# The "team" column holds ASCII names ("Cadiz", "Almeria") while "opponent" keeps
# accents ("Cádiz", "Almería"), so matching them directly silently drops those
# fixtures. Strip the accents from the right-hand opponent names first; as a
# result opponent_y is the accent-free spelling, opponent_x the original.
# NOTE(review): names that differ by more than accents (e.g. abbreviations) would
# still need an explicit mapping - confirm against the full team/opponent lists.
merged = combined.merge(
    combined.assign(
        opponent=lambda frame: frame["opponent"]
        .str.normalize("NFKD")
        .str.encode("ascii", errors="ignore")
        .str.decode("ascii")
    ),
    left_on=["date", "team"],
    right_on=["date", "opponent"],
)

We merged the home and away rows of the same match, so each row now holds one team's prediction and result (`_x`) next to its opponent's (`_y`).

In [None]:
merged

Unnamed: 0,actual_x,prediction_x,date,team_x,opponent_x,result_x,actual_y,prediction_y,team_y,opponent_y,result_y
0,1,0,2024-01-04,Athletic Club,Sevilla,W,0,1,Sevilla,Athletic Club,L
1,1,0,2024-01-13,Athletic Club,Real Sociedad,W,0,0,Real Sociedad,Athletic Club,L
2,0,0,2024-01-20,Athletic Club,Valencia,L,1,0,Valencia,Athletic Club,W
3,0,0,2024-01-28,Athletic Club,Cádiz,D,0,0,Cadiz,Athletic Club,D
4,1,1,2024-02-02,Athletic Club,Mallorca,W,0,0,Mallorca,Athletic Club,L
...,...,...,...,...,...,...,...,...,...,...,...
77,0,0,2024-01-13,Villarreal,Las Palmas,L,1,1,Las Palmas,Villarreal,W
78,0,0,2024-01-20,Villarreal,Mallorca,D,0,0,Mallorca,Villarreal,D
79,1,0,2024-01-27,Villarreal,Barcelona,W,0,1,Barcelona,Villarreal,L
80,0,0,2024-02-04,Villarreal,Cádiz,D,0,0,Cadiz,Villarreal,D


Now we are going to take the cases where the model predicts that team x wins and team y does not win, and compare them with what actually happened.

In [None]:
# Outcomes for the matches where the model predicts a win for team x and no win for team y.
merged.loc[
    merged["prediction_x"].eq(1) & merged["prediction_y"].eq(0),
    "actual_x",
].value_counts()

1    12
0    11
Name: actual_x, dtype: int64

In [None]:
# Precision on the "confident" predictions (team x predicted to win, team y predicted
# not to win): the share of those matches that team x really won. Computed from
# `merged` instead of hand-typed counts so it stays correct when the notebook is re-run.
merged.loc[
    (merged["prediction_x"] == 1) & (merged["prediction_y"] == 0),
    "actual_x",
].mean()

0.5217391304347826