# Data Analysis of Premier League Data with dataprep.eda library

In [34]:
import pandas as pd
import dataprep.eda as dt

## Importation of the dataset

In [35]:
# Load the player statistics CSV (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer='Premier League Player Stats.csv')

## Deletion of the _Rank_ column, which is not useful

In [36]:
# Drop the 'Rank' column; every other column is kept as-is.
df = df.drop(columns='Rank')

## Filtering the dataset to delete the least impactful players

In [37]:
# Keep only players with more than 10 shots who also have at least one
# assist or at least one goal.
df = df.loc[
    df['SHOTS'].gt(10)
    & (df['ASSISTS'].gt(0) | df['GOALS'].gt(0))
]

## Pearson, Spearman and KendallTau correlation plots

In [38]:
# Correlation plots (Pearson, Spearman and KendallTau) for the dataframe.
# The result is not assigned: as the cell's last expression it is left for
# the notebook to display.
dt.plot_correlation(df)

## Distribution plots for all the columns of the dataframe

In [39]:
# Called with the dataframe only, so no single column is singled out.
# NOTE(review): dataprep presumably draws one distribution plot per column
# here - confirm against the installed dataprep version.
dt.plot(df)

## Overall stats of the goals column

In [40]:
# Passing a column name as the second argument targets that one column
# (here "GOALS") instead of the whole dataframe.
dt.plot(df,"GOALS")

## Creation of the report related to this dataset

In [41]:
# Build the dataprep report for the dataframe and keep it in `report` so it
# can be displayed later; `title` sets the report's title text.
report = dt.create_report(df,title='Report Test')

HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=74.0), HTML(value='')), layout=Layout(dis…




In [42]:
# Bare expression: as the last line of the cell it makes the notebook render
# the report object inline.
report

0,1
Number of Variables,9
Number of Observations,197
Missing Cells,0
Missing Cells (%),0.0%
Duplicate Rows,0
Duplicate Rows (%),0.0%
Total Size in Memory,41.5 KB
Average Record Size in Memory,215.6 B

0,1
Categorical,2
Numerical,7

0,1
Distinct Count,197
Unique (%),100.0%
Missing,0
Missing (%),0.0%
Memory Size,3.1 KB

0,1
Mean,15.8629
Median,14.0
Minimum,7.0
Maximum,39.0

0,1
1st row,Jamie Vardy
2nd row,Danny Ings
3rd row,Pierre-Emerick Aub...
4th row,Raheem Shaquille S...
5th row,Mohamed Salah Ghal...

0,1
Count,2790
Lowercase Letter,2320
Space Separator,272
Uppercase Letter,470
Dash Punctuation,9
Decimal Number,0

0,1
Distinct Count,20
Unique (%),10.2%
Missing,0
Missing (%),0.0%
Memory Size,3.1 KB

0,1
Mean,13.1117
Median,14.0
Minimum,7.0
Maximum,24.0

0,1
1st row,Leicester City
2nd row,Southampton
3rd row,Arsenal
4th row,Manchester City
5th row,Liverpool

0,1
Count,2427
Lowercase Letter,2085
Space Separator,156
Uppercase Letter,342
Dash Punctuation,0
Decimal Number,0

0,1
Distinct Count,28
Unique (%),14.2%
Missing,0
Missing (%),0.0%
Infinite,0
Infinite (%),0.0%
Mean,29.5381
Minimum,10
Maximum,38
Zeros,0

0,1
Minimum,10
5-th Percentile,18
Q1,25
Median,31
Q3,35
95-th Percentile,38
Maximum,38
Range,28
IQR,10

0,1
Standard Deviation,6.7185
Coefficient of Variation,0.2275
Kurtosis,-0.1384
Mean,29.5381
Skewness,-0.7639
Sum,5819.0
Variance,45.1376

0,1
Distinct Count,37
Unique (%),18.8%
Missing,0
Missing (%),0.0%
Infinite,0
Infinite (%),0.0%
Mean,23.8832
Minimum,1
Maximum,38
Zeros,0

0,1
Minimum,1.0
5-th Percentile,8.0
Q1,17.0
Median,26.0
Q3,32.0
95-th Percentile,37.16
Maximum,38.0
Range,37.0
IQR,15.0

0,1
Standard Deviation,9.2737
Coefficient of Variation,0.3883
Kurtosis,-0.8566
Mean,23.8832
Skewness,-0.4067
Sum,4705.0
Variance,86.0016

0,1
Distinct Count,188
Unique (%),95.4%
Missing,0
Missing (%),0.0%
Infinite,0
Infinite (%),0.0%
Mean,2129.2538
Minimum,226
Maximum,3420
Zeros,0

0,1
Minimum,226.0
5-th Percentile,830.2
Q1,1510.88
Median,2262.96
Q3,2768.0
95-th Percentile,3290.24
Maximum,3420.0
Range,3194.0
IQR,1257.12

0,1
Standard Deviation,790.0418
Coefficient of Variation,0.371
Kurtosis,-0.9565
Mean,2129.2538
Skewness,-0.317
Sum,419463.0
Variance,624166.0169

0,1
Distinct Count,22
Unique (%),11.2%
Missing,0
Missing (%),0.0%
Infinite,0
Infinite (%),0.0%
Mean,4.5178
Minimum,0
Maximum,23
Zeros,13

0,1
Minimum,0
5-th Percentile,0
Q1,2
Median,3
Q3,6
95-th Percentile,17
Maximum,23
Range,23
IQR,4

0,1
Standard Deviation,4.7387
Coefficient of Variation,1.0489
Kurtosis,3.4122
Mean,4.5178
Skewness,1.901
Sum,890.0
Variance,22.455

0,1
Distinct Count,14
Unique (%),7.1%
Missing,0
Missing (%),0.0%
Infinite,0
Infinite (%),0.0%
Mean,2.7005
Minimum,0
Maximum,20
Zeros,29

0,1
Minimum,0.0
5-th Percentile,0.0
Q1,1.0
Median,2.0
Q3,4.0
95-th Percentile,8.16
Maximum,20.0
Range,20.0
IQR,3.0

0,1
Standard Deviation,2.751
Coefficient of Variation,1.0187
Kurtosis,8.4227
Mean,2.7005
Skewness,2.2792
Sum,532.0
Variance,7.568

0,1
Distinct Count,53
Unique (%),26.9%
Missing,0
Missing (%),0.0%
Infinite,0
Infinite (%),0.0%
Mean,28.5381
Minimum,11
Maximum,95
Zeros,0

0,1
Minimum,11
5-th Percentile,12
Q1,15
Median,22
Q3,37
95-th Percentile,70
Maximum,95
Range,84
IQR,22

0,1
Standard Deviation,17.9362
Coefficient of Variation,0.6285
Kurtosis,1.33
Mean,28.5381
Skewness,1.3699
Sum,5622.0
Variance,321.709

0,1
Distinct Count,43
Unique (%),21.8%
Missing,0
Missing (%),0.0%
Infinite,0
Infinite (%),0.0%
Mean,14.1777
Minimum,1
Maximum,59
Zeros,0

0,1
Minimum,1.0
5-th Percentile,3.0
Q1,7.0
Median,10.0
Q3,20.0
95-th Percentile,39.16
Maximum,59.0
Range,58.0
IQR,13.0

0,1
Standard Deviation,11.0543
Coefficient of Variation,0.7797
Kurtosis,1.5343
Mean,14.1777
Skewness,1.4103
Sum,2793.0
Variance,122.1979
