In [1]:
%use kandy
%use dataframe

In [2]:
/**
 * Builds the SQL text that selects full-power ("SBD") results from `powerlifting_data`.
 *
 * A row is returned when all of the following hold:
 *  - its (meetname, date, weightclasskg, division) group has at least [entries] rows dated between
 *    January 1st of [startYear] and December 31st of [endYear]; the group size is counted over every
 *    row in that date range, not only over 'SBD' rows;
 *  - its event is 'SBD' and its own date lies in the same range;
 *  - all nine attempt columns and the three `best3*` columns are non-null;
 *  - its place is not 'NS'.
 *
 * @param startYear first calendar year of the range, as four digits (e.g. "2023").
 * @param endYear last calendar year of the range, as four digits.
 * @param entries minimum number of rows a meet/date/class/division group must have.
 * @return the SQL statement as a string.
 * @throws IllegalArgumentException if either year is not exactly four ASCII digits.
 */
fun queryByTimePeriodAndEntries(startYear: String, endYear: String, entries: Int): String {
    // The years are interpolated verbatim into quoted date literals, so anything other than four
    // digits would either break the date or let arbitrary SQL into the statement.
    fun requireYear(label: String, value: String) =
        require(value.length == 4 && value.all { it in '0'..'9' }) {
            "$label must be a four-digit year, got '$value'"
        }
    requireYear("startYear", startYear)
    requireYear("endYear", endYear)

    return """
SELECT
    pd.*
FROM
    powerlifting_data pd
        JOIN
    (
        SELECT
            meetname,
            date,
            weightclasskg,
            division,
            COUNT(*) AS lifter_count
        FROM
            powerlifting_data
        WHERE
            date BETWEEN '$startYear-01-01' AND '$endYear-12-31'
        GROUP BY
            meetname, date, weightclasskg, division
        HAVING
            COUNT(*) >= $entries
    ) AS qualified_classes
    ON pd.meetname = qualified_classes.meetname
        AND pd.date = qualified_classes.date
        AND pd.weightclasskg = qualified_classes.weightclasskg
        AND pd.division = qualified_classes.division
WHERE
pd.event = 'SBD'
  AND pd.date BETWEEN '$startYear-01-01' AND '$endYear-12-31'
  AND pd.squat1kg IS NOT NULL
  AND pd.squat2kg IS NOT NULL
  AND pd.squat3kg IS NOT NULL
  AND pd.bench1kg IS NOT NULL
  AND pd.bench2kg IS NOT NULL
  AND pd.bench3kg IS NOT NULL
  AND pd.deadlift1kg IS NOT NULL
  AND pd.deadlift2kg IS NOT NULL
  AND pd.deadlift3kg IS NOT NULL
  AND pd.best3benchkg IS NOT NULL
  AND pd.best3squatkg IS NOT NULL
  AND pd.best3deadliftkg IS NOT NULL
  AND place != 'NS';
    """
}

In [4]:
import util.Helpers

// Project helper (util.Helpers); a later cell calls helpers.fetchResults(query) on it to load the data.
val helpers = Helpers()

// Typed column accessors shared by the cells below. With the `by column<T>()` delegate the
// column name is taken from the property name, so these names must not be changed casually.
val successfulLifts by column<Int>()
val count by column<Int>()


In [5]:
// Results dated within calendar year 2023, limited to meet/date/class/division groups
// that have at least five rows.
val query = queryByTimePeriodAndEntries(
    startYear = "2023",
    endYear = "2023",
    entries = 5
)

// Run the statement through the project helper; `data` is the frame the later cells analyse.
val data = helpers.fetchResults(query)

In [6]:
// The nine attempt columns (three per lift). Later cells count a value above zero as a
// successful attempt and a value of zero or below as a missed one.
val columns = listOf(
    // squat attempts
    data.squat1kg,
    data.squat2kg,
    data.squat3kg,
    // bench attempts
    data.bench1kg,
    data.bench2kg,
    data.bench3kg,
    // deadlift attempts
    data.deadlift1kg,
    data.deadlift2kg,
    data.deadlift3kg
)

In [7]:
// Preview the first ten rows of the query result as the cell output.
data.head(10)

name,sex,event,equipment,age,ageclass,birthyearclass,division,bodyweightkg,weightclasskg,squat1kg,squat2kg,squat3kg,squat4kg,best3squatkg,bench1kg,bench2kg,bench3kg,bench4kg,best3benchkg,deadlift1kg,deadlift2kg,deadlift3kg,deadlift4kg,best3deadliftkg,totalkg,place,dots,wilks,glossbrenner,goodlift,tested,country,state,federation,parentfederation,date,meetcountry,meetstate,meettown,meetname
Louise Dalgren,F,SBD,Wraps,39500000,35-39,40-49,Open,64250000,67.5,95000000,105000000,115000000,,115000000,45000000,52500000,60000000,,60000000,105000000,117500000,127500000,,127500000,302500000,1,321500000,320110000,282590000,65360000,,Denmark,,WUAP-AUT,WUAP,2023-04-29,Austria,,Strengberg,Osterreichische Staatsmeisterschaft &...
Rita Arca,F,SBD,Raw,24000000,24-34,24-39,Open,74600000,75.0,120000000,130000000,137500000,,137500000,72500000,80000000,85000000,,85000000,155000000,170000000,185000000,,185000000,407500000,1,398020000,388690000,341980000,81060000,,,,WPC-Portugal,WPC,2023-02-18,Portugal,,Vila do Conde,Nacional Powerlifting
Ana Santos,F,SBD,Raw,37000000,35-39,24-39,Open,69500000,75.0,120000000,130000000,135000000,,135000000,67500000,75000000,-77500000,,75000000,177500000,190000000,-195000000,,190000000,400000000,2,406140000,399920000,352580000,82540000,,Portugal,,WPC-Portugal,WPC,2023-02-18,Portugal,,Vila do Conde,Nacional Powerlifting
Matilde Aleluia,F,SBD,Raw,21000000,20-23,19-23,Open,88200000,90.0,162500000,172500000,182500000,,182500000,77500000,82500000,-85000000,,82500000,180000000,192500000,200000000,,200000000,465000000,1,418310000,405270000,352920000,86230000,,,,WPC-Portugal,WPC,2023-02-18,Portugal,,Vila do Conde,Nacional Powerlifting
Tiago Costa,M,SBD,Raw,32000000,24-34,24-39,Open,74100000,75.0,185000000,200000000,210000000,,210000000,132500000,142500000,147500000,,147500000,225000000,237500000,245000000,,245000000,602500000,1,435620000,432970000,418570000,88390000,,Portugal,,WPC-Portugal,WPC,2023-02-18,Portugal,,Vila do Conde,Nacional Powerlifting
Diogo Vale,M,SBD,Raw,20000000,20-23,19-23,Open,79400000,82.5,190000000,205000000,-220000000,,205000000,120000000,132500000,-140000000,,132500000,220000000,232500000,242500000,,242500000,580000000,2,401720000,397860000,383480000,82120000,,,,WPC-Portugal,WPC,2023-02-18,Portugal,,Vila do Conde,Nacional Powerlifting
Ricardo Lima,M,SBD,Raw,33000000,24-34,24-39,Open,76200000,82.5,187500000,-200000000,-200000000,,187500000,125000000,-140000000,-147500000,,125000000,230000000,250000000,-265000000,,250000000,562500000,3,399520000,396460000,382820000,81340000,,Portugal,,WPC-Portugal,WPC,2023-02-18,Portugal,,Vila do Conde,Nacional Powerlifting
Márcio Correia,M,SBD,Raw,36000000,35-39,24-39,Open,80100000,82.5,-177500000,-177500000,177500000,,177500000,110000000,120000000,-127500000,,120000000,195000000,217500000,-222500000,,217500000,515000000,4,354860000,351310000,338480000,72590000,,Portugal,,WPC-Portugal,WPC,2023-02-18,Portugal,,Vila do Conde,Nacional Powerlifting
Hugo Sousa,M,SBD,Raw,34000000,24-34,24-39,Open,79500000,82.5,120000000,135000000,142500000,,142500000,-92500000,92500000,100000000,,100000000,190000000,205000000,215000000,,215000000,457500000,5,316640000,313580000,302230000,64730000,,,,WPC-Portugal,WPC,2023-02-18,Portugal,,Vila do Conde,Nacional Powerlifting
Paulo Gonçalves,M,SBD,Raw,37000000,35-39,24-39,Open,87400000,90.0,200000000,220000000,240000000,,240000000,150000000,160000000,-170000000,,160000000,250000000,270000000,285000000,,285000000,685000000,3,449640000,444070000,426150000,92400000,,Portugal,,WPC-Portugal,WPC,2023-02-18,Portugal,,Vila do Conde,Nacional Powerlifting


In [8]:
// Per-column summary (type, count, unique, nulls, top, freq, mean, std, min, median, max).
data.describe()

name,type,count,unique,nulls,top,freq,mean,std,min,median,max
name,String,47083,37551,0,Lance London,7,,,A Badhusha Sultan,Juan Manuel Aguirre,龚渠帆
sex,String,47083,2,0,M,34360,,,F,M,M
event,String,47083,1,0,SBD,47083,,,SBD,SBD,SBD
equipment,String,47083,5,0,Raw,42373,,,Multi-ply,Raw,Wraps
age,Double?,47083,140,5296,21500000,1758,26528538.0,8674711.0,9500000,24500000,84000000
ageclass,String?,47083,17,4403,24-34,18346,,,13-15,24-34,80-999
birthyearclass,String?,47083,8,4246,24-39,20840,,,14-18,24-39,70-999
division,String,47083,218,0,Open,18715,,,Amateur,Masters 1,Youth B
bodyweightkg,Double,47083,5620,0,82500000,201,83206137.0,17061422.0,21920000,81750000,245000000
weightclasskg,String,47083,52,0,93,4553,,,100,75,95


In [11]:

/**
 * Tallies lifters by how many of their nine attempts were successful.
 *
 * An attempt counts as successful when its value in the notebook-level `columns` list is
 * greater than zero. The result has one row per distinct number of successful attempts
 * (`successfulLifts`) with the number of lifters in that bucket (`count`), sorted ascending.
 * Buckets with fewer than three successful attempts are dropped — presumably because such a
 * lifter cannot have a good attempt in every lift; confirm against the analysis intent.
 *
 * @param data rows returned by the results query.
 * @param firstPlaceOnly when true (the default) only rows whose `place` is "1" are tallied.
 * @return a frame with the columns `successfulLifts` and `count`.
 */
fun addNumberOfSuccessfulLifts(data: DataFrame<Line_15_jupyter._DataFrameType>, firstPlaceOnly: Boolean = true): AnyFrame {
    val lifters = when {
        firstPlaceOnly -> data.filter { it.place == "1" }
        else -> data
    }

    // Per lifter: how many of the nine attempt columns hold a positive value.
    val withSuccessCount = lifters.add(successfulLifts) {
        columns.count { attempt -> it[attempt] > 0 }
    }

    // One row per distinct success count, with the number of lifters in it.
    val tally = withSuccessCount
        .groupBy { it[successfulLifts] }
        .aggregate { count() into count }

    return tally
        .drop { it[successfulLifts] in 0..2 }
        .sortBy(successfulLifts)
}

In [12]:
// Tally of first-place finishers by number of successful attempts (firstPlaceOnly defaults to true).
val winnersDataFrame = addNumberOfSuccessfulLifts(data)

In [13]:
// Show the winners tally as the cell output.
winnersDataFrame

successfulLifts,count
3,37
4,211
5,700
6,1527
7,2277
8,2348
9,1218


In [34]:
// Unstyled bar chart of the winners tally (successful attempts on x, number of winners on y),
// written to an SVG file instead of being shown in the notebook.
winnersDataFrame.plot {
    bars {
        x(successfulLifts)
        y(count)
    }
}.save("distribution-of-winners.svg")

/Users/adelecarpenter/repos/kotlinnotebooks/src/main/kotlin/notebooks/lets-plot-images/distribution-of-winners.svg

In [40]:
// Styled bar chart of the winners tally: explicit integer axis ticks, custom colours,
// centred title and right-aligned caption. The chart is saved to SVG at the end of the cell.
val plotWinners = plot(winnersDataFrame) {

    bars {
        x(successfulLifts) {
            axis {
                // One integer tick for each success count in the tally (3 through 9).
                breaks((3..9).toList(), format = "d")
            }
        }
        y(count) {
            axis.name = "Number of Winners"
            axis {
                // Ticks every 250 winners, from 250 up to 2250.
                breaks((250..2250 step 250).toList(), format = "d")
            }
        }
        fillColor = Color.hex("#fec92e")
        borderLine {
            color = Color.hex("#777777")
            width = 0.5
        }
    }
    // Disabled experiment: a fully transparent point at (9, 30).
//    points {
//        x.constant(9)
//        y.constant(30)
//        symbol = Symbol.CIRCLE_FILLED
//        alpha = 0.0 // transparent
//    }
    layout {
        title = "Distribution of Winners by Successful Attempts"
        caption = "data: Open powerlifting meets 2023"
        size = 600 to 600
        xAxisLabel = "Successful Attempts"
        style {
            global {
                text {
                    fontFamily = FontFamily.custom("Helvetica Neue")
                }
                plotCanvas {
                    title {
                        hJust = 0.5
                        margin = Margin(10.0)
                        fontSize = 17.0
                    }
                    caption {
                        hJust = 1.0
                        margin = Margin(10.0, 0.0, 0.0, 0.0)
                    }
                    margin = Margin(0.0, 30.0, 0.0, 10.0)
                }
            }
        }
    }
}

//plotWinners
plotWinners.save("distribution-of-winners-custom-formatting.svg")

/Users/adelecarpenter/repos/kotlinnotebooks/src/main/kotlin/notebooks/lets-plot-images/distribution-of-winners-custom-formatting.svg

In [15]:
// Same tally as for the winners, but over every lifter in the query result (firstPlaceOnly = false).
val allLiftersDataFrame = addNumberOfSuccessfulLifts(data, false)

In [16]:
// Show the all-lifters tally as the cell output.
allLiftersDataFrame

successfulLifts,count
3,261
4,1402
5,4297
6,8756
7,12692
8,12806
9,6867


In [17]:
// Column accessors for the combined tally.
val winners by column<Int>()
val allLifters by column<Int>()
// Holds a percentage produced by floating-point division, so the accessor is typed Double.
val ratioWinners by column<Double>()

// For each number of successful attempts: winners, all lifters, and winners as a percentage of
// all lifters. The two tallies are matched on `successfulLifts` rather than by row position, so
// a bucket present in only one of them cannot shift the other's counts onto the wrong row;
// the default inner join keeps only buckets present in both.
val dfRatioWinners =
    winnersDataFrame.rename(count).into(winners)
        .join(allLiftersDataFrame.rename(count).into(allLifters)) { successfulLifts }
        .add(ratioWinners) {
            (it[winners].toDouble() / it[allLifters].toDouble()) * 100.0
        }

In [18]:
// Bar chart of the share of lifters who took first place, per number of successful attempts.
// The last line of the cell renders the chart in the notebook.
val plotRatioWinners = plot(dfRatioWinners) {

    bars {
        x(successfulLifts) {
            axis {
                // One integer tick for each success count in the tally (3 through 9).
                breaks((3..9).toList(), format = "d")
            }
        }
        y(ratioWinners) {
            axis.name = "percentage"
        }
        fillColor = Color.hex("#fec92e")
        borderLine {
            color = Color.hex("#777777")
            width = 0.5
        }
    }
    // Fully transparent point at (9, 30). NOTE(review): presumably added so the y axis
    // extends to at least 30 — confirm before removing.
    points {
        x.constant(9)
        y.constant(30)
        symbol = Symbol.CIRCLE_FILLED
        alpha = 0.0 // transparent
    }
    layout {
        title = "Percentage of First Places by Successful Lifts"
        subtitle = "at least 5 lifters in weight class"
        caption = "data: Open powerlifting meets 2023"
        size = 600 to 400
        xAxisLabel = "Successful Attempts"
        style {
            global {
                text {
                    fontFamily = FontFamily.custom("Helvetica Neue")
                }
                plotCanvas {
                    title {
                        hJust = 0.5
                        margin = Margin(10.0)
                        fontSize = 14.0
                    }
                    subtitle {
                        hJust = 0.5
                        margin = Margin(5.0)
                        fontSize = 11.0
                    }
                    caption {
                        hJust = 1.0
                        margin = Margin(10.0, 0.0, 0.0, 0.0)
                    }
                    margin = Margin(5.0, 30.0, 20.0, 5.0)
                }
            }
        }
    }
}

plotRatioWinners

In [19]:
// Column accessor; later cells reuse its name for the x-axis column of the missed-lift chart.
val missedLifts by column<Int>()

/**
 * Totals, for each of the nine attempt columns, how many lifters missed that attempt, looking
 * only at lifters with exactly [successfulAttempts] successful attempts.
 *
 * An attempt is treated as successful when its value (truncated to Int) is above zero and as
 * missed otherwise. With the default of 8 every selected lifter missed exactly one attempt, so
 * each total is the number of lifters whose single miss was that attempt.
 *
 * @param df rows returned by the results query.
 * @param successfulAttempts the exact number of successful attempts a lifter must have to be
 *   included; defaults to 8.
 * @return a frame grouped by `successfulLifts` with one `total_missed_<column>` column per
 *   attempt column in the notebook-level `columns` list.
 */
fun addWhichLiftsWereMissed(df: DataFrame<Line_15_jupyter._DataFrameType>, successfulAttempts: Int = 8): AnyFrame {

    return df.add(successfulLifts) {
        columns.count { lift -> it[lift].toInt() > 0 }
    }
        // Narrow to the requested lifters first so the nine flag columns below are only
        // computed for rows that reach the aggregation.
        .filter { it[successfulLifts] == successfulAttempts }
        .let { frame ->
            // One 0/1 flag column per attempt: 1 when that attempt was missed.
            columns.fold(frame) { acc, lift ->
                acc.add("missed_${lift.name()}") {
                    if (it[lift].toInt() <= 0) 1 else 0
                }
            }
        }
        .groupBy { successfulLifts }
        .aggregate {
            columns.forEach { lift ->
                sum("missed_${lift.name()}") into "total_missed_${lift.name()}"
            }
        }
}

In [20]:
// Per-attempt miss totals over the whole query result.
val missedLiftsDataFrame = addWhichLiftsWereMissed(data)


In [21]:
// Show the miss totals as the cell output.
missedLiftsDataFrame

successfulLifts,total_missed_squat1kg,total_missed_squat2kg,total_missed_squat3kg,total_missed_bench1kg,total_missed_bench2kg,total_missed_bench3kg,total_missed_deadlift1kg,total_missed_deadlift2kg,total_missed_deadlift3kg
8,475,626,2049,391,714,4844,254,378,3075


In [22]:
// Reshape the wide totals row(s) into (short label, total) pairs: S = squat, B = bench,
// D = deadlift, followed by the attempt number.
val labels = missedLiftsDataFrame.map { row ->
    listOf(
        "S1" to "total_missed_squat1kg",
        "S2" to "total_missed_squat2kg",
        "S3" to "total_missed_squat3kg",
        "B1" to "total_missed_bench1kg",
        "B2" to "total_missed_bench2kg",
        "B3" to "total_missed_bench3kg",
        "D1" to "total_missed_deadlift1kg",
        "D2" to "total_missed_deadlift2kg",
        "D3" to "total_missed_deadlift3kg"
    ).map { (label, totalColumn) -> label to row[totalColumn] }
}
    .flatten()

// Long-format frame for plotting: one row per attempt label with its miss total.
val countDataFrame = dataFrameOf(
    missedLifts.name() to labels.map { (label, _) -> label },
    count.name() to labels.map { (_, total) -> total }
)

// Bar chart of how often each attempt (S1..D3) was missed, built from the long-format totals.
// The chart is rendered in the notebook; the SVG export on the last line is disabled.
val plotMissedLifts = countDataFrame.plot {
    bars {
        x(missedLifts) {
            axis {
//                elementText(size = 14)
            }
        }
        y(count) { axis.name = "count of attempts missed" }
        fillColor = Color.hex("#fec92e")
        borderLine {
            color = Color.hex("#777777")
            width = 0.5
        }
    }
    layout {
        title = "Most Commonly Missed Lift - All lifters"
        caption = "data: Open powerlifting meets 2023"
        size = 600 to 400
        xAxisLabel = "Lift"
        style {
            global {
                text {
                    fontFamily = FontFamily.custom("Helvetica Neue")
                }
                plotCanvas {
                    title {
                        hJust = 0.5
                        margin = Margin(10.0)
                        fontSize = 17.0
                    }
                    subtitle {
                        hJust = 0.5
                        margin = Margin(5.0)
                    }
                    caption {
                        hJust = 1.0
                        margin = Margin(10.0, 0.0, 0.0, 0.0)
                    }
                    margin = Margin(5.0, 30.0, 20.0, 5.0)
                }
            }
        }
    }
}
plotMissedLifts
//plotMissedLifts.save("most-commonly-missed-lifts-2015-2024.svg")
