In [11]:
import os
import math
import sqlite3

import numpy as np
import pandas as pd
from sqlalchemy import create_engine

# Relative location of the SQLite database: two directory levels up from the
# current working directory, then data/ufc.db. The path is not anchored to
# this notebook's own location, so it resolves against wherever the kernel
# was started.
db_path = os.path.join(os.pardir, os.pardir, "data", "ufc.db")

In [49]:
# Venue / travel feature query. One output row per bout; every feature is
# expressed as red-corner minus blue-corner ("_diff") and red / blue ("_ratio").
#
#   cte1  one row per (bout, fighter) with that fighter's outcome, built from
#         the red and blue columns of ufcstats_bouts.
#   cte2  each fighter-history row joined to its event's venue data
#         (event_mapping -> wikipedia_events -> wikipedia_venues), a 0/1 win
#         flag, and the latitude/longitude of the fighter's previous bout.
#   cte3  per-fighter running averages over strictly earlier bouts
#         (ROWS ... 1 PRECEDING), bout-over-bout changes, and the distance
#         between the current and previous venue: spherical law of cosines,
#         arc in degrees scaled by 111.0 (presumably km per degree -- the
#         column is named distance_km_change).
#   cte4  falls back to the overall prior win rate when there is no prior
#         bout at the same venue; running averages of the change columns.
#   cte5  self-join to the opponent's row for the same bout, then running
#         averages of the opponent's features and of fighter-minus-opponent.
#
# The final SELECT keeps bouts from UFC events dated 2008-04-19 (inclusive) to
# 2021-01-01 (exclusive) with a W/L red outcome and a method other than 'DQ'.
#
# Fixes relative to the previous revision (both were copy-paste slips that
# broke the otherwise uniform column pattern):
#   * cte5.avg_opp_avg_distance_km_change_diff now subtracts the opponent's
#     avg_distance_km_change from the fighter's avg_distance_km_change
#     (it used the fighter's raw distance_km_change).
#   * avg_opp_win_rate_at_venue_diff / _ratio now compare red and blue on the
#     same column, avg_opp_win_rate_at_venue (blue used win_rate_at_venue).
#
# NOTE(review): cte4 applies LAG() to event_occupancy_pct_change while the
# other *_change columns are passed through unshifted -- confirm which of the
# two behaviours is intended.
query = """
WITH cte1 AS (
    SELECT
        id,
        red_fighter_id AS fighter_id,
        red_outcome AS outcome
    FROM
        ufcstats_bouts
    UNION
    SELECT
        id,
        blue_fighter_id AS fighter_id,
        blue_outcome AS outcome
    FROM
        ufcstats_bouts
),
cte2 AS (
    SELECT
        t1.fighter_id,
        t1.'order',
        t1.bout_id,
        t1.opponent_id,
        CASE
            WHEN t6.outcome = 'W' THEN 1
            ELSE 0
        END AS win,
        t4.venue_id,
        t5.latitude,
        t5.longitude,
        LAG(t5.latitude) OVER (PARTITION BY t1.fighter_id ORDER BY t1.'order') AS prev_latitude,
        LAG(t5.longitude) OVER (PARTITION BY t1.fighter_id ORDER BY t1.'order') AS prev_longitude,
        t5.elevation_meters,
        t5.capacity,
        t4.attendance
    FROM
        ufcstats_fighter_histories t1
    LEFT JOIN
        ufcstats_bouts t2
    ON t1.bout_id = t2.id
    LEFT JOIN
        event_mapping t3
    ON t2.event_id = t3.ufcstats_id
    LEFT JOIN
        wikipedia_events t4
        ON t3.wikipedia_id = t4.id
    LEFT JOIN
        wikipedia_venues t5
        ON t4.venue_id = t5.id
    LEFT JOIN
        cte1 t6
        ON t1.bout_id = t6.id AND t1.fighter_id = t6.fighter_id
),
cte3 AS (
    SELECT
        fighter_id,
        t1.'order',
        bout_id,
        opponent_id,
        AVG(win) OVER (
            PARTITION BY fighter_id, venue_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS win_rate_at_venue,
        AVG(win) OVER (
            PARTITION BY fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS win_rate_temp,
        CASE
            WHEN latitude IS NULL OR prev_latitude IS NULL OR longitude IS NULL OR prev_longitude IS NULL THEN NULL
            ELSE 111.0 * DEGREES(
                ACOS(MIN(1.0, COS(RADIANS(latitude))
                    * COS(RADIANS(prev_latitude))
                    * COS(RADIANS(longitude - prev_longitude))
                    + SIN(RADIANS(latitude))
                    * SIN(RADIANS(prev_latitude))))
            )
        END AS distance_km_change,
        AVG(elevation_meters) OVER (
            PARTITION BY fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_elevation_meters,
        elevation_meters - LAG(elevation_meters) OVER (
            PARTITION BY fighter_id
            ORDER BY t1.'order'
        ) AS elevation_meters_change,
        AVG(capacity) OVER (
            PARTITION BY fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_event_capacity,
        capacity - LAG(capacity) OVER (
            PARTITION BY fighter_id
            ORDER BY t1.'order'
        ) AS event_capacity_change,
        AVG(attendance) OVER (
            PARTITION BY fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_event_attendance,
        attendance - LAG(attendance) OVER (
            PARTITION BY fighter_id
            ORDER BY t1.'order'
        ) AS event_attendance_change,
        1.0 * attendance / capacity AS event_occupancy_pct,
        1.0 * attendance / capacity - LAG(1.0 * attendance / capacity) OVER (
            PARTITION BY fighter_id
            ORDER BY t1.'order'
        ) AS event_occupancy_pct_change
    FROM
        cte2 t1
    ORDER BY fighter_id, t1.'order'
),
cte4 AS (
    SELECT
        fighter_id,
        t1.'order',
        bout_id,
        opponent_id,
        CASE
            WHEN win_rate_at_venue IS NULL THEN win_rate_temp
            ELSE win_rate_at_venue
        END AS win_rate_at_venue,
        distance_km_change,
        AVG(distance_km_change) OVER (
            PARTITION BY fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_distance_km_change,
        avg_elevation_meters,
        elevation_meters_change,
        AVG(elevation_meters_change) OVER (
            PARTITION BY fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_elevation_meters_change,
        avg_event_capacity,
        event_capacity_change,
        AVG(event_capacity_change) OVER (
            PARTITION BY fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_event_capacity_change,
        avg_event_attendance,
        event_attendance_change,
        AVG(event_attendance_change) OVER (
            PARTITION BY fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_event_attendance_change,
        AVG(event_occupancy_pct) OVER (
            PARTITION BY fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_event_occupancy_pct,
        LAG(event_occupancy_pct_change) OVER (
            PARTITION BY fighter_id
            ORDER BY t1.'order'
        ) AS event_occupancy_pct_change,
        AVG(event_occupancy_pct_change) OVER (
            PARTITION BY fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_event_occupancy_pct_change
    FROM
        cte3 t1
),
cte5 AS (
    SELECT
        t1.*,
        AVG(t2.win_rate_at_venue) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_win_rate_at_venue,
        AVG(t1.win_rate_at_venue - t2.win_rate_at_venue) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_win_rate_at_venue_diff,
        AVG(t2.distance_km_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_distance_km_change,
        AVG(t1.distance_km_change - t2.distance_km_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_distance_km_change_diff,
        AVG(t2.avg_distance_km_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_avg_distance_km_change,
        AVG(t1.avg_distance_km_change - t2.avg_distance_km_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_avg_distance_km_change_diff,
        AVG(t2.avg_elevation_meters) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_avg_elevation_meters,
        AVG(t1.avg_elevation_meters - t2.avg_elevation_meters) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_avg_elevation_meters_diff,
        AVG(t2.elevation_meters_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_elevation_meters_change,
        AVG(t1.elevation_meters_change - t2.elevation_meters_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_elevation_meters_change_diff,
        AVG(t2.avg_elevation_meters_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_avg_elevation_meters_change,
        AVG(t1.avg_elevation_meters_change - t2.avg_elevation_meters_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_avg_elevation_meters_change_diff,
        AVG(t2.avg_event_capacity) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_avg_event_capacity,
        AVG(t1.avg_event_capacity - t2.avg_event_capacity) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_avg_event_capacity_diff,
        AVG(t2.event_capacity_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_event_capacity_change,
        AVG(t1.event_capacity_change - t2.event_capacity_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_event_capacity_change_diff,
        AVG(t2.avg_event_capacity_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_avg_event_capacity_change,
        AVG(t1.avg_event_capacity_change - t2.avg_event_capacity_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_avg_event_capacity_change_diff,
        AVG(t2.avg_event_attendance) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_avg_event_attendance,
        AVG(t1.avg_event_attendance - t2.avg_event_attendance) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_avg_event_attendance_diff,
        AVG(t2.event_attendance_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_event_attendance_change,
        AVG(t1.event_attendance_change - t2.event_attendance_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_event_attendance_change_diff,
        AVG(t2.avg_event_attendance_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_avg_event_attendance_change,
        AVG(t1.avg_event_attendance_change - t2.avg_event_attendance_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_avg_event_attendance_change_diff,
        AVG(t2.avg_event_occupancy_pct) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_avg_event_occupancy_pct,
        AVG(t1.avg_event_occupancy_pct - t2.avg_event_occupancy_pct) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_avg_event_occupancy_pct_diff,
        AVG(t2.event_occupancy_pct_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_event_occupancy_pct_change,
        AVG(t1.event_occupancy_pct_change - t2.event_occupancy_pct_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_event_occupancy_pct_change_diff,
        AVG(t2.avg_event_occupancy_pct_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_avg_event_occupancy_pct_change,
        AVG(t1.avg_event_occupancy_pct_change - t2.avg_event_occupancy_pct_change) OVER (
            PARTITION BY t1.fighter_id
            ORDER BY t1.'order'
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS avg_opp_avg_event_occupancy_pct_change_diff
    FROM
        cte4 t1
    LEFT JOIN
        cte4 t2
    ON t1.fighter_id = t2.opponent_id AND t1.bout_id = t2.bout_id AND t1.opponent_id = t2.fighter_id
)
SELECT
    id,
    t2.win_rate_at_venue - t3.win_rate_at_venue AS win_rate_at_venue_diff,
    1.0 * t2.win_rate_at_venue / t3.win_rate_at_venue AS win_rate_at_venue_ratio,
    t2.distance_km_change - t3.distance_km_change AS distance_km_change_diff,
    1.0 * t2.distance_km_change / t3.distance_km_change AS distance_km_change_ratio,
    t2.avg_distance_km_change - t3.avg_distance_km_change AS avg_distance_km_change_diff,
    1.0 * t2.avg_distance_km_change / t3.avg_distance_km_change AS avg_distance_km_change_ratio,
    t2.avg_elevation_meters - t3.avg_elevation_meters AS avg_elevation_meters_diff,
    1.0 * t2.avg_elevation_meters / t3.avg_elevation_meters AS avg_elevation_meters_ratio,
    t2.elevation_meters_change - t3.elevation_meters_change AS elevation_meters_change_diff,
    1.0 * t2.elevation_meters_change / t3.elevation_meters_change AS elevation_meters_change_ratio,
    t2.avg_elevation_meters_change - t3.avg_elevation_meters_change AS avg_elevation_meters_change_diff,
    1.0 * t2.avg_elevation_meters_change / t3.avg_elevation_meters_change AS avg_elevation_meters_change_ratio,
    t2.avg_event_capacity - t3.avg_event_capacity AS avg_event_capacity_diff,
    1.0 * t2.avg_event_capacity / t3.avg_event_capacity AS avg_event_capacity_ratio,
    t2.event_capacity_change - t3.event_capacity_change AS event_capacity_change_diff,
    1.0 * t2.event_capacity_change / t3.event_capacity_change AS event_capacity_change_ratio,
    t2.avg_event_capacity_change - t3.avg_event_capacity_change AS avg_event_capacity_change_diff,
    1.0 * t2.avg_event_capacity_change / t3.avg_event_capacity_change AS avg_event_capacity_change_ratio,
    t2.avg_event_attendance - t3.avg_event_attendance AS avg_event_attendance_diff,
    1.0 * t2.avg_event_attendance / t3.avg_event_attendance AS avg_event_attendance_ratio,
    t2.event_attendance_change - t3.event_attendance_change AS event_attendance_change_diff,
    1.0 * t2.event_attendance_change / t3.event_attendance_change AS event_attendance_change_ratio,
    t2.avg_event_attendance_change - t3.avg_event_attendance_change AS avg_event_attendance_change_diff,
    1.0 * t2.avg_event_attendance_change / t3.avg_event_attendance_change AS avg_event_attendance_change_ratio,
    t2.avg_event_occupancy_pct - t3.avg_event_occupancy_pct AS avg_event_occupancy_pct_diff,
    1.0 * t2.avg_event_occupancy_pct / t3.avg_event_occupancy_pct AS avg_event_occupancy_pct_ratio,
    t2.event_occupancy_pct_change - t3.event_occupancy_pct_change AS event_occupancy_pct_change_diff,
    1.0 * t2.event_occupancy_pct_change / t3.event_occupancy_pct_change AS event_occupancy_pct_change_ratio,
    t2.avg_event_occupancy_pct_change - t3.avg_event_occupancy_pct_change AS avg_event_occupancy_pct_change_diff,
    1.0 * t2.avg_event_occupancy_pct_change / t3.avg_event_occupancy_pct_change AS avg_event_occupancy_pct_change_ratio,
    t2.avg_opp_win_rate_at_venue - t3.avg_opp_win_rate_at_venue AS avg_opp_win_rate_at_venue_diff,
    1.0 * t2.avg_opp_win_rate_at_venue / t3.avg_opp_win_rate_at_venue AS avg_opp_win_rate_at_venue_ratio,
    t2.avg_opp_win_rate_at_venue_diff - t3.avg_opp_win_rate_at_venue_diff AS avg_opp_win_rate_at_venue_diff_diff,
    1.0 * t2.avg_opp_win_rate_at_venue_diff / t3.avg_opp_win_rate_at_venue_diff AS avg_opp_win_rate_at_venue_diff_ratio,
    t2.avg_opp_distance_km_change - t3.avg_opp_distance_km_change AS avg_opp_distance_km_change_diff,
    1.0 * t2.avg_opp_distance_km_change / t3.avg_opp_distance_km_change AS avg_opp_distance_km_change_ratio,
    t2.avg_opp_distance_km_change_diff - t3.avg_opp_distance_km_change_diff AS avg_opp_distance_km_change_diff_diff,
    1.0 * t2.avg_opp_distance_km_change_diff / t3.avg_opp_distance_km_change_diff AS avg_opp_distance_km_change_diff_ratio,
    t2.avg_opp_avg_distance_km_change - t3.avg_opp_avg_distance_km_change AS avg_opp_avg_distance_km_change_diff,
    1.0 * t2.avg_opp_avg_distance_km_change / t3.avg_opp_avg_distance_km_change AS avg_opp_avg_distance_km_change_ratio,
    t2.avg_opp_avg_distance_km_change_diff - t3.avg_opp_avg_distance_km_change_diff AS avg_opp_avg_distance_km_change_diff_diff,
    1.0 * t2.avg_opp_avg_distance_km_change_diff / t3.avg_opp_avg_distance_km_change_diff AS avg_opp_avg_distance_km_change_diff_ratio,
    t2.avg_opp_avg_elevation_meters - t3.avg_opp_avg_elevation_meters AS avg_opp_avg_elevation_meters_diff,
    1.0 * t2.avg_opp_avg_elevation_meters / t3.avg_opp_avg_elevation_meters AS avg_opp_avg_elevation_meters_ratio,
    t2.avg_opp_avg_elevation_meters_diff - t3.avg_opp_avg_elevation_meters_diff AS avg_opp_avg_elevation_meters_diff_diff,
    1.0 * t2.avg_opp_avg_elevation_meters_diff / t3.avg_opp_avg_elevation_meters_diff AS avg_opp_avg_elevation_meters_diff_ratio,
    t2.avg_opp_elevation_meters_change - t3.avg_opp_elevation_meters_change AS avg_opp_elevation_meters_change_diff,
    1.0 * t2.avg_opp_elevation_meters_change / t3.avg_opp_elevation_meters_change AS avg_opp_elevation_meters_change_ratio,
    t2.avg_opp_elevation_meters_change_diff - t3.avg_opp_elevation_meters_change_diff AS avg_opp_elevation_meters_change_diff_diff,
    1.0 * t2.avg_opp_elevation_meters_change_diff / t3.avg_opp_elevation_meters_change_diff AS avg_opp_elevation_meters_change_diff_ratio,
    t2.avg_opp_avg_elevation_meters_change - t3.avg_opp_avg_elevation_meters_change AS avg_opp_avg_elevation_meters_change_diff,
    1.0 * t2.avg_opp_avg_elevation_meters_change / t3.avg_opp_avg_elevation_meters_change AS avg_opp_avg_elevation_meters_change_ratio,
    t2.avg_opp_avg_elevation_meters_change_diff - t3.avg_opp_avg_elevation_meters_change_diff AS avg_opp_avg_elevation_meters_change_diff_diff,
    1.0 * t2.avg_opp_avg_elevation_meters_change_diff / t3.avg_opp_avg_elevation_meters_change_diff AS avg_opp_avg_elevation_meters_change_diff_ratio,
    t2.avg_opp_avg_event_capacity - t3.avg_opp_avg_event_capacity AS avg_opp_avg_event_capacity_diff,
    1.0 * t2.avg_opp_avg_event_capacity / t3.avg_opp_avg_event_capacity AS avg_opp_avg_event_capacity_ratio,
    t2.avg_opp_avg_event_capacity_diff - t3.avg_opp_avg_event_capacity_diff AS avg_opp_avg_event_capacity_diff_diff,
    1.0 * t2.avg_opp_avg_event_capacity_diff / t3.avg_opp_avg_event_capacity_diff AS avg_opp_avg_event_capacity_diff_ratio,
    t2.avg_opp_event_capacity_change - t3.avg_opp_event_capacity_change AS avg_opp_event_capacity_change_diff,
    1.0 * t2.avg_opp_event_capacity_change / t3.avg_opp_event_capacity_change AS avg_opp_event_capacity_change_ratio,
    t2.avg_opp_event_capacity_change_diff - t3.avg_opp_event_capacity_change_diff AS avg_opp_event_capacity_change_diff_diff,
    1.0 * t2.avg_opp_event_capacity_change_diff / t3.avg_opp_event_capacity_change_diff AS avg_opp_event_capacity_change_diff_ratio,
    t2.avg_opp_avg_event_capacity_change - t3.avg_opp_avg_event_capacity_change AS avg_opp_avg_event_capacity_change_diff,
    1.0 * t2.avg_opp_avg_event_capacity_change / t3.avg_opp_avg_event_capacity_change AS avg_opp_avg_event_capacity_change_ratio,
    t2.avg_opp_avg_event_capacity_change_diff - t3.avg_opp_avg_event_capacity_change_diff AS avg_opp_avg_event_capacity_change_diff_diff,
    1.0 * t2.avg_opp_avg_event_capacity_change_diff / t3.avg_opp_avg_event_capacity_change_diff AS avg_opp_avg_event_capacity_change_diff_ratio,
    t2.avg_opp_avg_event_attendance - t3.avg_opp_avg_event_attendance AS avg_opp_avg_event_attendance_diff,
    1.0 * t2.avg_opp_avg_event_attendance / t3.avg_opp_avg_event_attendance AS avg_opp_avg_event_attendance_ratio,
    t2.avg_opp_avg_event_attendance_diff - t3.avg_opp_avg_event_attendance_diff AS avg_opp_avg_event_attendance_diff_diff,
    1.0 * t2.avg_opp_avg_event_attendance_diff / t3.avg_opp_avg_event_attendance_diff AS avg_opp_avg_event_attendance_diff_ratio,
    t2.avg_opp_event_attendance_change - t3.avg_opp_event_attendance_change AS avg_opp_event_attendance_change_diff,
    1.0 * t2.avg_opp_event_attendance_change / t3.avg_opp_event_attendance_change AS avg_opp_event_attendance_change_ratio,
    t2.avg_opp_event_attendance_change_diff - t3.avg_opp_event_attendance_change_diff AS avg_opp_event_attendance_change_diff_diff,
    1.0 * t2.avg_opp_event_attendance_change_diff / t3.avg_opp_event_attendance_change_diff AS avg_opp_event_attendance_change_diff_ratio,
    t2.avg_opp_avg_event_attendance_change - t3.avg_opp_avg_event_attendance_change AS avg_opp_avg_event_attendance_change_diff,
    1.0 * t2.avg_opp_avg_event_attendance_change / t3.avg_opp_avg_event_attendance_change AS avg_opp_avg_event_attendance_change_ratio,
    t2.avg_opp_avg_event_attendance_change_diff - t3.avg_opp_avg_event_attendance_change_diff AS avg_opp_avg_event_attendance_change_diff_diff,
    1.0 * t2.avg_opp_avg_event_attendance_change_diff / t3.avg_opp_avg_event_attendance_change_diff AS avg_opp_avg_event_attendance_change_diff_ratio,
    t2.avg_opp_avg_event_occupancy_pct - t3.avg_opp_avg_event_occupancy_pct AS avg_opp_avg_event_occupancy_pct_diff,
    1.0 * t2.avg_opp_avg_event_occupancy_pct / t3.avg_opp_avg_event_occupancy_pct AS avg_opp_avg_event_occupancy_pct_ratio,
    t2.avg_opp_avg_event_occupancy_pct_diff - t3.avg_opp_avg_event_occupancy_pct_diff AS avg_opp_avg_event_occupancy_pct_diff_diff,
    1.0 * t2.avg_opp_avg_event_occupancy_pct_diff / t3.avg_opp_avg_event_occupancy_pct_diff AS avg_opp_avg_event_occupancy_pct_diff_ratio,
    t2.avg_opp_event_occupancy_pct_change - t3.avg_opp_event_occupancy_pct_change AS avg_opp_event_occupancy_pct_change_diff,
    1.0 * t2.avg_opp_event_occupancy_pct_change / t3.avg_opp_event_occupancy_pct_change AS avg_opp_event_occupancy_pct_change_ratio,
    t2.avg_opp_event_occupancy_pct_change_diff - t3.avg_opp_event_occupancy_pct_change_diff AS avg_opp_event_occupancy_pct_change_diff_diff,
    1.0 * t2.avg_opp_event_occupancy_pct_change_diff / t3.avg_opp_event_occupancy_pct_change_diff AS avg_opp_event_occupancy_pct_change_diff_ratio,
    t2.avg_opp_avg_event_occupancy_pct_change - t3.avg_opp_avg_event_occupancy_pct_change AS avg_opp_avg_event_occupancy_pct_change_diff,
    1.0 * t2.avg_opp_avg_event_occupancy_pct_change / t3.avg_opp_avg_event_occupancy_pct_change AS avg_opp_avg_event_occupancy_pct_change_ratio,
    t2.avg_opp_avg_event_occupancy_pct_change_diff - t3.avg_opp_avg_event_occupancy_pct_change_diff AS avg_opp_avg_event_occupancy_pct_change_diff_diff,
    1.0 * t2.avg_opp_avg_event_occupancy_pct_change_diff / t3.avg_opp_avg_event_occupancy_pct_change_diff AS avg_opp_avg_event_occupancy_pct_change_diff_ratio,
    CASE
        WHEN red_outcome = 'W' THEN 1
        ELSE 0
    END AS red_win
FROM ufcstats_bouts AS t1
LEFT JOIN cte5 AS t2
ON t1.id = t2.bout_id AND t1.red_fighter_id = t2.fighter_id
LEFT JOIN cte5 AS t3
ON t1.id = t3.bout_id AND t1.blue_fighter_id = t3.fighter_id
WHERE event_id IN (
    SELECT id FROM ufcstats_events
    WHERE is_ufc_event = 1 AND date >= '2008-04-19' AND date < '2021-01-01'
) AND red_outcome IN ('W', 'L') AND outcome_method != 'DQ'
"""

# sqlite3's connection context manager only commits or rolls back the open
# transaction; it does not close the connection. Close it explicitly so the
# database handle is released even if the query fails.
conn = sqlite3.connect(db_path)
try:
    # The query's distance calculation calls these one-argument functions;
    # register Python implementations on this connection so they resolve
    # regardless of how the SQLite library was built.
    for sql_name, py_func in (
        ("ACOS", math.acos),
        ("COS", math.cos),
        ("SIN", math.sin),
        ("RADIANS", math.radians),
        ("DEGREES", math.degrees),
    ):
        conn.create_function(sql_name, 1, py_func)

    df = pd.read_sql(query, conn)
finally:
    conn.close()

df

Unnamed: 0,id,win_rate_at_venue_diff,win_rate_at_venue_ratio,distance_km_change_diff,distance_km_change_ratio,avg_distance_km_change_diff,avg_distance_km_change_ratio,avg_elevation_meters_diff,avg_elevation_meters_ratio,elevation_meters_change_diff,...,avg_opp_avg_event_occupancy_pct_diff_ratio,avg_opp_event_occupancy_pct_change_diff,avg_opp_event_occupancy_pct_change_ratio,avg_opp_event_occupancy_pct_change_diff_diff,avg_opp_event_occupancy_pct_change_diff_ratio,avg_opp_avg_event_occupancy_pct_change_diff,avg_opp_avg_event_occupancy_pct_change_ratio,avg_opp_avg_event_occupancy_pct_change_diff_diff,avg_opp_avg_event_occupancy_pct_change_diff_ratio,red_win
0,be38ed9ccfe2ee03,0.250000,2.000000,0.000000,1.000000,4.408011,3.934822,24.266667,1.036506,0.0,...,-0.578962,-0.167808,-2.842534,0.596338,-1.047550,-0.008228,3.371678,0.201749,-0.346577,1
1,eb1b371dfc37fcdb,,,,,,,,,,...,,,,,,,,,,1
2,219bd976b8ca745d,0.000000,1.000000,1376.289555,1.381817,1227.089791,1.545978,-4.066667,0.987856,630.2,...,-0.671715,-0.102990,0.524324,-0.649139,14.100619,0.073671,1.710782,-0.509056,3.008678,0
3,af178adff964d854,-0.400000,0.600000,-612.108931,0.464465,,,-17.600000,0.881879,139.0,...,,,,,,,,,,0
4,920194911d727a38,0.600000,,,,,,,,,...,,-0.346601,0.465878,,,0.080951,1.249710,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4907,cd150cd28738a7c5,-0.416667,0.583333,13174.654949,,2698.686868,1.632944,38.608333,1.249569,733.0,...,3.634846,0.058628,0.490896,0.054645,0.203545,-0.029565,0.467734,0.102882,0.230896,1
4908,8955ea3c7c332e6c,-0.075000,0.892857,9819.200948,3.926341,1183.774553,1.230846,-313.955000,0.306146,13.2,...,1.983314,0.009874,0.877709,0.107609,-2.129261,0.027273,0.324473,-0.085914,128.651636,0
4909,3d35eb2d46bf74de,0.000000,1.000000,0.000000,,133.279886,1.067482,276.175000,1.738633,0.0,...,-0.107435,0.535203,-0.676362,,,0.132436,-0.829917,,,1
4910,014f1da2083ca174,-0.250000,0.750000,13174.654949,,690.697773,1.102893,-73.370417,0.774610,733.0,...,1.526362,0.033542,0.399282,-0.028804,0.359458,0.009293,0.880546,-0.045081,-0.156793,1
