In [2]:
import sqlite3
import pandas as pd

In [5]:
# Build one row per Sherdog bout-history record with per-bout features.
#
# Columns produced by the query:
#   AGE_DAYS               days between the fighter's DATE_OF_BIRTH and the bout DATE
#   DAYS_SINCE_DEBUT       days between the fighter's earliest recorded bout and this bout
#   DAYS_SINCE_LAST_FIGHT  days since the previous bout of the same fighter (ordered by
#                          FIGHTER_BOUT_ORDINAL); NULL for the first bout in that order
#   WIN / LOSS             1 when OUTCOME is 'W' / 'L', else 0 (any other outcome is 0 for both)
#   WIN_BY_* / LOSS_BY_*   1 when the outcome matches and OUTCOME_METHOD is in the listed
#                          set, else 0
#   TOTAL_FIGHTS           FIGHTER_BOUT_ORDINAL, passed through under a new name
#   TOTAL_TIME_SECONDS     passed through unchanged
#
# NOTE(review): the LEFT JOIN onto SHERDOG_FIGHTERS assumes one row per FIGHTER_ID;
# duplicates there would multiply bout rows — TODO confirm.
STACKED_SHERDOG_QUERY = """
WITH mma_debuts AS (
  -- Earliest recorded bout date per fighter.
  SELECT
    FIGHTER_ID,
    MIN(DATE) AS SHERDOG_DEBUT_DATE
  FROM
    sherdog.SHERDOG_BOUT_HISTORY
  GROUP BY
    FIGHTER_ID
),
stacked_sherdog_raw AS (
  SELECT
    t1.FIGHTER_ID,
    julianday(t1.DATE) - julianday(t3.DATE_OF_BIRTH) AS AGE_DAYS,
    julianday(t1.DATE) - julianday(t2.SHERDOG_DEBUT_DATE) AS DAYS_SINCE_DEBUT,
    julianday(t1.DATE) - julianday(LAG(t1.DATE, 1) OVER (PARTITION BY t1.FIGHTER_ID ORDER BY t1.FIGHTER_BOUT_ORDINAL)) AS DAYS_SINCE_LAST_FIGHT,
    CASE t1.OUTCOME
      WHEN 'W' THEN 1
      ELSE 0
    END AS WIN,
    CASE t1.OUTCOME
      WHEN 'L' THEN 1
      ELSE 0
    END AS LOSS,
    CASE
      WHEN t1.OUTCOME = 'W' AND t1.OUTCOME_METHOD IN ('KO', 'TKO') THEN 1
      ELSE 0
    END AS WIN_BY_KO_TKO,
    CASE
      WHEN t1.OUTCOME = 'W' AND t1.OUTCOME_METHOD IN ('Submission', 'Technical Submission') THEN 1
      ELSE 0
    END AS WIN_BY_SUBMISSION,
    CASE
      WHEN t1.OUTCOME = 'W' AND t1.OUTCOME_METHOD = 'Decision' THEN 1
      ELSE 0
    END AS WIN_BY_DECISION,
    CASE
      WHEN t1.OUTCOME = 'L' AND t1.OUTCOME_METHOD IN ('KO', 'TKO') THEN 1
      ELSE 0
    END AS LOSS_BY_KO_TKO,
    CASE
      WHEN t1.OUTCOME = 'L' AND t1.OUTCOME_METHOD IN ('Submission', 'Technical Submission') THEN 1
      ELSE 0
    END AS LOSS_BY_SUBMISSION,
    CASE
      WHEN t1.OUTCOME = 'L' AND t1.OUTCOME_METHOD = 'Decision' THEN 1
      ELSE 0
    END AS LOSS_BY_DECISION,
    t1.FIGHTER_BOUT_ORDINAL AS TOTAL_FIGHTS,
    t1.TOTAL_TIME_SECONDS
  FROM
    sherdog.SHERDOG_BOUT_HISTORY AS t1
  LEFT JOIN
    mma_debuts AS t2
  ON
    t1.FIGHTER_ID = t2.FIGHTER_ID
  LEFT JOIN
    sherdog.SHERDOG_FIGHTERS AS t3
  ON
    t1.FIGHTER_ID = t3.FIGHTER_ID
)

SELECT * FROM stacked_sherdog_raw
"""

conn = sqlite3.connect("../data/ufcstats.db")
try:
    # Make the Sherdog tables reachable under the `sherdog` schema on this connection.
    conn.execute("ATTACH DATABASE '../data/sherdog.db' AS sherdog")
    df = pd.read_sql(STACKED_SHERDOG_QUERY, conn)
finally:
    # Close the connection even when the ATTACH or the query raises, so a failed
    # run does not leave an open handle behind in the kernel.
    conn.close()

In [6]:
# Show the query result as the cell's output (bare last expression -> rich display).
df

Unnamed: 0,FIGHTER_ID,AGE_DAYS,DAYS_SINCE_DEBUT,DAYS_SINCE_LAST_FIGHT,WIN,LOSS,WIN_BY_KO_TKO,WIN_BY_SUBMISSION,WIN_BY_DECISION,LOSS_BY_KO_TKO,LOSS_BY_SUBMISSION,LOSS_BY_DECISION,TOTAL_FIGHTS,TOTAL_TIME_SECONDS
0,4,10815.0,0.0,,1,0,0,1,0,0,0,0,0,375.0
1,4,10838.0,23.0,23.0,1,0,0,1,0,0,0,0,1,743.0
2,4,10863.0,48.0,25.0,1,0,0,1,0,0,0,0,2,44.0
3,4,10867.0,52.0,4.0,1,0,0,1,0,0,0,0,3,109.0
4,4,10867.0,52.0,0.0,0,1,0,0,0,0,1,0,4,57.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54911,408471,9562.0,140.0,113.0,1,0,1,0,0,0,0,0,2,122.0
54912,408471,9597.0,175.0,35.0,1,0,1,0,0,0,0,0,3,98.0
54913,408471,9709.0,287.0,112.0,0,1,0,0,0,0,0,1,4,900.0
54914,408471,9835.0,413.0,126.0,1,0,1,0,0,0,0,0,5,84.0
