In [1]:
# standard library imports
import os
import zipfile

# third party imports
import numpy as np
import pandas as pd

# local imports


# Data folders. A notebook has no real __file__, so these are plain relative
# paths: "data" sits two levels above the current working directory.
data_dir = os.path.join(os.pardir, os.pardir, "data")
raw_data_dir, clean_data_dir = (
    os.path.join(data_dir, stage) for stage in ("raw", "clean")
)

In [8]:
# Load the raw Best Fight Odds closing-odds file, parsing Card_Date as datetimes.
bestfightodds_prop_odds = pd.read_csv(
    os.path.join(raw_data_dir, "Best Fight Odds", "closing_with_props.csv"),
    parse_dates=["Card_Date"],
)

# drop bad events
# NOTE(review): the notebook does not record why these events are excluded.
bad_events = [
    "UFC 9: Motor City Madness",
    "UFC 14: Showdown",
    "UFC 15: Collision Course",
    "UFC 18: The Road to the Heavyweight Title",
    'UFC 32: Showdown in the Meadowlands',
    'UFC 35: Throwdown',
    'UFC 48: Payback',
    'UFC 53: Heavy Hitters',
    'UFC 56: Full Force',
]
# Take an explicit copy of the filtered rows: the statements below assign into
# this frame, and writing to a filtered selection of another frame raises
# pandas' SettingWithCopyWarning.
bestfightodds_prop_odds = bestfightodds_prop_odds.loc[
    ~bestfightodds_prop_odds["Event"].isin(bad_events)
].copy()

# fix event urls
# Event name -> url to use instead of the one in the raw file. The event id is
# parsed from the trailing "-<number>" of the url below, so each override ends
# with that numeric suffix.
# NOTE(review): presumably the raw urls for these events lack the suffix -- confirm.
event_url_fixes = {
    "UFC 73: Stacked": "https://www.bestfightodds.com/events/ufc-73-stacked-1",
    "UFC 74: Respect": "https://www.bestfightodds.com/events/ufc-74-respect-7",
    "UFC 76: Knockout": "https://www.bestfightodds.com/events/ufc-76-knockout-12",
    "UFC 84: Ill Will": "https://www.bestfightodds.com/events/ufc-84-ill-will-47",
    "UFC 85: Bedlam": "https://www.bestfightodds.com/events/ufc-85-bedlam-46",
    "UFC 87: Seek and Destroy": "https://www.bestfightodds.com/events/ufc-87-seek-and-destroy-57",
    "UFC 99: The Comeback": "https://www.bestfightodds.com/events/ufc-99-the-comeback-136",
    "UFC 100": "https://www.bestfightodds.com/events/ufc-100-137",
    "UFC 101: Declaration": "https://www.bestfightodds.com/events/ufc-101-declaration-145",
    "UFC 109: Relentless": "https://www.bestfightodds.com/events/ufc-109-relentless-226",
    "UFC 112: Invincible": "https://www.bestfightodds.com/events/ufc-112-invincible-245",
    "UFC 119: Mir vs. Cro Cop": "https://www.bestfightodds.com/events/ufc-119-mir-vs-cro-cop-296",
}
for event_name, event_url in event_url_fixes.items():
    bestfightodds_prop_odds.loc[bestfightodds_prop_odds["Event"] == event_name, "url"] = event_url

# event_id is the integer after the final "-" in the last path segment of the url.
bestfightodds_prop_odds["event_id"] = (
    bestfightodds_prop_odds["url"]
    .str.split("/").str[-1]
    .str.split("-").str[-1]
    .astype(int)
)
bestfightodds_prop_odds = bestfightodds_prop_odds.rename(columns={"William_H": "William Hill"})

# One row per distinct (event_id, name) pair, ordered by event_id.
df = (
    bestfightodds_prop_odds[["event_id", "Event"]]
    .rename(columns={"Event": "name"})
    .drop_duplicates()
    .sort_values("event_id")
    .reset_index(drop=True)
)

# to_csv does not create missing parent folders, so make sure the target exists.
events_out_dir = os.path.join(clean_data_dir, "Best Fight Odds")
os.makedirs(events_out_dir, exist_ok=True)
df.to_csv(os.path.join(events_out_dir, "events.csv"), index=False)

In [15]:
# Build the Best Fight Odds fighter table from the cleaned Tapology fighters
# file: keep the rows that have a bestfightodds_id, expose it as an integer
# "id" column, and order the rows by it.
bestfightodds_fighters = (
    pd.read_csv(os.path.join(clean_data_dir, "Tapology", "fighters.csv"))
    [["bestfightodds_id", "name", "nickname"]]
    .rename(columns={"bestfightodds_id": "id"})
    .dropna(subset=["id"])  # the int cast below cannot represent missing ids
    .astype({"id": int})
    .sort_values("id")
    .reset_index(drop=True)
)

# to_csv does not create missing parent folders, so make sure the target exists.
fighters_out_dir = os.path.join(clean_data_dir, "Best Fight Odds")
os.makedirs(fighters_out_dir, exist_ok=True)
bestfightodds_fighters.to_csv(os.path.join(fighters_out_dir, "fighters.csv"), index=False)