## Data acquisition

The aim of this notebook is, given the list of all UFC events, to extract the fights of each event and thus build the list of all fights since the first one in UFC history.

In [6]:
import requests
import pandas as pd
from pandas.io.json import json_normalize

In [8]:
# Get the list of all events since the beginning of the UFC.
# The timeout keeps the cell from hanging forever on an unresponsive server,
# and raise_for_status() surfaces HTTP errors here instead of letting an
# error payload be loaded into the DataFrame as if it were event data.
events_from_api = requests.get('http://ufc-data-api.ufc.com/api/v3/us/events', timeout=30)
events_from_api.raise_for_status()

# Keep only the event fields listed below.
df_events = pd.DataFrame(events_from_api.json(), columns=[
    'id',
    'base_title',
    'subtitle',
    'title_tag_line',
    'short_description',
    'event_date',
    'event_status',
    'location',
    'arena'])

# Convert the event dates to pandas datetimes.
df_events['event_date'] = pd.to_datetime(df_events['event_date'])

In [4]:
def get_event_fights(event, timeout=30):
    """Fetch the fights of one event from the UFC data API.

    Parameters
    ----------
    event
        Identifier of the event; it is interpolated into the request URL.
    timeout : float, optional
        Seconds to wait for the server before giving up, so that a single
        unresponsive request cannot block the caller indefinitely.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    """
    response = requests.get(
        'http://ufc-data-api.ufc.com/api/v3/us/events/{}/fights'.format(event),
        timeout=timeout,
    )
    # Fail loudly on HTTP errors rather than returning an error payload that
    # the caller would treat as fight data.
    response.raise_for_status()
    return response.json()

In [9]:
def get_all_fights(events):
    """Collect the fights of every event into a single DataFrame.

    Parameters
    ----------
    events : pandas.DataFrame
        One row per event; the ``id`` and ``event_date`` columns are read.

    Returns
    -------
    pandas.DataFrame
        The flattened fights of all events, each row tagged with the
        ``event_date`` of the event it belongs to. The row index is not
        reset, so every event's rows keep their own index values. An empty
        DataFrame is returned when ``events`` has no rows.
    """
    # Gather the per-event frames and concatenate once at the end: growing a
    # DataFrame inside the loop copies all accumulated rows on every
    # iteration, and DataFrame.append no longer exists in pandas 2.0.
    frames = []
    for event in events.itertuples():
        event_fights = json_normalize(get_event_fights(event.id))
        event_fights['event_date'] = event.event_date
        frames.append(event_fights)

    # pd.concat raises on an empty list, so handle "no events" explicitly.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, sort=True)

# Build the full fights table; get_all_fights queries the API once per event.
fights = get_all_fights(df_events)

In [10]:
# Rename the columns to shorter, flatter names to make the data easier to read.
column_renames = {
    'fighter2_weight_class': 'weight_class',
    'result.Method': 'method',
    'result.EndingTime': 'ending_time',
    'result.EndingRound': 'ending_round',
    'result.Submission': 'submission',
    'result.EndStrike': 'end_strike',
    'result.EndTarget': 'end_target',
    'result.EndPosition': 'end_position',
    'result.FightOfTheNight': 'is_fotn',
}
# index=str turns the row labels into strings, as before.
fights = fights.rename(index=str, columns=column_renames)

In [11]:
# Save the data to a CSV file so that later analyses can load it directly
# instead of repeating the long process of fetching every fight.
csv_columns = [
    'fighter1_last_name',
    'fighter2_last_name',
    'weight_class',
    'fighter1reach',
    'fighter2reach',
    'fighter1height',
    'fighter2height',
    'is_title_fight',
    'is_main_event',
    'fighter1_rank',
    'fighter2_rank',
    'fighter1_is_winner',
    'fighter2_is_winner',
    'method',
    'ending_time',
    'ending_round',
    'submission',
    'end_strike',
    'end_target',
    'end_position',
    'is_fotn',
    'event_date',
]
# Only the columns listed above are written, and the row index is left out.
fights.to_csv('../data/formated/fights.csv', index=False, columns=csv_columns)