In [1]:
import pymongo
from datetime import datetime
import pandas as pd

In [2]:
# Database handle: the 'mmadecoded' database on the MongoDB server at localhost:27017.
DB = pymongo.MongoClient('localhost', 27017)['mmadecoded']

In [18]:
def get_fights_with_glicko(db, event_ids=None):
    """Query the 'sherdog_fights_with_glicko2' collection.

    Args:
        db: Database-like object indexable by collection name.
        event_ids: Optional list of event ids. When truthy, only fights whose
            'event_id' is in the list are requested and the query is issued
            with no_cursor_timeout=True. When None or empty, every fight is
            requested.

    Returns:
        Whatever the collection's find() returns (a cursor with pymongo).
    """
    collection = db['sherdog_fights_with_glicko2']

    # A missing *or empty* id list means "no filter".
    if not event_ids:
        return collection.find()

    query = {'event_id': {'$in': event_ids}}
    return collection.find(query, no_cursor_timeout=True)

In [4]:
def get_fighters(db):
    """Return the result of an unfiltered find() on the 'sherdog_fighters' collection.

    Args:
        db: Database-like object indexable by collection name.
    """
    return db['sherdog_fighters'].find()

In [5]:
def get_fighter_age(dob_str, event_date):
    """Compute a fighter's age in years on the day of an event.

    Args:
        dob_str: Date of birth formatted 'YYYY-MM-DD', or None / "" when unknown.
        event_date: Event date formatted 'YYYY-MM-DD'.

    Returns:
        Elapsed days divided by 365.25, rounded to two decimals, or None when
        the date of birth is None or an empty string.

    Raises:
        ValueError: If either string does not match 'YYYY-MM-DD'.
    """
    date_format = '%Y-%m-%d'

    if dob_str is None or dob_str == "":
        return None

    born_on = datetime.strptime(dob_str, date_format)
    held_on = datetime.strptime(event_date, date_format)
    elapsed_days = (held_on - born_on).days

    return round(elapsed_days / 365.25, 2)


In [6]:
def get_events(db, date=None):
    """Query the 'sherdog_events' collection, sorted ascending by 'date'.

    Args:
        db: Database-like object indexable by collection name.
        date: Optional lower bound. When given, only events whose 'date' is
            greater than or equal to it are requested; when None, all events.

    Returns:
        The sorted result of the collection's find() call.
    """
    collection = db['sherdog_events']

    unsorted = (
        collection.find()
        if date is None
        else collection.find({'date': {'$gte': date}})
    )

    return unsorted.sort("date", 1)


In [7]:
def get_fighter_history(db, fighter_id):
    """Query every fight in 'sherdog_fights_with_glicko2' involving a fighter.

    A fight matches when `fighter_id` appears as either 'fighter1_id' or
    'fighter2_id'.

    Args:
        db: Database-like object indexable by collection name.
        fighter_id: Id compared against both fighter id fields.

    Returns:
        Whatever the collection's find() returns (a cursor with pymongo).
    """
    either_side = [
        {id_field: fighter_id}
        for id_field in ('fighter1_id', 'fighter2_id')
    ]
    return db['sherdog_fights_with_glicko2'].find({'$or': either_side})

In [8]:
def get_last_3_fights(db, fighter_id, curr_date):
    """Return up to three of a fighter's most recent fights before a date.

    Reads the module-level `event_dates` mapping (str(event id) ->
    'YYYY-MM-DD' string), which must be populated before this is called.

    Args:
        db: Database handle passed through to get_fighter_history().
        fighter_id: Id of the fighter whose history is wanted.
        curr_date: Cut-off date formatted 'YYYY-MM-DD'; only fights strictly
            earlier than this date are kept.

    Returns:
        A list of at most three dicts {'date': datetime, 'result': ...},
        ordered from most recent to oldest.

    Raises:
        KeyError: If a fight's event id is absent from `event_dates`.
    """
    history = get_fighter_history(db, fighter_id)
    cutoff = datetime.strptime(curr_date, '%Y-%m-%d')
    earlier_fights = []

    for bout in history:
        # Pick the glicko record belonging to the requested fighter's side.
        if bout['fighter1_id'] == fighter_id:
            glicko = bout['fighter1_glicko2_info']
        else:
            glicko = bout['fighter2_glicko2_info']

        held_on = datetime.strptime(event_dates[str(bout['event_id'])], '%Y-%m-%d')

        # Fights on or after the cut-off are not part of the "previous" history.
        if held_on >= cutoff:
            continue

        earlier_fights.append({'date': held_on, 'result': glicko['result']})

    earlier_fights.sort(key=lambda entry: entry['date'], reverse=True)
    return earlier_fights[:3]


In [9]:
def get_inactivity(last_fights, curr_date):
    """Return the number of days between the first listed fight and a date.

    Args:
        last_fights: Sequence of dicts with a datetime under 'date'; only the
            first element is used.
        curr_date: Reference date formatted 'YYYY-MM-DD'.

    Returns:
        Whole days from last_fights[0]['date'] to `curr_date`, or None when
        `last_fights` is empty.
    """
    if len(last_fights) == 0:
        return None

    reference = datetime.strptime(curr_date, '%Y-%m-%d')
    latest_fight_date = last_fights[0]['date']
    idle_days = (reference - latest_fight_date).days

    return round(idle_days, 2)


In [10]:
def get_streak(last_fights):
    """Encode fight results as a string of 'W'/'L' characters.

    The input is walked in reverse order, so the last element of
    `last_fights` produces the first character. A result equal to 'LOSS'
    becomes 'L'; every other result value becomes 'W'.

    Args:
        last_fights: Sequence of dicts each holding a 'result' entry.

    Returns:
        The encoded string; empty when `last_fights` is empty.
    """
    return ''.join(
        'L' if bout['result'] == 'LOSS' else 'W'
        for bout in reversed(last_fights)
    )


In [11]:
# Cache the static attributes of every fighter in memory, keyed by the
# string form of the document's '_id'.
fighters_db = get_fighters(DB)
fighters = {}
for fighter_db in fighters_db:
    fighters[str(fighter_db['_id'])] = {
        field: fighter_db[field]
        for field in ('name', 'date_of_birth', 'height_cm', 'weight_kg')
    }


In [12]:
# Map str(event '_id') -> the event's stored 'date' value; get_last_3_fights()
# and the feature-building cells look fights' dates up in this mapping.
events_db = get_events(DB)
event_dates = {
    str(event_db['_id']): event_db['date']
    for event_db in events_db
}


In [13]:
# Build an in-memory history per fighter:
#   fighter_hist[str(fighter id)] -> list of {'date': ..., 'result': ...},
# one entry per fight in which the fighter appears on either side.
fights_with_glicko_db = get_fights_with_glicko(DB)
fighter_hist = {}
# `i` is 1 + the number of fights recorded so far; it is NOT advanced for a
# skipped fight, so the number printed on failure is "successes so far + 1".
i = 1
for fight_db in fights_with_glicko_db:
    try:
        event_date = event_dates[str(fight_db['event_id'])]

        # Resolve both sides before touching fighter_hist so a malformed
        # document is skipped as a whole rather than recorded for one side only.
        sides = (
            (str(fight_db['fighter1_id']), fight_db['fighter1_glicko2_info']['result']),
            (str(fight_db['fighter2_id']), fight_db['fighter2_glicko2_info']['result']),
        )

        for fighter_key, result in sides:
            fighter_hist.setdefault(fighter_key, []).append(
                {'date': event_date, 'result': result}
            )
        i += 1
    except Exception as exc:
        # Best-effort: report and skip the fight. `Exception` (not a bare
        # except) keeps the kernel interruptible with KeyboardInterrupt.
        print('{}: {!r}'.format(i, exc))

# Report the number of fighters with at least one recorded fight. (The
# original printed len(fights), a name that is only defined by a later cell.)
print(len(fighter_hist))

In [None]:
def _fighter_features(prefix, date, fighter, glicko_info, history):
    """Build the '<prefix>_*' feature columns for one fighter in one fight.

    Args:
        prefix: Column prefix, 'a' or 'b'.
        date: Event date string passed to get_fighter_age()/get_inactivity().
        fighter: Entry from `fighters` (name, date_of_birth, height_cm, weight_kg).
        glicko_info: The fighter's glicko2 info dict from the fight document.
        history: The fighter's previous fights as returned by get_last_3_fights().

    Returns:
        Dict of the ten prefixed feature columns, in a fixed key order.
    """
    return {
        prefix + '_name': fighter['name'],
        prefix + '_age': get_fighter_age(fighter['date_of_birth'], date),
        prefix + '_height_cm': fighter['height_cm'],
        prefix + '_weight_kg': fighter['weight_kg'],
        prefix + '_mu': glicko_info['mu'],
        prefix + '_phi': glicko_info['phi'],
        prefix + '_sigma': glicko_info['sigma'],
        prefix + '_fight_count': glicko_info['fight_count'],
        prefix + '_inactivity': get_inactivity(history, date),
        prefix + '_streak': get_streak(history),
    }


# Build the feature table: every fight yields two mirrored rows (fighter2 as
# 'a' vs fighter1 as 'b', then the reverse); 'result' is always the result of
# the fighter in the 'a' columns.
fights_with_glicko_db = get_fights_with_glicko(DB)
fights = []
# `i` is 1 + the number of fights converted so far; it is NOT advanced for a
# skipped fight, so the number printed on failure is "successes so far + 1".
i = 1
for fight_db in fights_with_glicko_db:
    try:
        date = event_dates[str(fight_db['event_id'])]

        # (fighter attributes, glicko info, previous fights) for each side.
        corner1 = (
            fighters[str(fight_db['fighter1_id'])],
            fight_db['fighter1_glicko2_info'],
            get_last_3_fights(DB, fight_db['fighter1_id'], date),
        )
        corner2 = (
            fighters[str(fight_db['fighter2_id'])],
            fight_db['fighter2_glicko2_info'],
            get_last_3_fights(DB, fight_db['fighter2_id'], date),
        )

        rows = []
        for corner_a, corner_b in ((corner2, corner1), (corner1, corner2)):
            row = _fighter_features('a', date, *corner_a)
            # Each side's streak comes from its own history (the original
            # filled the first row's b_streak from fighter A's history).
            row.update(_fighter_features('b', date, *corner_b))
            row['result'] = corner_a[1]['result']
            rows.append(row)

        fights.extend(rows)
        i += 1

        if i % 1000 == 0:
            print(i)

    except Exception as exc:
        # Best-effort: report and skip the fight. `Exception` (not a bare
        # except) keeps the kernel interruptible with KeyboardInterrupt.
        print('{}: {!r}'.format(i, exc))

print(len(fights))

18
28
121
313


In [48]:
# Turn the list of per-fight feature dicts into a DataFrame (one row per dict).
fights_df = pd.DataFrame.from_records(fights)

In [49]:
# Display the (row count, column count) of the feature table.
fights_df.shape

(677930, 17)

In [50]:
# Persist the feature table as CSV under a relative path.
# to_csv is called with its defaults, so the DataFrame index is written as
# the first (unnamed) column.
fights_df.to_csv('fights_20210109.csv')

In [16]:
# Number of feature rows accumulated in `fights`.
# NOTE(review): this cell's execution count is lower than that of the cells
# before it, so the recorded output may come from an earlier kernel state.
len(fights)

38428

TypeError: object of type 'Cursor' has no len()