# ![](https://ga-dash.s3.amazonaws.com/production/assets/logo-9f88ae6c9c3871690e33280fcf557f33.png) Capstone Project, Part 5: Appendix

---

#### [Capstone Project, Part 1: Proposal](https://nbviewer.org/github/jaeyow/f1-predictor/blob/main/final-project-part1-proposal.ipynb)
#### [Capstone Project, Part 2: Brief](https://nbviewer.org/github/jaeyow/f1-predictor/blob/main/final-project-part2-brief.ipynb)
- [Writing data to MongoDB](https://nbviewer.org/github/jaeyow/f1-predictor/blob/main/final-project-part2-brief.ipynb#mongo_db)
- [Data Dictionary](https://nbviewer.org/github/jaeyow/f1-predictor/blob/main/final-project-part2-brief.ipynb#data_dictionary)
- [Map of races around the world](https://nbviewer.org/github/jaeyow/f1-predictor/blob/main/final-project-part2-brief.ipynb#world-map)

#### [Capstone Project, Part 3: Technical Notebook](https://nbviewer.org/github/jaeyow/f1-predictor/blob/main/final-project-part3-technical-notebook.ipynb)
- [Feature Engineering](https://nbviewer.org/github/jaeyow/f1-predictor/blob/main/final-project-part3-technical-notebook.ipynb#feature_eng)
- [Regression Approaches](https://nbviewer.org/github/jaeyow/f1-predictor/blob/main/final-project-part3-technical-notebook.ipynb#regression_approaches)
- [Classification Approaches](https://nbviewer.org/github/jaeyow/f1-predictor/blob/main/final-project-part3-technical-notebook.ipynb#classification_approaches)
- [Feature Importance](https://nbviewer.org/github/jaeyow/f1-predictor/blob/main/final-project-part3-technical-notebook.ipynb#feature_importance)
- [Feature Selection](https://nbviewer.org/github/jaeyow/f1-predictor/blob/main/final-project-part3-technical-notebook.ipynb#feature_selection)
- [Models Comparison](https://nbviewer.org/github/jaeyow/f1-predictor/blob/main/final-project-part3-technical-notebook.ipynb#models_comparison)

#### [Capstone Project, Part 4: Presentation](https://61c08c5e1627a3416b0c37b4--pensive-nobel-d54f9f.netlify.app/)
#### [Capstone Project, Part 5: Appendix](https://nbviewer.org/github/jaeyow/f1-predictor/blob/main/final-project-part5-appendix.ipynb)

![](./images/f1-mclaren-car.png)

#### Writing data to MongoDB
I have created four main collections in the MongoDB database - **races**, **drivers**, **circuits**, and **results** - which together represent all the race results recorded since 1950.

![](images/mongodb-collections.png)

To create the database, the following Python scripts were used. Basically, we connect to the Ergast API and then dump the required data into our instance of MongoDB. If time permits, I will re-use these scripts to periodically ingest new race results as they become available.

In [33]:
import requests
import json
import pandas as pd
import pymongo

# MongoDB connection string. <username> and <password> are placeholders that
# must be replaced with real credentials before this cell is run.
conn_str = "mongodb+srv://<username>:<password>@cluster0.pagvf.mongodb.net/f1Oracle?retryWrites=true&w=majority"
# Shared client used by every function in this cell (server selection timeout: 5000 ms).
connect = pymongo.MongoClient(conn_str, serverSelectionTimeoutMS=5000)

# Fetch the season list from the Ergast API (up to 100 entries requested).
seasons = requests.get("https://ergast.com/api/f1/seasons.json?limit=100")
# List of season records; the functions below read each record's 'season' key.
f1_seasons = json.loads(seasons.text)["MRData"]["SeasonTable"]["Seasons"]

# Ingestion entry points, kept commented out (presumably so that re-running the
# cell does not insert the data again). The functions are defined further down,
# so call them only after those definitions have executed. Races must be written
# before results, because the results writer reads each race's weather from the
# races collection.
# write_races_to_db()
# write_drivers_to_db()
# write_circuits_to_db()
# write_raceresults_to_db()


def write_races_to_db():
    """Download every season's race schedule from Ergast and store it in MongoDB.

    For each entry of the module-level ``f1_seasons`` list the season schedule
    is requested, every race is annotated with a ``weather`` value obtained
    from ``get_race_weather_from_wikipedia(race['url'])``, and the race is
    inserted into the ``races`` collection of the ``f1Oracle`` database.

    Any exception (network, HTTP status, JSON shape, MongoDB) is caught and
    reported on stdout as ``Failed Mongo: ...``; nothing is re-raised, so races
    inserted before the failure stay in the collection.
    """
    # Write Races to DB
    try:
        # NOTE: this message is printed before any database operation is attempted.
        print("Connected successfully!!!")
        collection = connect.f1Oracle.races

        print('Writing races to Mongo...')
        for season in f1_seasons:
            # A timeout keeps a stalled connection from hanging the whole ingest;
            # raise_for_status surfaces HTTP errors instead of a confusing
            # JSON/KeyError further down.
            response = requests.get(
                f"https://ergast.com/api/f1/{season['season']}.json",
                timeout=30,
            )
            response.raise_for_status()
            races = json.loads(response.text)["MRData"]["RaceTable"]["Races"]
            for race in races:
                # add weather to the DB now to save time later when preparing
                # the data for EDA and model creation
                race['weather'] = get_race_weather_from_wikipedia(race['url'])
                collection.insert_one(race)
        print('Writing races to Mongo... DONE')

    except Exception as e:  # top-level boundary: report and carry on
        print('Failed Mongo: '+ str(e))
        
def write_drivers_to_db():
    """Download the driver list from Ergast and store it in MongoDB.

    Requests up to 1000 drivers in a single call and inserts each driver
    record into the ``drivers`` collection of the ``f1Oracle`` database.

    Exceptions are not handled here; network, HTTP-status, JSON and MongoDB
    errors propagate to the caller.
    """
    # Write Drivers to DB
    # NOTE: this message is printed before any database operation is attempted.
    print("Connected successfully!!!")
    collection = connect.f1Oracle.drivers

    print('Writing drivers to Mongo...')
    # A timeout keeps a stalled connection from hanging forever; raise_for_status
    # turns an HTTP error page into an explicit error instead of a JSON/KeyError.
    response = requests.get("https://ergast.com/api/f1/drivers.json?limit=1000", timeout=30)
    response.raise_for_status()
    drivers = json.loads(response.text)["MRData"]["DriverTable"]["Drivers"]
    for driver in drivers:
        collection.insert_one(driver)
    print('Writing drivers to Mongo... DONE')

def write_circuits_to_db():
    """Download the circuit list from Ergast and store it in MongoDB.

    Requests up to 100 circuits in a single call and inserts each circuit
    record into the ``circuits`` collection of the ``f1Oracle`` database.

    Exceptions are not handled here; network, HTTP-status, JSON and MongoDB
    errors propagate to the caller.
    """
    # Write Circuits to DB
    # NOTE: this message is printed before any database operation is attempted.
    print("Connected successfully!!!")
    collection = connect.f1Oracle.circuits

    print('Writing circuits to Mongo...')
    # A timeout keeps a stalled connection from hanging forever; raise_for_status
    # turns an HTTP error page into an explicit error instead of a JSON/KeyError.
    response = requests.get("https://ergast.com/api/f1/circuits.json?limit=100", timeout=30)
    response.raise_for_status()
    circuits = json.loads(response.text)["MRData"]["CircuitTable"]["Circuits"]
    for circuit in circuits:
        collection.insert_one(circuit)
    # Report completion once, after the loop (it used to print per circuit).
    print('Writing circuits to Mongo... DONE')
    
def write_raceresults_to_db():
    """Download every season's race results from Ergast and store them in MongoDB.

    For each entry of the module-level ``f1_seasons`` list the season results
    are requested (up to 1000 per season), every race is annotated with the
    ``weather`` value returned by ``get_race_weather_from_db`` for its season
    and round, and the race is inserted into the ``results`` collection of the
    ``f1Oracle`` database. The ``races`` collection must therefore already be
    populated.

    Any exception (network, HTTP status, JSON shape, missing race, MongoDB) is
    caught and reported on stdout as ``Failed Mongo: ...``; nothing is
    re-raised, so results inserted before the failure stay in the collection.
    """
    # Write Results to DB
    try:
        # NOTE: this message is printed before any database operation is attempted.
        print("Connected successfully!!!")
        collection = connect.f1Oracle.results

        print('Writing results to Mongo...')
        for season in f1_seasons:
            # A timeout keeps a stalled connection from hanging the whole ingest;
            # raise_for_status surfaces HTTP errors instead of a confusing
            # JSON/KeyError further down.
            response = requests.get(
                f"https://ergast.com/api/f1/{season['season']}/results.json?limit=1000",
                timeout=30,
            )
            response.raise_for_status()
            races = json.loads(response.text)["MRData"]["RaceTable"]["Races"]
            for race_results in races:
                race_results['weather'] = get_race_weather_from_db(
                    race_results['season'], race_results['round']
                )
                collection.insert_one(race_results)
        print('Writing results to Mongo... DONE')

    except Exception as e:  # top-level boundary: report and carry on
        print('Failed Mongo: '+ str(e))
        
def get_race_weather_from_db(season, round):
    """Return the stored ``weather`` value of one race.

    Looks in the ``races`` collection of the ``f1Oracle`` database for the
    first document whose ``season`` and ``round`` fields equal the arguments.
    The match is done by MongoDB rather than by loading the whole collection
    and filtering in Python on every call.

    Args:
        season: Value to match against the document's ``season`` field.
        round: Value to match against the document's ``round`` field. (The
            name shadows the builtin but is kept so keyword callers still work.)

    Returns:
        The ``weather`` field of the matching race document.

    Raises:
        IndexError: If no race matches (the same exception type the previous
            list-indexing implementation raised).
        KeyError: If the matching document has no ``weather`` field.
    """
    collection = connect.f1Oracle.races
    race = collection.find_one({'season': season, 'round': round})
    if race is None:
        raise IndexError(f"no race stored for season {season!r}, round {round!r}")
    return race['weather']
        
def get_race_weather_from_wikipedia(link):
    """Best-effort lookup of a race's weather description from its web page.

    The HTML tables at ``link`` are parsed once with ``pandas.read_html``. The
    first four tables are searched, in order, for a row whose first-column
    value is ``'Weather'``; the second-column value of that row is returned.
    If none of the four tables has such a row, a Chrome browser session is
    used to follow the page's ``Italiano`` link and read a fixed table cell.

    Args:
        link: URL (or anything ``pandas.read_html`` accepts) of the race page.

    Returns:
        The weather value found, or ``'Sunny'`` if anything goes wrong
        (download/parse failure, fewer than four tables without a Weather row,
        browser automation failure, ...).
    """
    try:
        # Download and parse the page once instead of once per table index.
        tables = pd.read_html(link)
        for table_index in range(4):
            # Indexing past the last table raises IndexError, which falls
            # through to the 'Sunny' default below, as before.
            table = tables[table_index]
            first_column = list(table.iloc[:, 0])
            if 'Weather' in first_column:
                return table.iloc[first_column.index('Weather'), 1]

        # No Weather row in the first four tables: try the Italian page.
        # NOTE(review): `webdriver` is not imported in the visible code; if it
        # is undefined the NameError is caught below and 'Sunny' is returned.
        driver = webdriver.Chrome()
        try:
            driver.get(link)

            # italian page
            button = driver.find_element_by_link_text('Italiano')
            button.click()
            info = driver.find_element_by_xpath('//*[@id="mw-content-text"]/div/table[1]/tbody/tr[9]/td').text
        finally:
            # Always release the browser process, even when scraping fails.
            driver.quit()

    except Exception:
        # Deliberate best-effort: any failure defaults to Sunny. Unlike a bare
        # `except:`, this lets KeyboardInterrupt/SystemExit stop a long ingest.
        info = 'Sunny' # Default to Sunny

    return info

Connected successfully!!!
Writing results to Mongo...
Writing results to Mongo... DONE


In [32]:
# NOTE(review): this message is printed before any query runs, so on its own it
# does not confirm that the connection works.
print("Connected successfully!!!")
db = connect.f1Oracle

# Load every document of each collection into an in-memory list.
collection = db.drivers
drivers = list(collection.find({}))

collection = db.races
races = list(collection.find({}))

# After this cell, `collection` is left pointing at the circuits collection.
collection = db.circuits
circuits = list(collection.find({}))

Connected successfully!!!


#### Reading from Mongo and creating CSV file for modelling

In [None]:
import requests
import json
import pandas as pd
import pymongo

# MongoDB connection string. <username> and <password> are placeholders that
# must be replaced with real credentials before this cell is run.
conn_str = "mongodb+srv://<username>:<password>@cluster0.pagvf.mongodb.net/f1Oracle?retryWrites=true&w=majority"
# Client used by the functions in this cell (server selection timeout: 5000 ms).
connect = pymongo.MongoClient(conn_str, serverSelectionTimeoutMS=5000)

def get_categorical_weather(weather):
    """Map a free-text weather description to a single categorical label.

    The text is lower-cased and split into words, treating ``, . ; ' /`` and
    the non-breaking space as word separators. Categories are checked in the
    order hot, warm, cold, dry, wet, cloudy; the first category that has one
    of its keywords (English or Italian) among the words wins.

    Args:
        weather: Weather description. Non-string values (e.g. ``None`` or NaN)
            are treated as having no usable description.

    Returns:
        One of ``'weather_hot'``, ``'weather_warm'``, ``'weather_cold'``,
        ``'weather_dry'``, ``'weather_wet'``, ``'weather_cloudy'``. When no
        keyword matches, ``'weather_warm'`` is imputed.
    """
    if not isinstance(weather, str):
        return 'weather_warm'

    # One pass turns all separators into spaces. The non-breaking space used to
    # be turned into '.', which glued neighbouring words together
    # ("rain\xa0heavy" -> "rain.heavy") so that they could never match.
    separators = str.maketrans({char: ' ' for char in ",.;'/\xa0"})
    words = set(weather.translate(separators).lower().split())

    weather_dict = {
        'weather_hot': ['hot'],
        'weather_warm': ['soleggiato', 'clear', 'warm', 'sunny', 'fine', 'mild', 'sereno'],
        'weather_cold': ['cold', 'fresh', 'chilly', 'cool'],
        'weather_dry': ['dry', 'asciutto'],
        'weather_wet': ['showers', 'wet', 'rain', 'pioggia', 'damp', 'thunderstorms', 'rainy', 'drizzly'],
        'weather_cloudy': ['overcast', 'nuvoloso', 'clouds', 'cloudy', 'grey', 'coperto']}

    # Dict order defines priority: the first category with a keyword hit wins.
    for category, keywords in weather_dict.items():
        if words.intersection(keywords):
            return category

    # a few are formatted weird, so impute to warm
    return 'weather_warm'

def create_results_dataframe_from_db_collection():
    """Flatten the ``results`` collection into one DataFrame row per driver result.

    Every document of the ``results`` collection (``f1Oracle`` database) is a
    race carrying a ``Results`` list. Each entry of that list becomes one row
    combining race-level fields (season, round, name, date, time, circuit,
    location, categorical weather) with result-level fields (position, points,
    grid, laps, status, driver, constructor).

    ``Round``, ``Position``, ``Grid`` and ``Laps`` are converted to ``int`` and
    ``Points`` to ``float``; every other column holds strings. ``Race Time`` is
    the race's ``time`` field, or ``'10:10:00Z'`` when the race has none.

    Returns:
        pandas.DataFrame with the columns listed in ``columns`` below, in that
        order (empty, but with those columns, if there are no results).

    Raises:
        KeyError: If a race or result document lacks one of the fields read.
    """
    columns = ['Season', 'Round', 'Race Name', 'Race Date', 'Race Time', 'Position',
               'Points', 'Grid', 'Laps', 'Status', 'Driver', 'DOB',
               'Nationality', 'Constructor', 'Constructor Nat', 'Circuit Name', 'Race Url',
               'Lat', 'Long', 'Locality', 'Country', 'Weather']

    collection = connect.f1Oracle.results
    rows = []
    for race in collection.find({}):  # MongoDB query about ~30 seconds
        race_results = race['Results']
        if not race_results:
            continue

        # Race-level values are identical for every result of the race, so they
        # are computed once per race instead of once per row.
        location = race['Circuit']['Location']
        race_fields = {
            'Season': str(race['season']),
            'Round': int(race['round']),
            'Race Name': str(race['raceName']),
            'Race Date': str(race['date']),
            # The start time lives on the race document; the membership test
            # used to look at the per-driver result dict while reading
            # race['time'], i.e. it checked the wrong dictionary.
            'Race Time': str(race['time']) if 'time' in race else '10:10:00Z',
            'Circuit Name': str(race['Circuit']['circuitName']),
            'Race Url': str(race['url']),
            'Lat': str(location['lat']),
            'Long': str(location['long']),
            'Locality': str(location['locality']),
            'Country': str(location['country']),
            'Weather': str(get_categorical_weather(race['weather'])),
        }

        for result in race_results:
            driver = result['Driver']
            constructor = result['Constructor']
            rows.append({
                **race_fields,
                'Position': int(result['position']),
                'Points': float(result['points']),
                'Grid': int(result['grid']),
                'Laps': int(result['laps']),
                'Status': str(result['status']),
                'Driver': f"{driver['givenName']} {driver['familyName']}",
                'DOB': str(driver['dateOfBirth']),
                'Nationality': str(driver['nationality']),
                'Constructor': str(constructor['name']),
                'Constructor Nat': str(constructor['nationality']),
            })

    # `columns` fixes the column order and keeps the headers for an empty result.
    return pd.DataFrame(rows, columns=columns)

# Build the flattened results table from MongoDB (one row per driver result).
inter_df = create_results_dataframe_from_db_collection()
# Save it next to the notebook for the modelling steps.
# NOTE(review): to_csv is called with its defaults, so the DataFrame index is
# presumably written as an extra first column - confirm the reader expects that.
inter_df.to_csv('results_from_mongo.csv')