In [1]:
import requests
from bs4 import BeautifulSoup
import re
import time

In [3]:
all_episodes = []
alphabet = "abcdefghijklmnopqrstuvwxyz"
base_url = "https://www.comedy.co.uk"
series_base_url = "https://www.comedy.co.uk/tv/qi/episodes"

# Seconds to wait on each HTTP request. Without a timeout, requests.get can
# block indefinitely on a stalled connection and freeze the whole scrape;
# with one, a requests timeout exception is raised instead.
request_timeout = 30

# Compiled once up front because it is applied to every contestant chunk.
score_regex = re.compile(r"(\-?[0-9]+)\ points?")


def _parse_contestants(scores_text):
    """Split the text of a scores paragraph into (name, score) pairs.

    The text is split on "- "; the piece before the first separator is
    ignored. Each remaining chunk must contain "<int> point(s)".

    Returns:
        (contestants, unparsed): the list of (name, score) tuples in page
        order, and the number of chunks in which no score was found.
    """
    contestants = []
    unparsed = 0

    for chunk in scores_text.split("- ")[1:]:
        r_match = score_regex.search(chunk)
        if r_match is None:
            unparsed += 1
            continue

        score = int(r_match.groups()[0])
        # What is left after removing the score text and colons is the name.
        name = chunk.replace("{} points".format(score), "")\
                    .replace("{} point".format(score), "")\
                    .replace(":", "").strip()
        contestants.append((name, score))

    return contestants, unparsed


# One series per letter of the alphabet; series pages are numbered from 1.
for series_number, letter in enumerate(alphabet, start=1):
    print("Processing series {}...".format(letter))
    series_url = "{}/{}".format(series_base_url, series_number)

    # extract the html from the series page and loop through episodes
    req = requests.get(series_url, timeout=request_timeout)

    if req.status_code != 200:
        # NOTE(review): the message says "skipping" but the break ends the
        # whole scrape, so no later series is attempted. Confirm the intent.
        print("\tError {}, skipping...".format(req.status_code))
        break

    series_soup = BeautifulSoup(req.text, "html.parser")
    episodes = series_soup.select("h3.media-heading")
    print("\t{} episodes found".format(len(episodes)))

    for e_idx, e in enumerate(episodes):
        print("\tEpisode {}...".format(e_idx))
        # extract the href and the episode title
        a = e.find("a")
        href = a.attrs["href"]
        title = a.text
        link = "{}{}".format(base_url, href)

        # go to the episode page and extract the contestants
        episode_req = requests.get(link, timeout=request_timeout)

        if episode_req.status_code != 200:
            # The remaining episodes of this series are abandoned.
            print("Error {}".format(episode_req.status_code))
            break

        # get the episode HTML
        episode_soup = BeautifulSoup(episode_req.text, "html.parser")

        # episode date: text of the first <dd>, or None when the page has none
        date_cells = episode_soup.select("dd")
        broadcast_date = date_cells[0].text if date_cells else None

        # create a dict object of the episode
        episode_obj = {
            "title": title,
            "series": letter,
            "date": broadcast_date
        }

        # extract all <h4> elements
        headers = episode_soup.select("h4")

        # find the one corresponding to the Scores (the last match wins).
        # A dedicated loop variable is used so the series counter is never
        # overwritten.
        idx = -1
        for h_idx, h in enumerate(headers):
            if h.text == "Scores":
                idx = h_idx

        # take the <p> directly below the Scores header, if there is one
        # NOTE(review): nth-of-type counts among siblings of one parent, while
        # idx is a position in the document-wide <h4> list; these only agree
        # when all <h4> elements share a parent. Verify against the page markup.
        scores = []
        if idx != -1:
            scores = episode_soup.select("h4:nth-of-type({}) + p".format(idx + 1))

        if not scores:
            # no Scores header, or no paragraph right after it: store a placeholder
            episode_obj["raw_scores"] = "No scores found"
        else:
            scores_text = scores[0].text
            contestants, unparsed = _parse_contestants(scores_text)

            # one message per chunk that had no recognisable score
            for _ in range(unparsed):
                print("\t\tNo contestant found!")
            if unparsed:
                # keep the raw paragraph so the episode can be fixed by hand
                episode_obj["raw_scores"] = scores_text

            for c_idx, c in enumerate(contestants):
                episode_obj["contestant_{}".format(c_idx + 1)] = c[0]
                episode_obj["contestant_{}_score".format(c_idx + 1)] = c[1]

        # store episode in the master list
        all_episodes.append(episode_obj)

        # wait a little bit
        time.sleep(0.5)

    # wait a bit...
    time.sleep(2)

print("Done! Processed {} episodes".format(len(all_episodes)))

Processing series a...
	12 episodes found
	Episode 0...
	Episode 1...
	Episode 2...
	Episode 3...
	Episode 4...
	Episode 5...
	Episode 6...
	Episode 7...
	Episode 8...
	Episode 9...
	Episode 10...
	Episode 11...
Processing series b...
	12 episodes found
	Episode 0...
	Episode 1...
	Episode 2...
	Episode 3...
	Episode 4...
	Episode 5...
	Episode 6...
	Episode 7...
	Episode 8...
	Episode 9...
	Episode 10...
	Episode 11...
Processing series c...
	12 episodes found
	Episode 0...
	Episode 1...
	Episode 2...
	Episode 3...
	Episode 4...
	Episode 5...
	Episode 6...
	Episode 7...
	Episode 8...
	Episode 9...
	Episode 10...
	Episode 11...
Processing series d...
	13 episodes found
	Episode 0...
	Episode 1...
	Episode 2...
	Episode 3...
	Episode 4...
	Episode 5...
	Episode 6...
	Episode 7...
	Episode 8...
	Episode 9...
	Episode 10...
	Episode 11...
	Episode 12...
Processing series e...
	13 episodes found
	Episode 0...
	Episode 1...
	Episode 2...
	Episode 3...
	Episode 4...
	Episode 5...
	Episode 6.

In [4]:
import pandas as pd

# Turn the list of scraped episode dicts into a table and save it without
# the index column, so the cleaning steps can start from the file.
df = pd.DataFrame(data=all_episodes)
df.to_csv(path_or_buf="qi_episodes_raw.csv", index=False)

## Fix data issues

In [58]:
import pandas as pd

# Reload the raw scrape from disk, report the row count and preview the
# first five rows.
df = pd.read_csv(filepath_or_buffer="qi_episodes_raw.csv")

print(df.shape[0])
df.head(n=5)

218


Unnamed: 0,contestant_1,contestant_1_score,contestant_2,contestant_2_score,contestant_3,contestant_3_score,contestant_4,contestant_4_score,contestant_5,contestant_5_score,date,raw_scores,series,title
0,,,,,,,,,,,Thursday 11th September 2003,,a,1. Adam
1,Rich Hall and Jeremy Hardy,20.0,Bill Bailey,5.0,Alan Davies,-30.0,,,,,Thursday 18th September 2003,,a,2. Astronomy
2,Clive Anderson,26.0,Meera Syal,19.0,Bill Bailey,10.0,Alan Davies,-20.0,,,Thursday 25th September 2003,,a,3. Aquatic Animals
3,,,,,,,,,,,Thursday 2nd October 2003,,a,4. Atoms
4,Gyles Brandreth,54.0,Rich Hall,35.0,Rob Brydon,17.0,Alan Davies,15.0,,,Thursday 9th October 2003,,a,5. Advertising


Drop "highlights" and "outtakes" episodes, plus the Sport Relief special and an episode that had not been broadcast yet

In [59]:
# Remove every row whose title contains "Highlights".
is_highlights = df.title.str.contains("Highlights")
df.drop(index=df[is_highlights].index, inplace=True)

# Rows removed one by one, identified by index label.
manual_drops = (
    (61, "outtakes"),
    (126, "sports relief special"),
    (215, "not broadcast yet, no winner"),
)
for row_label, _reason in manual_drops:
    df.drop(index=row_label, inplace=True)

print(df.shape[0])

199


Fill in missing contestants to avoid NaNs

In [60]:
# Replace missing contestant names with the "N/A" placeholder.
# The filled column is assigned back to the frame: calling
# fillna(inplace=True) on the result of df[col] is chained assignment,
# which warns on recent pandas and leaves df unchanged under Copy-on-Write.
for i in range(1, 6):
    column = "contestant_{}".format(i)
    df[column] = df[column].fillna("N/A")

Sometimes, when two people have the same score, the scraped data lists them as a single contestant ("X and Y")

Plus fix some other data bugs

In [61]:
# Hand-made corrections, keyed by row index label. Each inner dict maps a
# column name to the value written into that row. Only the listed cells
# are touched.
manual_fixes = {
    1: {
        "contestant_1": "Rich Hall",
        "contestant_4": "Jeremy Hardy",
        "contestant_4_score": 20,
    },
    8: {
        "contestant_2": "Jo Brand",
        "contestant_4": "Jeremy Hardy",
        "contestant_4_score": 15,
    },
    17: {
        "contestant_3": "Alan Davies",
        "contestant_4": "Sean Lock",
        "contestant_4_score": -18,
    },
    20: {
        "contestant_1": "Rich Hall",
        "contestant_4": "Josie Lawrence",
        "contestant_4_score": 4,
    },
    34: {
        "contestant_2": "Jo Brand",
        "contestant_4": "Phill Jupitus",
        "contestant_4_score": -23,
    },
    39: {
        "contestant_1": "Ronni Ancona",
        "contestant_2": "Rory Bremner",
        "contestant_3": "Alan Davies",
        "contestant_3_score": -7,
        "contestant_4": "Phill Jupitus",
        "contestant_4_score": -7,
    },
    46: {
        "contestant_1": "Alan Davies",
        "contestant_1_score": -39,
        "contestant_3": "Vic Reeves",
        "contestant_3_score": 1,
        "contestant_4": "Roger McGough",
        "contestant_4_score": 1,
    },
    55: {
        "contestant_1": "Clive Anderson",
        "contestant_4": "Vic Reeves",
        "contestant_4_score": 5,
    },
    57: {
        "contestant_3": "Bill Bailey",
        "contestant_4": "Alan Davies",
        "contestant_4_score": -6,
    },
    59: {
        "contestant_1": "Jimmy Carr",
        "contestant_4": "Dara O'Briain",
        "contestant_4_score": 5,
        "contestant_2": "Alan Davies",
        "contestant_3": "Doon Mackichan",
        "contestant_3_score": -17,
    },
    65: {
        "contestant_1": "Pam Ayres",
        "contestant_4": "Johnny Vegas",
        "contestant_4_score": 1,
    },
    # Row 74 used to be patched twice with identical values; once is enough.
    74: {
        "contestant_1": "David Mitchell",
        "contestant_4": "Dara O'Briain",
        "contestant_4_score": 3,
    },
    75: {
        "contestant_3": "Bill Bailey",
        "contestant_4": "Sean Lock",
        "contestant_4_score": -7,
    },
    80: {
        "contestant_1": "Sandi Toksvig",
        "contestant_1_score": 5,
        "contestant_2": "Ronni Ancona",
        "contestant_2_score": 3,
        "contestant_3": "The Boys",
        "contestant_3_score": -5,
    },
    85: {
        "contestant_1": "Rich Hall",
        "contestant_4": "Alan Davies",
        "contestant_4_score": 3,
    },
    86: {
        "contestant_1": "Alan Davies",
        "contestant_4": "Jack Dee",
        "contestant_4_score": -17,
    },
    89: {
        "contestant_1": "Alan Davies",
        "contestant_3": "Rob Brydon",
        "contestant_4": "Johnny Vegas",
        "contestant_4_score": 2,
    },
    93: {
        "contestant_1": "Gyles Brandreth",
        "contestant_4": "Sue Perkins",
        "contestant_4_score": -8,
    },
    98: {
        "contestant_2": "Chris Addison",
        "contestant_2_score": -13.8,
    },
    112: {
        "contestant_1": "John Bishop",
        "contestant_4": "Frank Skinner",
        "contestant_4_score": 4,
    },
    117: {
        "contestant_4": "Alan Davies",
        "contestant_4_score": -1000000,
    },
    128: {
        "contestant_1": "Victoria Coren Mitchell",
    },
    134: {
        "contestant_2": "Alan Davies",
        "contestant_4": "Johnny Vegas",
        "contestant_4_score": -6,
    },
    135: {
        "contestant_1": "John Sessions",
        "contestant_1_score": 16,
        "contestant_2": "Alan Davies",
        "contestant_2_score": 1,
        "contestant_3": "Jo Brand",
        "contestant_3_score": 1,
        "contestant_4": "Dara O'Briain",
        "contestant_4_score": 1,
    },
    150: {
        "contestant_3": "Bill Bailey",
        "contestant_5": "Jimmy Carr",
        "contestant_5_score": -19,
    },
    152: {
        "contestant_1": "Alan Davies",
        "contestant_1_score": 689.66,
        "contestant_2": "Jo Brand",
        "contestant_2_score": 85.73,
        "contestant_3": "Jimmy Carr",
        "contestant_3_score": 33.58,
        "contestant_4": "The Audience",
        "contestant_4_score": 23.24,
        "contestant_5": "Graham Linehan",
        "contestant_5_score": -19,
    },
    153: {
        "contestant_3": "Tim Minchin",
        "contestant_3_score": 0,
        "contestant_4": "Alan Davies",
        "contestant_4_score": 0,
    },
    155: {
        "contestant_2": "Sarah Millican",
        "contestant_5": "Jason Manford",
        "contestant_5_score": -9,
    },
    157: {
        "contestant_1": "The Rev. Richard Coles",
        "contestant_4": "Victoria Coren Mitchell",
        "contestant_4_score": 3,
    },
    158: {
        "contestant_4": "Alan Davies",
        "contestant_4_score": -6,
    },
    161: {
        "contestant_3": "Liza Tarbuck",
        "contestant_4": "Susan Calman",
        "contestant_4_score": -9,
    },
    166: {
        "contestant_2": "The Audience",
        "contestant_2_score": -2.5,
    },
    170: {
        "contestant_4": "Alan Davies",
        "contestant_4_score": 0,
    },
    184: {
        "contestant_1": "David Mitchell",
        "contestant_1_score": -4,
        "contestant_2": "Sue Perkins",
        "contestant_2_score": 0,
        "contestant_3": "Alan Davies",
        "contestant_3_score": 0,
        "contestant_4": "Sami Saha",
        "contestant_4_score": 0,
    },
    193: {
        "contestant_1": "Matt Lucas",
        "contestant_4": "Julia Zemiro",
        "contestant_4_score": -6,
    },
    210: {
        "contestant_1": "Nish Kumar",
        "contestant_4": "Phill Jupitus",
        "contestant_4_score": 3,
    },
}

# A boolean mask on the index is used (not df.loc[label, ...]) so that a
# label missing from the frame is silently skipped rather than creating a
# new row.
for row_label, cell_values in manual_fixes.items():
    for column, value in cell_values.items():
        df.loc[df.index == row_label, column] = value

Use regex to remove brackets from contestant names

In [62]:
import re

# A parenthesised group made of letters, digits, spaces, apostrophes and commas.
bracket_regex = re.compile(r"\([a-zA-Z 0-9',]+\)")


def _without_brackets(name):
    """Delete every bracketed group from `name` and trim the surrounding whitespace."""
    return bracket_regex.sub("", name).strip()


# Clean each of the five contestant name columns.
for column in ("contestant_{}".format(slot) for slot in range(1, 6)):
    df[column] = df[column].apply(_without_brackets)

Fill in some weirdly missing contestant information

In [63]:
# Complete panels for rows that need them filled in by hand, keyed by row
# index label. Each panel is an ordered list of (name, score) pairs; the
# pair at position k is written to contestant_k / contestant_k_score.
missing_panels = {
    0: [
        ("Danny Baker", 18),
        ("Hugh Laurie", 11),
        ("John Sessions", 10),
        ("Alan Davies", -5),
    ],
    3: [
        ("Jo Brand", 36),
        ("Howard Goodall", 13),
        ("Jeremy Hardy", 7),
        ("Alan Davies", -24),
    ],
    15: [
        ("Jeremy Clarkson", 5),
        ("Jeremy Hardy", 3),
        ("Alan Davies", -6),
        ("Barry Cryer", -6),
    ],
    23: [
        ("Rich Hall", 7),
        ("Mark Steel", 2),
        ("Phill Jupitus", -3),
        ("Alan Davies", -52),
        ("Stephen Fry", -56),
    ],
    43: [
        ("Jonathan Ross", 3),
        ("Rich Hall", 2),
        ("Phill Jupitus", 1),
        ("Alan Davies", -29),
    ],
    62: [
        ("Ronni Ancona", 5),
        ("David Mitchell", 3),
        ("Alan Davies", -6),
        ("Sir Terry Wogan", -9),
    ],
    138: [
        ("Bill Bailey", 6),
        ("Ross Noble", 5),
        ("Alan Davies", -4),
        ("Julian Clary", -7),
    ],
    139: [
        ("Alan Davies", 5),
        ("Rhys Darby", 3),
        ("Jason Manford", -2),
        ("Prof. Brian Cox", -7),
    ],
    149: [
        ("David Mitchell", -6),
        ("Ross Noble", -9),
        ("Alan Davies", -17),
        ("Sue Perkins", -22),
    ],
    165: [
        ("Jason Manford", 3),
        ("Johnny Vegas", 2),
        ("Aisling Bea", -7),
        ("The Audience", -10),
        ("Alan Davies", -14),
    ],
    176: [
        ("Danny Baker", -23),
        ("Jeremy Clarkson", -5),
        ("Sandi Toksvig", 3),
        ("Alan Davies", 52),
    ],
    179: [
        ("Colin Lane", 2),
        ("Alan Davies", 0),
        ("Jo Brand", -18),
        ("David Mitchell", -48),
    ],
    208: [
        ("Susan Calman", 0),
        ("Alan Davies", 0),
        ("Matt Lucas", 0),
        ("Josh Widdicombe", 0),
    ],
}

# Rows are selected with a boolean mask on the index, so a label that is
# not in the frame is skipped instead of being appended as a new row.
for row_label, panel in missing_panels.items():
    row_mask = df.index == row_label
    for slot, (name, score) in enumerate(panel, start=1):
        df.loc[row_mask, "contestant_{}".format(slot)] = name
        df.loc[row_mask, "contestant_{}_score".format(slot)] = score

Calculate Alan's score and whether he won

In [64]:
def alan_score(row):
    """Return the score paired with "Alan Davies" in `row`.

    Slots contestant_1 .. contestant_5 are checked in order and the first
    slot naming him decides the result. Returns None when no slot does.
    """
    matching_scores = (
        row["contestant_{}_score".format(slot)]
        for slot in range(1, 6)
        if row["contestant_{}".format(slot)] == "Alan Davies"
    )
    return next(matching_scores, None)

def did_alan_win(row):
    """Return 1 if "Alan Davies" holds the maximum score in `row`, otherwise 0.

    The maximum is taken over the five contestant_N_score entries; a tie
    for the top score still counts as a win.
    """
    slots = range(1, 6)
    top_score = row[["contestant_{}_score".format(slot) for slot in slots]].max()
    alan_on_top = any(
        row["contestant_{}".format(slot)] == "Alan Davies"
        and row["contestant_{}_score".format(slot)] == top_score
        for slot in slots
    )
    return 1 if alan_on_top else 0
        
# Derive both Alan columns by applying the helpers to every row.
df["alan_score"] = df.apply(alan_score, axis="columns")
df["did_alan_win"] = df.apply(did_alan_win, axis="columns")

# Episodes where the highest-score rule gives the wrong answer.
win_overrides = {
    176: 0,  # special case, episode 176 where lowest score won
    208: 1,  # special case, episode 208, no points but Alan declared winner
}
for row_label, won in win_overrides.items():
    df.loc[df.index == row_label, "did_alan_win"] = won

# check percentages
df["did_alan_win"].value_counts() * 100 / len(df)

0    83.417085
1    16.582915
Name: did_alan_win, dtype: float64

Extract dates

In [65]:
# Replace the scraped "date" strings (e.g. "Thursday 11th September 2003")
# with the values returned by pd.to_datetime.
df["date"] = pd.to_datetime(df["date"])

Look at some rows

In [66]:
# Show ten rows picked with a fixed seed so the preview is repeatable.
df.sample(n=10, random_state=42)

Unnamed: 0,contestant_1,contestant_1_score,contestant_2,contestant_2_score,contestant_3,contestant_3_score,contestant_4,contestant_4_score,contestant_5,contestant_5_score,date,raw_scores,series,title,alan_score,did_alan_win
83,David Mitchell,4.0,Alan Davies,2.0,Sean Lock,-6.0,Barack Obama,-10.0,Jo Brand,-46.0,2010-01-29,,g,10. Greats,2.0,0
15,Jeremy Clarkson,5.0,Jeremy Hardy,3.0,Alan Davies,-6.0,Barry Cryer,-6.0,,,2004-10-22,,b,4. Bible,-6.0,0
116,Prof. Brian Cox,5.0,Alan Davies,2.0,Ross Noble,-6.0,Sue Perkins,-17.0,,,2011-10-21,,i,7. Incomprehensible,2.0,0
191,Phill Jupitus,4.0,Cariad Lloyd,0.0,Dermot O'Leary,-10.0,Alan Davies,-16.0,,,2016-01-08,,m,10. Making A Meal Of It,-16.0,0
77,Alan Davies,-7.0,Jo Brand,-10.0,Jimmy Carr,-21.0,Rob Brydon,-28.0,,,2009-12-17,,g,4. Geography,-7.0,1
175,Jimmy Carr,5.0,Carrie Fisher,3.0,Bill Bailey,-6.0,Alan Davies,-46.0,,,2014-12-25,,l,Christmas Special: No-L,-46.0,0
69,Rich Hall,4.0,Clive Anderson,-5.0,Reginald D. Hunter,-6.0,Alan Davies,-35.0,,,2009-02-13,,f,8. Fashion,-35.0,0
68,Phill Jupitus,-24.0,Jo Brand,-26.0,Dara O'Briain,-28.0,Alan Davies,-42.0,,,2009-02-06,,f,7. Fingers And Fumbs,-42.0,0
128,Victoria Coren Mitchell,13.0,Bill Bailey,10.0,Alan Davies,-6.0,Jimmy Carr,-22.0,,,2012-09-14,,j,1. Jargon,-6.0,0
187,Jo Brand,7.0,Greg Davies,-4.0,Bill Bailey,-13.0,Alan Davies,-32.0,,,2015-11-27,,m,6. Marriage And Matings,-32.0,0


In [67]:
# Save the cleaned table, leaving out the index column.
df.to_csv(path_or_buf="qi_episodes.csv", index=False)