# Importing necessary libraries

In [56]:
import pandas as pd
import requests
import bs4
import re
import datetime

# Extracting the HTML code

In [57]:
# Address of the Wikipedia article to scrape; adjacent string literals are
# concatenated by Python, so the resulting value is a single URL string.
url = (
    "https://en.wikipedia.org/wiki/"
    "Dragons%27_Den_(British_TV_programme)"
    "#Statistics"
)

In [58]:
# Download the article and keep its body as text in `resp`.
# A timeout stops the cell from hanging forever on an unresponsive server, and
# raise_for_status() turns an HTTP error response into an exception here rather
# than letting an error page be parsed as if it were the article.
page_response = requests.get(url, timeout=30)
page_response.raise_for_status()
resp = page_response.text

In [59]:
# Parse the downloaded markup with Python's built-in "html.parser" backend.
soup = bs4.BeautifulSoup(markup=resp, features="html.parser")

# Extracting the data

In [60]:
# Collect every <table> whose class attribute is "wikitable sortable".
tables_list = soup.find_all(
    name="table",
    attrs={"class": "wikitable sortable"},
)


In [61]:
# Column names: the text of every <th> in the second matched table (trailing
# whitespace removed), followed by an extra "Season" column.
header_list = []
for header_cell in tables_list[1].find_all("th"):
    header_list.append(header_cell.text.rstrip())
header_list.append("Season")

In [62]:

# Scrape one record per pitch from the per-season tables, then pivot the
# records into ten parallel column lists (`lists`) for the DataFrame step.
FIRST_SEASON_TABLE_INDEX = 1  # the season tables start at the second matched table
SEASON_TABLE_COUNT = 13       # number of season tables to extract
CELLS_PER_PITCH_ROW = 9       # only rows with exactly nine <td> cells are kept

# Slicing (rather than indexing) means a page with fewer matching tables
# yields fewer rows instead of raising IndexError part-way through.
season_tables = tables_list[
    FIRST_SEASON_TABLE_INDEX:FIRST_SEASON_TABLE_INDEX + SEASON_TABLE_COUNT
]

pitch_rows = []
for season_number, season_table in enumerate(season_tables, start=1):
    for row in season_table.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) != CELLS_PER_PITCH_ROW:
            continue  # header rows and rows of any other width are skipped

        # First text node of each of the first eight cells. `string=` is the
        # current name of the deprecated `text=` argument. find() returns None
        # for a cell that contains no text at all.
        first_texts = [cell.find(string=True) for cell in cells[:-1]]

        # Drop the literal word "Episode" from the episode cell, guarding
        # against an empty cell (None has no .replace and would crash here).
        episode = first_texts[0]
        if episode is not None:
            episode = episode.replace(r'Episode', '')

        # The last cell keeps its full text, not just the first text node.
        pitch_rows.append(
            [episode, *first_texts[1:], cells[-1].text, season_number]
        )

# One list per output column: nine scraped cells plus the season number.
column_count = CELLS_PER_PITCH_ROW + 1
lists = [[pitch[k] for pitch in pitch_rows] for k in range(column_count)]

# Keep the original per-column names available as aliases of the same lists.
c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 = lists

In [63]:
# Pair each column name with its list of scraped values.
dictionnary = dict(zip(header_list, lists))

In [64]:
# Build the DataFrame: one column per key of the name -> values mapping.
df_shark_tank_table = pd.DataFrame(data=dictionnary)

In [65]:
# Clean every string cell in two regex passes:
#   1. remove newline characters,
#   2. remove bracketed numeric markers such as "[12]".
table_without_newlines = df_shark_tank_table.replace(r'\n', '', regex=True)
df_shark_tank_table = table_without_newlines.replace(r"\[\d+]", '', regex=True)

## Getting the current GBP to USD exchange rate from an API

In [66]:
# Request the latest GBP -> USD rate.
# The timeout keeps the cell from hanging on an unresponsive server, and
# raise_for_status() reports an HTTP error at the request itself instead of as
# a confusing KeyError when the payload is read later.
# NOTE(review): this endpoint may now require an access key; confirm that it
# still answers unauthenticated requests.
rate_GBP_USD = requests.get(
    'https://api.exchangeratesapi.io/latest?base=GBP&symbols=USD',
    timeout=30,
)
rate_GBP_USD.raise_for_status()

In [67]:
# Decode the JSON body of the exchange-rate response into a Python object.
resp_data = rate_GBP_USD.json()
# Bare last expression so the notebook displays the decoded payload.
resp_data

{'rates': {'USD': 1.3144163229}, 'base': 'GBP', 'date': '2020-08-26'}

In [68]:
# Pull the USD entry out of the payload's 'rates' mapping.
rates_by_currency = resp_data['rates']
rate = rates_by_currency['USD']

## Converting the column Money requested in £ to $

In [69]:
# Remove the thousands separators from the requested amounts, then convert the
# column to float so it can be multiplied by the exchange rate.
money_requested_text = df_shark_tank_table["Money requested (£)"]
money_requested_plain = money_requested_text.replace(r",", "", regex=True)
df_shark_tank_table["Money requested (£)"] = money_requested_plain.astype(float)

In [70]:
# Multiply the amounts by the exchange rate, round to whole units and store
# them back in the same column as integers.
# NOTE: this overwrites the column in place, so running the cell a second time
# applies the rate again.
money_converted = df_shark_tank_table["Money requested (£)"] * rate
df_shark_tank_table["Money requested (£)"] = money_converted.round().astype(int)

## Changing the name of the columns to fit the naming convention

In [71]:
# Explicit old-name -> new-name mapping for the three columns whose meaning or
# wording changes; all other columns keep their names at this step.
renamed_columns = {
    "Money requested (£)": "money_requested_converted_$",
    "%": "stake_in_%_sold",
    "Website and Fate": "fate",
}
df_shark_tank_table = df_shark_tank_table.rename(columns=renamed_columns)

In [72]:
# Normalise the column labels: lower-case them, then turn spaces into
# underscores.
lowercase_columns = df_shark_tank_table.columns.str.lower()
df_shark_tank_table.columns = lowercase_columns.str.replace(r" ", "_")

## Deleting everything in parentheses in the fate column

In [73]:
# Strip parenthesised notes from the fate text. The match is non-greedy so each
# "(...)" group is removed on its own; a greedy ".+" would also delete any text
# sitting between the first "(" and the last ")" of a cell, which could empty
# the cell and wrongly mark the row as 'active' below.
df_shark_tank_table["fate"] = df_shark_tank_table["fate"].replace(r'\(.+?\)', '', regex=True)
# Rows whose fate is empty after the cleanup are labelled 'active'.
df_shark_tank_table.loc[df_shark_tank_table['fate'] == "", 'fate'] = 'active'
# Preview the first rows of the cleaned table.
df_shark_tank_table.head()

Unnamed: 0,episode,first_aired,entrepreneur(s),company_or_product_name,money_requested_converted_$,stake_in_%_sold,description_of_product,investing_dragon(s),fate,season
0,1,4 January 2005,Charles Ejogo,Umbrolly,197162,40,Multimedia vending unit selling umbrellas and ...,Duncan Bannatyne & Peter Jones,dissolved,1
1,2,11 January 2005,Tracey Ann Graily,Grails Ltd,157730,40,Tailor-made suits for businesswomen,Doug Richard & Rachel Elnaugh,dissolved,1
2,3,18 January 2005,Tracie Herrtage,LE BEANOCK.COM,70978,49,A beanbag hammock,Rachel Elnaugh,"no equity, active",1
3,3,18 January 2005,John and Phillip Petty,"IV Cam, Industrial Control Systems",65721,30,A 3D measuring system using camera technology,Peter Jones & Doug Richard,"no equity, active",1
4,4,25 January 2005,Paul Thomas,Mycorrhizal Systems,98581,25,Land for a truffle farm,Simon Woodroffe,"deal failed, active",1


## In stake_in_%_sold, deleting % and transforming the column to a numeric type

In [74]:
# Remove the percent signs from the stake column, then trim surrounding
# whitespace from what is left.
stake_without_percent = df_shark_tank_table["stake_in_%_sold"].replace(r'%', '', regex=True)
df_shark_tank_table["stake_in_%_sold"] = stake_without_percent.str.strip()

In [75]:
# Convert the stake column to numbers; errors='coerce' turns any value that
# cannot be parsed into NaN instead of raising.
df_shark_tank_table["stake_in_%_sold"] = pd.to_numeric(
    arg=df_shark_tank_table["stake_in_%_sold"],
    errors='coerce',
)

In [76]:
# Valuation = money requested / stake percentage * 100, rounded to a whole
# number.
money_per_percent = (
    df_shark_tank_table['money_requested_converted_$']
    / df_shark_tank_table["stake_in_%_sold"]
)
df_shark_tank_table["valuation"] = (money_per_percent * 100).round()

In [77]:
# Bare expression: the notebook displays the DataFrame, now including the
# valuation column.
df_shark_tank_table

Unnamed: 0,episode,first_aired,entrepreneur(s),company_or_product_name,money_requested_converted_$,stake_in_%_sold,description_of_product,investing_dragon(s),fate,season,valuation
0,1,4 January 2005,Charles Ejogo,Umbrolly,197162,40.0,Multimedia vending unit selling umbrellas and ...,Duncan Bannatyne & Peter Jones,dissolved,1,492905.0
1,2,11 January 2005,Tracey Ann Graily,Grails Ltd,157730,40.0,Tailor-made suits for businesswomen,Doug Richard & Rachel Elnaugh,dissolved,1,394325.0
2,3,18 January 2005,Tracie Herrtage,LE BEANOCK.COM,70978,49.0,A beanbag hammock,Rachel Elnaugh,"no equity, active",1,144853.0
3,3,18 January 2005,John and Phillip Petty,"IV Cam, Industrial Control Systems",65721,30.0,A 3D measuring system using camera technology,Peter Jones & Doug Richard,"no equity, active",1,219070.0
4,4,25 January 2005,Paul Thomas,Mycorrhizal Systems,98581,25.0,Land for a truffle farm,Simon Woodroffe,"deal failed, active",1,394324.0
...,...,...,...,...,...,...,...,...,...,...,...
134,13,24 January 2016,Ben Mason,Masons Beans,65721,20.0,"Freshly cooked, baked beans",Nick Jenkins,active,13,328605.0
135,14,31 January 2016,David Kendall,Slappie Ltd,65721,45.0,"""Slap-on"" wrist watches",Nick Jenkins,active,13,146047.0
136,14,31 January 2016,Jonathan Harris and Jonathan Schofield,Opus,105153,25.0,Trailer tents,Deborah Meaden,active,13,420612.0
137,14,31 January 2016,Morag Ekanger and Paz Sarmah,Bad Brownie,78865,30.0,Gourmet-flavoured chocolate brownies,Touker Suleyman,active,13,262883.0


# Exporting the DataFrame as a .csv file

In [78]:
# Write the final table to a CSV file in the working directory, leaving the
# DataFrame index out of the file.
df_shark_tank_table.to_csv(
    path_or_buf='Dragons_den_web_scrape_wikipedia.csv',
    index=False,
)