In [44]:
import pandas as pd
from lxml import html
import sqlite3
from bs4 import BeautifulSoup
import requests
import codecs
import os
from itertools import product


In [48]:

def _cell_span(cell, attr, fill):
    """Read a ``rowspan``/``colspan`` attribute as a usable span count.

    Args:
        cell: object exposing ``get(attr, default)``.
        attr: attribute name, ``'rowspan'`` or ``'colspan'``.
        fill: value to use when the attribute is an explicit ``0``
            ("span to the end").

    Returns:
        int: the parsed span; ``1`` when the attribute is missing,
        non-numeric or negative; ``fill`` when it is ``0``.
    """
    try:
        span = int(cell.get(attr, 1))
    except (TypeError, ValueError):
        # malformed markup such as rowspan="" or colspan="2;": behave as if
        # the attribute were absent instead of aborting the whole table
        return 1
    if span < 0:
        # a negative span would make the cell cover nothing at all
        return 1
    return span or fill


def table_to_2d(table_tag):
    """Expand an HTML table element into a rectangular list of rows.

    Every ``tr`` found under ``table_tag`` becomes one row of the result, and
    the text of every direct ``td``/``th`` child is copied into each grid
    position that its ``rowspan``/``colspan`` covers. Positions that no cell
    reaches stay ``None``.

    Args:
        table_tag: object exposing ``find_all('tr')``, whose rows expose
            ``find_all(['td', 'th'], recursive=False)`` and whose cells expose
            ``get(attr, default)`` and ``get_text()`` (for example a
            BeautifulSoup ``table`` tag).

    Returns:
        list: one inner list per ``tr``, all of the same length, holding the
        cell text with newline characters removed (or ``None``).
    """
    rows = table_tag.find_all('tr')
    # direct children only; collected once and reused by both scans
    cells_by_row = [row.find_all(['td', 'th'], recursive=False) for row in rows]

    # first scan, see how many columns we need
    colcount = 0
    # (rows still to cover, width) for cells that reach down into later rows
    pending = []
    for r, cells in enumerate(cells_by_row):
        # count columns (including spanned).
        # add the width of active rowspans from preceding rows
        # we *ignore* the colspan value on the last cell, to prevent
        # creating 'phantom' columns with no actual cells, only extended
        # colspans. This is achieved by hardcoding the last cell width as 1.
        # a colspan of 0 means "fill until the end" but can really only apply
        # to the last cell; ignore it elsewhere.
        colcount = max(
            colcount,
            sum(_cell_span(c, 'colspan', 1) for c in cells[:-1])
            + len(cells[-1:])
            + sum(width for _, width in pending))
        # update rowspan bookkeeping; 0 is a span to the bottom.
        # each cell is parsed on its own, so one malformed attribute cannot
        # discard the valid rowspans of its neighbours.
        pending += [
            (_cell_span(c, 'rowspan', len(rows) - r), _cell_span(c, 'colspan', 1))
            for c in cells]
        pending = [(left - 1, width) for left, width in pending if left > 1]

    # it doesn't matter if there are still rowspan numbers 'active'; no extra
    # rows to show in the table means the larger than 1 rowspan numbers in the
    # last table row are ignored.

    # build an empty matrix for all possible cells
    table = [[None] * colcount for _ in rows]

    # fill matrix from row data
    rowspans = {}  # track pending rowspans, column number mapping to count
    for row, cells in enumerate(cells_by_row):
        span_offset = 0  # how many columns are skipped due to row and colspans
        for col, cell in enumerate(cells):
            # adjust for preceding row and colspans
            col += span_offset
            while rowspans.get(col, 0):
                span_offset += 1
                col += 1

            # fill table data
            rowspan = rowspans[col] = _cell_span(cell, 'rowspan', len(rows) - row)
            colspan = _cell_span(cell, 'colspan', colcount - col)
            # next column is offset by the colspan
            span_offset += colspan - 1
            # reserve every column the cell covers, not just the first one,
            # so following rows skip the whole rowspan x colspan rectangle
            for dcol in range(colspan):
                rowspans[col + dcol] = rowspan
            value = cell.get_text().replace('\n', '')
            for drow, dcol in product(range(rowspan), range(colspan)):
                try:
                    table[row + drow][col + dcol] = value
                except IndexError:
                    # rowspan or colspan outside the confines of the table
                    pass

        # update rowspan bookkeeping
        rowspans = {c: s - 1 for c, s in rowspans.items() if s > 1}

    return table

In [49]:
def parse_h100top10(url, timeout=30):
    """Download a page and return its first ``wikitable`` as a DataFrame.

    The page is fetched with ``requests``, parsed with BeautifulSoup (lxml
    parser), and the first ``table`` element carrying the ``wikitable`` class
    is flattened with ``table_to_2d``. The first row of that grid supplies
    the column labels; the remaining rows become the data.

    Args:
        url: address of the page to fetch.
        timeout: seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        pandas.DataFrame: one row per table row after the header row.

    Raises:
        requests.HTTPError: the server answered with an error status.
        ValueError: the page contains no ``wikitable`` table, or the table
            has no rows.
    """
    rPage = requests.get(url, timeout=timeout)
    # stop here on 4xx/5xx instead of trying to parse an error page
    rPage.raise_for_status()
    soup = BeautifulSoup(rPage.content, "lxml")

    table = soup.find("table", {"class": "wikitable"})
    if table is None:
        raise ValueError('no table with class "wikitable" found at %s' % url)

    grid = table_to_2d(table)
    if not grid:
        raise ValueError('the "wikitable" table at %s has no rows' % url)

    # first grid row is the header, the rest is data
    return pd.DataFrame(grid[1:], columns=grid[0])

In [50]:
# Wikipedia page listing the Billboard Hot 100 top-ten singles of 2013.
HOT100_TOP10_2013_URL = 'https://en.wikipedia.org/wiki/List_of_Billboard_Hot_100_top-ten_singles_in_2013'

# Scrape the page's first wikitable into a DataFrame.
df = parse_h100top10(HOT100_TOP10_2013_URL)

In [None]:
# Walk the table top to bottom. Rows whose entry-date column contains
# "Singles from " are section headings; the last four characters of that text
# are taken as the year for the data rows that follow.
year = None  # reset on every run so a value left over in the kernel is never reused
for ix, row in df.iterrows():
    entry_date = row['Top tenentry date']
    if 'Singles from ' in entry_date:
        year = entry_date[-4:]
        continue
    if year is None:
        # without a preceding heading there is no year to attach to this row
        raise ValueError(
            'row %r comes before any "Singles from <year>" heading' % (ix,))
    title = row['Single'].replace('"', '')
    artist = row['Artist(s)']
    date = entry_date + ' ' + year
    # NOTE(review): title/artist/date are overwritten on each pass and nothing
    # stores them yet -- only the values of the last data row survive the loop.

In [43]:
# Display the scraped table; as the cell's last expression it is rendered as
# the cell output.
df

Unnamed: 0,Top tenentry date,Single,Artist(s),Peak,Peak date,Weeks intop ten,References,None,None.1,None.2,...,None.3,None.4,None.5,None.6,None.7,None.8,None.9,None.10,None.11,None.12
0,Singles from 2012,Singles from 2012,Singles from 2012,Singles from 2012,Singles from 2012,Singles from 2012,Singles from 2012,,,,...,,,,,,,,,,
1,June 9,"""Home"" ◁",Phillip Phillips,6,January 19,13,[1][2][3],,,,...,,,,,,,,,,
2,October 27,"""I Knew You Were Trouble"" ◁",Taylor Swift,2,January 12,16,[4][5][6],,,,...,,,,,,,,,,
3,December 22,"""Beauty and a Beat""",Justin Bieber featuring Nicki Minaj,5,January 5,10,[7][8][9],,,,...,,,,,,,,,,
4,Singles from 2013,Singles from 2013,Singles from 2013,Singles from 2013,Singles from 2013,Singles from 2013,Singles from 2013,,,,...,,,,,,,,,,
5,January 5,"""Thrift Shop"" (#1)",Macklemore & Ryan Lewis featuring Wanz,1,February 2,21,[9][10][11],,,,...,,,,,,,,,,
6,January 19,"""Scream & Shout""",will.i.am and Britney Spears,3,February 16,11,[9][12],,,,...,,,,,,,,,,
7,January 26,"""Don't You Worry Child""[D]",Swedish House Mafia featuring John Martin,6,February 9,6,[13],,,,...,,,,,,,,,,
8,February 2,"""Suit & Tie""[C]",Justin Timberlake featuring Jay-Z,3,April 6,13,[10],,,,...,,,,,,,,,,
9,February 9,"""Fuckin' Problems""","ASAP Rocky featuring Drake, 2 Chainz and Kendr...",8,February 16,2,[12][13],,,,...,,,,,,,,,,
