In [None]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select

In [None]:
# setup driver: launches a Firefox browser session controlled by Selenium
driver = webdriver.Firefox()  # or webdriver.Chrome() to drive Chrome instead

In [None]:
# load a well-known page first (presumably a quick check that the browser session works)
driver.get("http://www.google.com")

In [None]:
# go to page
# NOTE(review): the "(S(...))" path segment looks like an ASP.NET cookieless session
# token; presumably it expires, so this URL may need refreshing. TODO confirm.
driver.get("http://wbsec.gov.in/(S(eoxjutirydhdvx550untivvu))/DetailedResult/Detailed_gp_2013.aspx")

### Zilla Parishad Name

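We can find an element on the page by its `name` attribute (Selenium's `find_element_by_name`, spelled `find_element(By.NAME, ...)` in Selenium 4). An easy way to discover an element's name is to right-click it in the browser and choose "Inspect".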

In [None]:
# find "district" drop down by its `name` attribute
# (`find_element_by_name` was removed in Selenium 4.3; `find_element(By.NAME, ...)`
# is the supported equivalent and also works on older releases)
district = driver.find_element(By.NAME, "ddldistrict")

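Now if we want to get the different options in this drop down, we can do the same. You'll notice that each name is associated with a unique value. Since we're getting multiple elements this time, we'll use the plural form of the lookup, searching by the `option` tag name (`find_elements_by_tag_name`, spelled `find_elements(By.TAG_NAME, ...)` in Selenium 4).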

In [None]:
# find options in that drop down: every <option> nested under the district <select>
# (`find_elements_by_tag_name` was removed in Selenium 4.3; use `find_elements(By.TAG_NAME, ...)`)
district_options = district.find_elements(By.TAG_NAME, "option")

# peek at the second entry: its submitted value and its visible label
print(district_options[1].get_attribute("value"))
print(district_options[1].text)

Now we'll make a dictionary associating each name with its value.

In [None]:
# Map each district's visible label to its <option> value.
# Entries whose value is not all digits (presumably the placeholder prompt) are skipped.
d_options = {}
for option in district_options:
    # read the attribute once per option; each lookup is a call out to the browser
    value = option.get_attribute("value")
    if value.isdigit():
        d_options[option.text.strip()] = value
print(d_options)

Now we can select a district by using its name and our dictionary. First we'll wrap the drop down in Selenium's `Select` helper, and then we'll use it to select "Bankura" by its value.

In [None]:
# Wrap the district <select> so an entry can be chosen by its value,
# then pick the value recorded for "Bankura".
district_select = Select(district)
bankura_value = d_options["Bankura"]
district_select.select_by_value(bankura_value)

### Panchayat Samity Name

We can do the same as we did above to find the different blocks.

In [None]:
# find the "block" drop down by its `name` attribute
# (`find_element_by_name` was removed in Selenium 4.3; `find_element(By.NAME, ...)`
# is the supported equivalent)
block = driver.find_element(By.NAME, "ddlblock")

In [None]:
# get options: every <option> nested under the block <select>
# (`find_elements_by_tag_name` was removed in Selenium 4.3; use `find_elements(By.TAG_NAME, ...)`)
block_options = block.find_elements(By.TAG_NAME, "option")

# peek at the second entry: its submitted value and its visible label
print(block_options[1].get_attribute("value"))
print(block_options[1].text)

In [None]:
# Map each block's visible label to its <option> value.
# Entries whose value is not all digits (presumably the placeholder prompt) are skipped.
b_options = {}
for option in block_options:
    # read the attribute once per option; each lookup is a call out to the browser
    value = option.get_attribute("value")
    if value.isdigit():
        b_options[option.text.strip()] = value
print(b_options)

In [None]:
# Wrap the block <select> so an entry can be chosen by its value,
# then pick the value recorded for "BANKURA-I".
block_select = Select(block)
bankura_block_value = b_options["BANKURA-I"]
block_select.select_by_value(bankura_block_value)

### Gram Panchayat Name

Let's do it again for the third drop down menu.

In [None]:
# get options: locate the gram panchayat drop down, then its <option> children
# (the `find_element(s)_by_*` helpers were removed in Selenium 4.3; the
# `find_element(s)(By.<strategy>, ...)` forms are the supported equivalents)
gp = driver.find_element(By.NAME, "ddlgp")
gp_options = gp.find_elements(By.TAG_NAME, "option")

# peek at the second entry: its submitted value and its visible label
print(gp_options[1].get_attribute("value"))
print(gp_options[1].text)

In [None]:
# Map each gram panchayat's visible label to its <option> value.
# The <option> elements are re-queried from `gp` instead of iterating `gp_options`:
# this cell rebinds `gp_options` from a list of elements to a dict, so reading the
# old binding would break if the cell were run a second time.
gp_values = {}
for option in gp.find_elements(By.TAG_NAME, "option"):
    # read the attribute once per option; each lookup is a call out to the browser
    value = option.get_attribute("value")
    if value.isdigit():  # non-numeric values (presumably the placeholder prompt) are skipped
        gp_values[option.text.strip()] = value
gp_options = gp_values
print(gp_options)

In [None]:
# Wrap the gram panchayat <select> so an entry can be chosen by its value,
# then pick the value recorded for "ANCHURI".
gp_select = Select(gp)
anchuri_value = gp_options["ANCHURI"]
gp_select.select_by_value(anchuri_value)

### Save data from the generated table

Our selections brought us to a table. Now let's get the underlying html. First we'll identify the table by its CSS selector, and then use the `get_attribute` method to read its `innerHTML`.

In [None]:
# get the html for the table: locate the element with id "DataGrid1" and read its inner markup
# (`find_element_by_css_selector` was removed in Selenium 4.3; use `find_element(By.CSS_SELECTOR, ...)`)
results_grid = driver.find_element(By.CSS_SELECTOR, "#DataGrid1")
table = results_grid.get_attribute('innerHTML')

To parse the html, we'll use BeautifulSoup.

In [None]:
# soup-ify: parse the HTML held in `table` with the lxml parser
# NOTE: this rebinds `table` from the raw HTML string to the parsed document, so
# presumably the cell that fetches the HTML must be re-run before re-running this one.
table = BeautifulSoup(table, 'lxml')

In [None]:
# display the parsed table markup
table

First we'll get all the rows of the table using the `tr` selector.

In [None]:
# get list of rows: every <tr> in the parsed table, copied into a plain list
rows = list(table.select("tr"))

But the first row is the header so we don't want that.

In [None]:
# Select the rows from `table` again rather than slicing `rows` in place, so
# re-running this cell does not drop one more row each time.
all_rows = table.select("tr")

print(all_rows[0])  # header row
print()
print(all_rows[1])  # first data row

# keep everything after the header
rows = all_rows[1:]

Each cell in the row corresponds to the data we want.

In [None]:
# display the <td> cells of the first remaining row
rows[0].select('td')

Now it's just a matter of looping through the rows and getting the information we want from each one.

In [None]:
# Keys for each record, in the left-to-right order of the row's <td> cells.
COLUMN_KEYS = ('seat', 'electors', 'polled', 'rejected', 'osn', 'candidate', 'party', 'secured')

# Build one dict per table row, keyed by COLUMN_KEYS.
data = []
for row in rows:
    cells = row.select('td')  # look the cells up once per row, not once per field
    # index explicitly so a row with fewer cells than COLUMN_KEYS raises IndexError
    # instead of silently producing a partial record
    data.append({key: cells[position].text for position, key in enumerate(COLUMN_KEYS)})

Let's clean up the text a little bit.

In [None]:
# strip whitespace: trim both ends of every value, updating each record in place
for record in data:
    record.update({field: text.strip() for field, text in record.items()})

In [None]:
# True when the first record's 'seat' value is empty (an empty string is falsy)
not data[0]['seat']

You'll notice that some of the information, such as total electors, is not supplied for each candidate. This code will add that information for the candidates who don't have it, by copying it down from the most recent row that does.

In [None]:
#fill out info
# Fields that are copied down from the last row that had a seat.
SEAT_LEVEL_KEYS = ('seat', 'electors', 'polled', 'rejected')

# Start with blanks so a first row without a seat is left blank rather than
# raising NameError or picking up stale values from an earlier run.
carried = dict.fromkeys(SEAT_LEVEL_KEYS, '')
for record in data:
    if record['seat']:
        # a row with a seat: remember its seat-level values
        carried = {key: record[key] for key in SEAT_LEVEL_KEYS}
    else:
        # a row without a seat: overwrite its seat-level fields with the remembered values
        record.update(carried)

In [None]:
# display the final list of records
data