Goal: Use Wikipedia to obtain the coordinates of each constituency listed in the CSV file and plot them on a map.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the Lok Sabha 2019 winners dataset that the rest of the notebook enriches.
WINNERS_CSV_PATH = 'ls2019_winner_data.csv'
df = pd.read_csv(WINNERS_CSV_PATH)

Wikipedia pages for constituencies follow a common URL pattern (`<Constituency>_Lok_Sabha_constituency`), so the page URL is built from each constituency name and the coordinates are extracted from that page. Example: https://en.wikipedia.org/wiki/Srikakulam_Lok_Sabha_constituency

In [3]:
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup

# Seconds to wait for a single page before giving up, so one stalled
# connection cannot hang the whole scrape.
REQUEST_TIMEOUT_SECONDS = 10

coord_list = []
# Constituency names are upper-case in the CSV; title-case them to match the
# capitalisation used in Wikipedia page titles.
places = df['Constituency'].apply(lambda x: x.title())

# A single session reuses the HTTP connection across all page fetches.
with requests.Session() as session:
    for place in places:
        # Page titles look like "<Name>_Lok_Sabha_constituency".
        page_title = f"{place}_Lok_Sabha_constituency".replace(" ", "_")
        # Percent-encode the title so characters such as '&', '?' or '#' in a
        # constituency name cannot change the meaning of the URL.
        url = f"https://en.wikipedia.org/wiki/{quote(page_title)}"

        try:
            response = session.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        except requests.RequestException as exc:
            # Record a gap instead of aborting: coord_list must stay aligned
            # one-to-one with the rows of df.
            coord_list.append(None)
            print(f"{page_title}: Request failed ({exc})")
            continue

        soup = BeautifulSoup(response.content, 'html.parser')
        # The decimal "lat; lng" pair is read from the first <span class="geo">.
        # Pages without one (including "not found" pages) fall into the else branch.
        coordinates_element = soup.find('span', class_='geo')
        if coordinates_element:
            coordinates = coordinates_element.get_text()
            coord_list.append(coordinates)
            print(f"{page_title}: Coordinates: {coordinates}")
        else:
            coord_list.append(None)
            print(f"{page_title}: Coordinates not found")


Alappuzha_Lok_Sabha_constituency: Coordinates: 9.51; 76.34
Chitradurga_Lok_Sabha_constituency: Coordinates: 14.2; 76.4
Nilgiris_Lok_Sabha_constituency: Coordinates: 11.5; 76.6
Barpeta_Lok_Sabha_constituency: Coordinates: 26.32; 91.00
Diamond_Harbour_Lok_Sabha_constituency: Coordinates: 22.1910091; 88.1904741
Maldaha_Dakshin_Lok_Sabha_constituency: Coordinates: 24.8515889; 88.0109596
Murshidabad_Lok_Sabha_constituency: Coordinates: 24.18; 88.27
Kandhamal_Lok_Sabha_constituency: Coordinates: 20.177813; 84.173564
Nellore_Lok_Sabha_constituency: Coordinates: 14.4; 80.0
Baharampur_Lok_Sabha_constituency: Coordinates: 24.1; 88.25
Attingal_Lok_Sabha_constituency: Coordinates: 8.6800; 76.8300
Ghazipur_Lok_Sabha_constituency: Coordinates: 25.58; 83.58
Tura_Lok_Sabha_constituency: Coordinates: 25.52; 90.19
Nainital-Udhamsingh_Nagar_Lok_Sabha_constituency: Coordinates: 29.2; 79.52
Kheri_Lok_Sabha_constituency: Coordinates: 27.92; 80.73
Bhagalpur_Lok_Sabha_constituency: Coordinates: 25.2; 87.0
Muz

In [7]:
# Attach the scraped coordinates: coord_list holds one entry (a "lat; lng"
# string or None) per row of df, in row order.
df['coordinates'] = coord_list

In [8]:
# Preview the first rows to confirm the new 'coordinates' column was added.
df.head()

Unnamed: 0,Sno,Candidate,Constituency,Party,Criminal Case,Education,Total Assets,Liabilities,coordinates
0,1,A M Ariff,ALAPPUZHA,CPI(M),2,Graduate Professional,"Rs 1,52,68,906 ~ 1 Crore+","Rs 22,20,700 ~ 22 Lacs+",9.51; 76.34
1,2,A Narayanaswamy,CHITRADURGA,BJP,0,Graduate,"Rs 9,61,97,642 ~ 9 Crore+",Rs 0 ~,14.2; 76.4
2,3,A. Raja,NILGIRIS,DMK,6,Graduate Professional,"Rs 4,95,91,024 ~ 4 Crore+","Rs 14,24,914 ~ 14 Lacs+",11.5; 76.6
3,4,Abdul Khaleque,BARPETA,INC,0,Post Graduate,"Rs 73,98,753 ~ 73 Lacs+","Rs 27,03,693 ~ 27 Lacs+",26.32; 91.00
4,5,Abhishek Banerjee,DIAMOND HARBOUR,AITC,0,Graduate,"Rs 1,37,94,320 ~ 1 Crore+",Rs 0 ~,22.1910091; 88.1904741


In [9]:
# Per-column count of missing values; only 'coordinates' is expected to have gaps.
df.isnull().sum()

Sno               0
Candidate         0
Constituency      0
Party             0
Criminal Case     0
Education         0
Total Assets      0
Liabilities       0
coordinates      34
dtype: int64

In [10]:
# Show the rows for which no coordinates were scraped.
df[df['coordinates'].isnull()]

Unnamed: 0,Sno,Candidate,Constituency,Party,Criminal Case,Education,Total Assets,Liabilities,coordinates
35,36,Anurag Singh Thakur,HAMIRPUR,BJP,3,Graduate,"Rs 5,67,70,463 ~ 5 Crore+","Rs 10,85,145 ~ 10 Lacs+",
37,38,Aparupa Poddar (Afrin Ali),ARAMBAG,AITC,1,Post Graduate,"Rs 1,25,27,597 ~ 1 Crore+","Rs 42,44,000 ~ 42 Lacs+",
40,41,Arjun Singh,BARRACKPUR,BJP,24,10th Pass,"Rs 80,55,691 ~ 80 Lacs+","Rs 11,00,000 ~ 11 Lacs+",
49,50,Ashok Mahadevrao Nete,GADCHIROLI CHIMUR,BJP,0,12th Pass,"Rs 5,01,36,369 ~ 5 Crore+","Rs 1,56,82,840 ~ 1 Crore+",
54,55,B.N.Bache Gowda,CHIKKBALLAPUR,BJP,0,Graduate Professional,"Rs 1,15,35,01,141 ~ 115 Crore+",Rs 0 ~,
73,74,Bhavana Pundlikrao Gawali,YAVATMAL WASHIM,SHS,3,Graduate,"Rs 9,68,73,189 ~ 9 Crore+","Rs 73,96,250 ~ 73 Lacs+",
113,114,Delkar Mohanbhai Sanjibhai,DADRA AND NAGAR HAVELI,IND,0,Graduate,"Rs 70,88,85,980 ~ 70 Crore+","Rs 3,72,68,162 ~ 3 Crore+",
118,119,Dharambir Singh,BHIWANI MAHENDRAGARH,BJP,0,Graduate,"Rs 6,06,42,205 ~ 6 Crore+","Rs 2,40,000 ~ 2 Lacs+",
124,125,Dimple Yadav,MAINPURI : BYE ELECTION ON 05-12-2022,SP,0,Graduate,"Rs 39,91,50,143 ~ 39 Crore+","Rs 31,53,000 ~ 31 Lacs+",
148,149,Dr.Beesetti Venkata Satyavathi,ANAKAPALLE,YSRCP,0,Graduate Professional,"Rs 9,15,26,244 ~ 9 Crore+","Rs 1,11,39,279 ~ 1 Crore+",


In [11]:
# Keep only the identifying columns of the rows that still need coordinates.
missing_constituency = df.loc[df['coordinates'].isnull(), ['Constituency', 'Candidate']]
missing_constituency

Unnamed: 0,Constituency,Candidate
35,HAMIRPUR,Anurag Singh Thakur
37,ARAMBAG,Aparupa Poddar (Afrin Ali)
40,BARRACKPUR,Arjun Singh
49,GADCHIROLI CHIMUR,Ashok Mahadevrao Nete
54,CHIKKBALLAPUR,B.N.Bache Gowda
73,YAVATMAL WASHIM,Bhavana Pundlikrao Gawali
113,DADRA AND NAGAR HAVELI,Delkar Mohanbhai Sanjibhai
118,BHIWANI MAHENDRAGARH,Dharambir Singh
124,MAINPURI : BYE ELECTION ON 05-12-2022,Dimple Yadav
148,ANAKAPALLE,Dr.Beesetti Venkata Satyavathi


The coordinates for the constituencies above could not be found because no wiki page exists at the URL built from the expected pattern. On further checking, these constituencies turned out to have inconsistent wiki page URLs. Example:

https://en.wikipedia.org/wiki/Hamirpur,_Himachal_Pradesh_Lok_Sabha_constituency,
https://en.wikipedia.org/wiki/Hamirpur,_Uttar_Pradesh_Lok_Sabha_constituency

The Wikipedia page search API, combined with matching on the candidate name, was also tried, but it did not give reliable results because candidate names on Wikipedia differ from those in the data scraped from MyNeta.
Hence, the values for these constituencies were collected manually.

In [12]:
# Manually collected "lat; lng" strings for the rows whose coordinates could
# not be scraped. The list is positional: entry i belongs to the i-th row of
# df that has a missing 'coordinates' value, so the order must not change.
missing_coordinates = [
    '31.7; 76.5',
    '22.9; 87.8',
    '22.76; 88.37',
    '20.3; 79.7',
    '13.4; 77.7',
    '20.3; 77.6',
    '20.27; 73.02',
    '28.5; 76.1',
    # This entry was previously just '79.03' (latitude missing), which made
    # both lat and lng parse as 79.03 further down.
    # NOTE(review): latitude 27.23 is the Mainpuri value; confirm against the source page.
    '27.23; 79.03',
    '17.69; 83.00',
    '24.8; 76.4',
    '22.0; 82.6',
    '19.8887861; 75.3434361',
    '26.1; 84.5',
    '11.6; 75.6',
    '11.0; 77.0',
    '22.7500; 88.3400',
    '16.4; 81.7',
    '9.24; 76.54',
    '10; 93',
    '25.95; 80.15',
    '16.7; 78.0',
    '26.6; 77.5',
    '27.15; 83.56',
    '20.42; 72.83',
    '26.35; 92.69',
    '27.7; 81.84',
    '23.1; 83.2',
    '26.0; 76.0',
    '21.3; 79.9',
    '23.4; 87.6',
    '24.8; 84.4',
    '18.6; 79.4',
    '16.7; 73.4',
]

In [13]:
# The manual list is assigned positionally, so its length must equal the
# number of rows that still lack coordinates.
rows_without_coordinates = df['coordinates'].isna().sum()
print(len(missing_coordinates), rows_without_coordinates)

34 34


Filling the missing coordinates

In [14]:
# Positional fill: the i-th missing row receives the i-th manual value.
needs_fill = df['coordinates'].isna()
df.loc[needs_fill, 'coordinates'] = missing_coordinates

In [15]:
# Re-check the per-column missing counts; 'coordinates' should now be 0.
df.isnull().sum()

Sno              0
Candidate        0
Constituency     0
Party            0
Criminal Case    0
Education        0
Total Assets     0
Liabilities      0
coordinates      0
dtype: int64

In [16]:
# Inspect the completed coordinate strings.
df['coordinates']

0                 9.51; 76.34
1                  14.2; 76.4
2                  11.5; 76.6
3                26.32; 91.00
4      22.1910091; 88.1904741
                ...          
537                23.8; 85.9
538              12.67; 79.28
539              26.22; 78.17
540              15.15; 76.91
541                14.5; 78.8
Name: coordinates, Length: 542, dtype: object

Extracting latitude and longitude values into separate columns.

In [17]:
# Each coordinate string is expected to be "<lat>; <lng>".
coordinate_parts = df['coordinates'].str.split(';')

# Fail loudly on anything that is not exactly two parts. Indexing with
# [0] / [-1] would otherwise accept a single number and silently use it as
# both latitude and longitude.
malformed = coordinate_parts.str.len() != 2
if malformed.any():
    raise ValueError(
        "Expected 'lat; lng' coordinates, got: "
        f"{df.loc[malformed, 'coordinates'].to_dict()}"
    )

df['lat'] = coordinate_parts.str[0].str.strip().astype(float)
df['lng'] = coordinate_parts.str[1].str.strip().astype(float)

Asset and liability values are converted to float for easy plotting.

In [18]:
def _rupees_to_float(amount_text):
    """Convert a string such as 'Rs 1,52,68,906 ~ 1 Crore+' to 15268906.0.

    The leading two characters ('Rs') are dropped, everything from '~' onwards
    is discarded, and the thousands separators are removed before parsing.
    """
    rupee_part = amount_text[2:].split('~')[0]
    return float(rupee_part.replace(",", ""))


for money_column in ('Total Assets', 'Liabilities'):
    df[money_column] = df[money_column].apply(_rupees_to_float)

In [19]:
# Make the unit explicit in the column names now that the values are numeric.
df.rename(
    columns={
        'Total Assets': 'Total Assets in Rupees',
        'Liabilities': 'Liabilities in Rupees',
    },
    inplace=True,
)

In [20]:
# Preview the frame with numeric rupee columns and the new lat/lng columns.
df.head()

Unnamed: 0,Sno,Candidate,Constituency,Party,Criminal Case,Education,Total Assets in Rupees,Liabilities in Rupees,coordinates,lat,lng
0,1,A M Ariff,ALAPPUZHA,CPI(M),2,Graduate Professional,15268906.0,2220700.0,9.51; 76.34,9.51,76.34
1,2,A Narayanaswamy,CHITRADURGA,BJP,0,Graduate,96197642.0,0.0,14.2; 76.4,14.2,76.4
2,3,A. Raja,NILGIRIS,DMK,6,Graduate Professional,49591024.0,1424914.0,11.5; 76.6,11.5,76.6
3,4,Abdul Khaleque,BARPETA,INC,0,Post Graduate,7398753.0,2703693.0,26.32; 91.00,26.32,91.0
4,5,Abhishek Banerjee,DIAMOND HARBOUR,AITC,0,Graduate,13794320.0,0.0,22.1910091; 88.1904741,22.191009,88.190474


In [21]:
# Persist the enriched frame (without the index) for the plotting section below.
df.to_csv('lok_sabha_winners_with_geocode.csv', index=False)

### Now plot the dataframe on the map using pydeck.

In [1]:
"""
ScatterplotLayer
================

Plot of the total assets of Lok Sabha winners, one marker per winner placed at the constituency coordinates and coloured by party.

Adapted from the deck.gl documentation.
"""
import pydeck as pdk
import pandas as pd
import math
from pydeck.types import String
import random

def scatter_plot(df, html_path="scatterplot_layer.html"):
    """Build a pydeck scatter plot with one marker per row of ``df``.

    Markers are placed at (``lng``, ``lat``), sized by the square root of
    ``Total Assets in Rupees`` and coloured by ``Party``.

    Args:
        df: DataFrame with the columns ``lat``, ``lng``, ``Party``,
            ``Constituency``, ``Candidate`` and ``Total Assets in Rupees``.
            It is not modified.
        html_path: File the rendered map is written to. Pass ``None`` to skip
            writing the HTML file.

    Returns:
        The ``pdk.Deck`` object, so the caller can display it.
    """
    # Work on a copy so the caller's frame does not silently gain the helper
    # columns added below.
    plot_df = df.copy()

    # Radius grows with the square root of the assets, so marker area (not
    # radius) is proportional to the asset value.
    plot_df["assets_radius"] = plot_df["Total Assets in Rupees"].apply(math.sqrt)

    # Seed before assigning colours, as the original code did (presumably
    # assign_random_colors draws from the global `random` module). Restore the
    # previous RNG state afterwards so the rest of the session is unaffected.
    rng_state = random.getstate()
    try:
        random.seed(23)
        color_lookup = pdk.data_utils.assign_random_colors(plot_df["Party"])
    finally:
        random.setstate(rng_state)

    # One colour per row, looked up by party.
    plot_df["color"] = plot_df["Party"].map(color_lookup.get)

    # Define the layer to display on the map.
    layer = pdk.Layer(
        "ScatterplotLayer",
        plot_df,
        pickable=True,
        opacity=0.4,
        stroked=True,
        filled=True,
        radius_scale=2,
        radius_min_pixels=1,
        radius_max_pixels=100,
        line_width_min_pixels=1,
        get_position=["lng", "lat"],
        get_radius="assets_radius",
        get_fill_color="color",
        get_line_color=[0, 0, 0],
    )

    # Initial viewport: the same latitude/longitude/zoom as before.
    view_state = pdk.ViewState(
        latitude=20, longitude=80, zoom=3, bearing=0, pitch=0
    )

    deck = pdk.Deck(
        layers=[layer],
        initial_view_state=view_state,
        tooltip={
            "text": "{Party}\n{Constituency}\n{Candidate}\nTotal Assets: Rs. {Total Assets in Rupees}"
        },
    )
    if html_path is not None:
        deck.to_html(html_path)
    return deck

In [2]:
# Reload the geocoded winners saved earlier and render them on the map.
GEOCODED_CSV_PATH = "lok_sabha_winners_with_geocode.csv"
df = pd.read_csv(GEOCODED_CSV_PATH)
deck_object = scatter_plot(df)
deck_object.show()


DeckGLWidget(carto_key=None, custom_libraries=[], google_maps_key=None, json_input='{\n  "initialViewState": {…