In [8]:
import json, ast
import pandas as pd
from django.utils.encoding import smart_str, smart_unicode
import requests_cache
import requests
import re
from bs4 import BeautifulSoup
from IPython.display import display
from ipywidgets import widgets

In [9]:
requests_cache.install_cache("cache")

def modify(city):
    """
    modify the input
    
    Argument: city
    
    Return: soup from the url link
    """
    # capitalize the first letter of each word in a string
    city = city.title()
    # replace the empty space with the _
    city = city.strip().replace(' ', '_')
    url = "https://en.wikipedia.org/wiki/" + str(city) +",_California"
    info = requests.get(url).content
    wikisoup = BeautifulSoup(info, 'lxml')
    return wikisoup

In [18]:
def extract_overview(city):
    """
    extract a brief overview from the wikipedia page
    
    Argument: city
    
    Return: print the overview description
    """
    wikisoup = modify(city)
    content = wikisoup.findAll("p")
    if content[0].text == "California":
        brief = content[1].text
    else:
        brief = content[0].text
    brief = ast.literal_eval(json.dumps(brief))
    print brief

In [11]:
def extract_info(city):
    """
    extract the basic information of the city
    
    Argument: city
    
    Return: a dataframe with the basic information
    """
    wikisoup = modify(city)
    data1 = wikisoup.findAll("tr", {"class": "mergedrow"})
    data = [dat.text for dat in data1]
    split = [dat.strip("\n") for dat in data]
    pd_info = pd.DataFrame(split)
    pd_info.rename(columns = {list(pd_info)[0]: 'info'}, inplace = True)

    #remove []
    pd_info['info'] = pd_info['info'].map(lambda x: re.sub("(\[).*?([\]])", "", x))
    #remove unwanted characters
    pd_info['info'] = pd_info['info'].map(lambda x: re.sub(ur'[\•]', ' ', x))
    pd_info['info'] = pd_info['info'].str.strip("\n")
    pd_info['info'] = pd_info['info'].str.strip()
    pd_info['info'] = pd_info['info'].str.replace("\n", " : ")
    pd_info['info'] = pd_info['info'].str.replace(" [: ]+", " : ")
    return pd_info

In [13]:
# create the interactive function to ask for user input
inputText = widgets.Text()

def modify_input(sender):
    outputText.value = "Welcome to The City of " + inputText.value.title() + "!"

inputText.on_submit(modify_input)
inputText

In [14]:
outputText = widgets.Text()
outputText

In [19]:
extract_overview("San Francisco")

San Francisco (SF) (/s\u00e6n fr\u0259n\u02c8s\u026asko\u028a/) (Spanish for Saint Francis) officially the City and County of San Francisco, is the cultural, commercial, and financial center of Northern California. It is the birthplace of the United Nations.[23][24][25] Located at the north end of the San Francisco Peninsula, San Francisco is about 47.9 square miles (124\u00a0km2)[17] in area, making it the smallest county\u2014and the only consolidated city-county[26]\u2014within the state of California. With a density of about 18,451 people per square mile (7,124 people per km2), San Francisco is the most densely settled large city (population greater than 200,000) in California and the second-most densely populated major city in the United States after New York City.[27] San Francisco is the fourth-most populous city in California, after Los Angeles, San Diego, and San Jose, and the 13th-most populous city in the United States\u2014with a census-estimated 2015 population of 864,816.

In [12]:
extract_info("san francisco")

Unnamed: 0,info
0,"Motto: Oro en Paz, Fierro en Guerra (Spanish) ..."
1,State : California
2,CSA : San Jose–San Francisco–Oakland
3,Metro : San Francisco–Oakland–Hayward
4,"Incorporated : April 15, 1850"
5,Founded by : José Joaquin Moraga : Francisco P...
6,Named for : St. Francis of Assisi
7,Type : Mayor-council
8,Body : Board of Supervisors
9,Mayor : Edwin M. Lee (D)
