In [2]:
# special IPython command to prepare the notebook for matplotlib
%matplotlib inline 

from fnmatch import fnmatch

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import bs4

# set some nicer defaults for matplotlib
from matplotlib import rcParams

#these colors come from colorbrewer2.org. Each is an RGB triplet
dark2_colors = [(0.10588235294117647, 0.6196078431372549, 0.4666666666666667),
                (0.8509803921568627, 0.37254901960784315, 0.00784313725490196),
                (0.4588235294117647, 0.4392156862745098, 0.7019607843137254),
                (0.9058823529411765, 0.1607843137254902, 0.5411764705882353),
                (0.4, 0.6509803921568628, 0.11764705882352941),
                (0.9019607843137255, 0.6705882352941176, 0.00784313725490196),
                (0.6509803921568628, 0.4627450980392157, 0.11372549019607843),
                (0.4, 0.4, 0.4)]

rcParams['figure.figsize'] = (10, 6)
rcParams['figure.dpi'] = 150
rcParams['axes.color_cycle'] = dark2_colors
rcParams['lines.linewidth'] = 2
rcParams['axes.grid'] = True
rcParams['axes.facecolor'] = '#eeeeee'
rcParams['font.size'] = 14
rcParams['patch.edgecolor'] = 'none'



In [3]:
def get_pool_xml(poll_id):
    url='http://charts.realclearpolitics.com/charts/'+str(poll_id)+'.xml'
    content=requests.get(url).text
    return content

In [4]:
import re

def _strip(s):
    """This function removes non-letter characters from a word
    
    for example _strip('Hi there!') == 'Hi there'
    """
    return re.sub(r'[\W_]+', '', s)

In [111]:
def rcp_poll_data(xml):
    soup=bs4.BeautifulSoup(xml,'lxml')
    date=[]
    Obama=[]
    Romney=[]
    
    for i in soup.series:
        date.append(i.text)
    for i in soup.graphs:
        if i.attrs['title']=='Obama':
            for j in i:
                Obama.append(j.text)
        if i.attrs['title']=='Romney':
            for j in i:
                Romney.append(j.text)
                
    result=pd.DataFrame({'date':date,'Obama':Obama,'Romney':Romney})
    result=result[result.Obama!='']
    result['Obama']=result.Obama.astype(float)
    result['Romney']=result.Romney.astype(float)
    return result

In [112]:
def plot_colors(xml):
    soup=bs4.BeautifulSoup(xml,'lxml')
    result={}
    for graph in soup.graphs:
        title=_strip(graph.attrs['title'])
        result[title]=graph.attrs['color']
    return result
xml=get_pool_xml(1171)
data=rcp_poll_data(xml)
colors=plot_colors(xml)
data[list(colors.keys())].sum()

Obama     30385.1
Romney    28458.6
dtype: float64

In [63]:
def poll_plot(poll_id):
    """
    Make a plot of an RCP Poll over time
    
    Parameters
    ----------
    poll_id : int
        An RCP poll identifier
    """

    # hey, you wrote two of these functions. Thanks for that!
    xml = get_pool_xml(poll_id)
    data = rcp_poll_data(xml)
    colors = plot_colors(xml)

    #remove characters like apostrophes
    data = data.rename(columns = {c: _strip(c) for c in data.columns})

    #normalize poll numbers so they add to 100%    
    norm = data[list(colors.keys())].sum(axis=1) / 100    
    for c in list(colors.keys()):
        data[c] /= norm
    
    for label, color in colors.items():
        plt.plot(data.date, data[label], color=color, label=label)        
        
    plt.xticks(rotation=70)
    plt.legend(loc='best')
    plt.xlabel("Date")
    plt.ylabel("Normalized Poll Percentage")

In [64]:
poll_plot(1171)
plt.title("Obama Job Approval")

TypeError: unsupported operand type(s) for /: 'str' and 'int'