In [None]:
import pandas as pd
from seaborn import set_style
set_style("darkgrid")
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Truncate DataFrame/Series displays to 10 rows. The option is spelled out in
# full because the bare "max_rows" pattern matches more than one option on
# recent pandas and is rejected as ambiguous.
pd.set_option("display.max_rows", 10)

## Google Finance

In [None]:
# NOTE(review): `pandas.io.data` has been removed from pandas itself (it lives
# on as the separate pandas-datareader package), so this import only works on
# an old pandas -- confirm which environment this notebook targets.
from pandas.io import data

# Price history for the GOOG ticker from the "google" data source.
y = data.DataReader(name="GOOG", data_source="google")

# Month-end mean of the closing price. The aggregation is a method on the
# resampler; the old `resample("M", "mean")` form is no longer accepted.
avg = y.Close.resample("M").mean()
# Centered 12-month moving average of the monthly means
# (replacement for the removed top-level `pd.rolling_mean`).
ravg = avg.rolling(window=12, center=True).mean()

# Dashed red: monthly means. Solid red: smoothed series on the same axes.
ax = avg.plot(style="r--")
ravg.plot(style="r", ax=ax)

In [None]:
# Display the centered 12-month rolling mean computed in the previous cell.
ravg

In [None]:
# NOTE(review): `pandas.io.data` has been removed from pandas itself (it lives
# on as the separate pandas-datareader package), so this import only works on
# an old pandas -- confirm which environment this notebook targets.
from pandas.io import data

# Price history for the AAPL ticker from the "google" data source.
y = data.DataReader(name="AAPL", data_source="google")

# Month-end mean of the closing price. The aggregation is a method on the
# resampler; the old `resample("M", "mean")` form is no longer accepted.
avg = y.Close.resample("M").mean()
# Centered 12-month moving average of the monthly means
# (replacement for the removed top-level `pd.rolling_mean`).
ravg = avg.rolling(window=12, center=True).mean()

# Dashed red: monthly means. Solid red: smoothed series on the same axes.
ax = avg.plot(style="r--")
ravg.plot(style="r", ax=ax)

## Microbiome

In [None]:
# Load the microbiome metadata spreadsheet; it is merged into each per-file
# count table further down.
meta_df = pd.read_excel("../data/microbiome/metadata.xls")

In [None]:
# Display the metadata table loaded from metadata.xls.
meta_df

In [None]:
from glob import glob

# glob() returns matches in arbitrary, filesystem-dependent order; sorting
# makes the file list -- and any table built from it in order -- reproducible.
mid_files = sorted(glob("../data/microbiome/MID*.xls"))

In [None]:
# List the MID*.xls paths that were found.
mid_files

In [None]:
import os

In [None]:
# Build one long table from the per-file spreadsheets: each MID*.xls file is
# read as headerless (Taxon, Counts) rows, tagged with its barcode, and joined
# to the metadata.
frames = []
keys = []  # NOTE(review): never populated in this cell; kept in case a later cell reads it

for fname in mid_files:
    dta = pd.read_excel(fname, header=None, names=["Taxon", "Counts"])
    # The barcode is the file name without directory or extension. Taking the
    # whole stem (rather than its first four characters) keeps e.g. MID1 and
    # MID10 distinct while giving the same result for four-character stems.
    dta["BARCODE"] = os.path.splitext(os.path.basename(fname))[0]
    # merge() joins on the columns the two frames share -- presumably just
    # BARCODE; verify against the columns of metadata.xls.
    dta = dta.merge(meta_df)
    frames.append(dta)

df = pd.concat(frames).set_index(["Taxon", "BARCODE"])

In [None]:
# Preview the first rows of the combined table indexed by (Taxon, BARCODE).
df.head()

## FEC GroupBy

In [None]:
# Run the load_fec script and pull its variables into this session. The cells
# below use a DataFrame named `fec` that is not defined in the cells shown
# here, so it presumably comes from that script -- confirm in load_fec.
%run load_fec

In [None]:
# Parse the `date` column into datetimes so the `.dt` accessor and the
# date-based resampling used later can work on it.
fec['date'] = pd.to_datetime(fec.date)

The path of least resistance is to add new `month` and `year` columns and group on those.

In [None]:
# Split the parsed dates into calendar components that can be used as plain
# group-by keys.
fec["month"] = fec["date"].dt.month
fec["year"] = fec["date"].dt.year

In [None]:
# Total contributed amount for every (month, year, candidate) combination.
totals = (
    fec
    .groupby(["month", "year", "candidate"])["amount"]
    .sum()
)

In [None]:
# Preview the first few (month, year, candidate) totals.
totals.head()

## FEC Refund Counts

In [None]:
# Label every contribution with a party, matching candidates by substring.
# `.loc` replaces the `.ix` indexer, which no longer exists in pandas.
# The assignments run in order, so a later match overrides an earlier one.
fec["party"] = ""
fec.loc[fec.candidate.str.contains("Obama"), "party"] = "Democrat"
fec.loc[fec.candidate.str.contains("Earl"), "party"] = "Libertarian"
fec.loc[fec.candidate.str.contains("Jill"), "party"] = "Green"
# Rows still holding the empty placeholder matched none of the names above.
fec.loc[fec.party == "", "party"] = "Republican"

In [None]:
# Count the rows with a negative amount -- treated here as refunds -- for
# every (year, month, party) combination.
refunds = (
    fec
    .groupby(["year", "month", "party"])
    .apply(lambda grp: (grp.amount < 0).sum())
)

In [None]:
# Pivot the `party` index level into columns: one row per (year, month),
# one column per party.
# NOTE: this rebinds `refunds`, so re-running this cell without re-running the
# one above fails because the `party` level is already gone.
refunds = refunds.unstack(level="party")

Were a lot of people asking for refunds after the election?

In [None]:
# Sort by the (year, month) index and show the last five periods.
refunds.sort_index().tail()

## Contributor Occupations

In [None]:
def groupby_occup_sum(dta, n=5):
    """Return the ``n`` occupations with the largest total ``amount``.

    Parameters
    ----------
    dta : DataFrame
        Must contain ``occupation`` and ``amount`` columns.
    n : int, default 5
        Number of top occupations to keep.

    Returns
    -------
    Series
        Summed ``amount`` indexed by occupation, largest first, with at most
        ``n`` entries.
    """
    totals = dta.groupby("occupation")["amount"].sum()
    # The in-place Series.sort() no longer exists in pandas; sort_values()
    # returns the ordered Series instead.
    return totals.sort_values(ascending=False).iloc[:n]

In [None]:
def groupby_occup_count(dta, n=5):
    """Return the ``n`` occupations with the most rows in ``dta``.

    Parameters
    ----------
    dta : DataFrame
        Must contain ``occupation`` and ``amount`` columns.
    n : int, default 5
        Number of top occupations to keep.

    Returns
    -------
    Series
        Row count indexed by occupation, largest first, with at most ``n``
        entries.
    """
    counts = dta.groupby("occupation")["amount"].size()
    # The in-place Series.sort() no longer exists in pandas; sort_values()
    # returns the ordered Series instead.
    return counts.sort_values(ascending=False).iloc[:n]

In [None]:
# Keep only rows with a strictly positive amount (drops negative and zero
# amounts). `.loc` replaces the `.ix` indexer, which no longer exists in pandas.
pos_fec = fec.loc[fec.amount > 0]

In [None]:
# Group the positive contributions by candidate.
# NOTE(review): `g` is not referenced by any cell visible below; the next cell
# builds the same groupby again.
g = pos_fec.groupby(["candidate"])

In [None]:
# Per candidate: the ten occupations with the largest total contributed
# amount, and the five (the helper's default `n`) with the most contributions.
# Despite its name, `occup_size` holds summed amounts, not group sizes.
occup_size = pos_fec.groupby(["candidate"]).apply(groupby_occup_sum, n=10)
occup_count = pos_fec.groupby(["candidate"]).apply(groupby_occup_count)

In [None]:
# Temporarily raise the display limit to 100 rows so the summary prints
# without truncation. The option is spelled out in full because the bare
# "max_rows" pattern matches more than one option on recent pandas and is
# rejected as ambiguous.
with pd.option_context("display.max_rows", 100):
    print(occup_size)

In [None]:
# Temporarily raise the display limit to 100 rows so the summary prints
# without truncation. The option is spelled out in full because the bare
# "max_rows" pattern matches more than one option on recent pandas and is
# rejected as ambiguous.
with pd.option_context("display.max_rows", 100):
    print(occup_count)

## FEC Long to Wide

In [None]:
# Reduce to the four columns of interest and total the amount for each
# (state, date, candidate) combination, keeping the keys as ordinary columns.
fec_amounts = (
    fec[["state", "date", "candidate", "amount"]]
    .groupby(["state", "date", "candidate"], as_index=False)
    .sum()
)

In [None]:
# Reshape to wide form: one row per date, one column per (candidate, state).
# The previous cell already left a single summed amount per
# (state, date, candidate), so pivot_table's default aggregation sees one
# value per cell.
# The columns are then totalled by calendar month. The aggregation is a method
# on the resampler; the old `resample("M", "sum")` form is no longer accepted.
# NOTE: this rebinds `fec_amounts`, so the cell cannot be re-run on its own.
fec_amounts = (
    fec_amounts
    .pivot_table(index="date",
                 columns=["candidate", "state"],
                 values="amount")
    .resample("M")
    .sum()
)

In [None]:
# Monthly totals per (candidate, state) column. The aggregation is a method on
# the resampler; the old `resample("M", "sum")` form is no longer accepted.
# NOTE(review): `fec_amounts` was already resampled to monthly sums in the
# previous cell, so this second pass looks redundant -- confirm and simplify.
fec_monthly = fec_amounts.resample("M").sum()

In [None]:
# Transpose so each (candidate, state) pair is a row and each month a column.
# NOTE: this rebinds `fec_monthly`; re-running the cell alone flips it back.
fec_monthly = fec_monthly.T

In [None]:
# Order the month columns newest-first.
fec_monthly = fec_monthly.sort_index(axis=1, ascending=False)

In [None]:
# Preview the first rows of the wide monthly table.
fec_monthly.head()