In [14]:
import psycopg2
import json
import numpy as np
import pandas as pd
import datetime
import os
from nltk.corpus import words
import statsmodels.api as sm
import matplotlib.pyplot as plt
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
plt.style.use("ggplot")
%matplotlib inline

In [2]:
def plot_seasonal_decomposition(axs, series, sd):
    """Draw a series and its decomposition components on four axes.

    Parameters
    ----------
    axs : indexable of axes-like objects
        Positions 0-3 are used; each must provide ``plot`` and ``set_title``.
    series : object with an ``index`` attribute
        Plotted as-is on the first axis; its ``index`` is the x-axis of
        every panel.
    sd : object exposing ``trend``, ``seasonal`` and ``resid``
        The decomposition result whose components fill panels 1-3.
    """
    # (title, attribute of `sd`) per panel; None means "plot the raw series".
    panels = (
        ("Raw Series", None),
        ("Trend Component $T_t$", "trend"),
        ("Seasonal Component $S_t$", "seasonal"),
        ("Residual Component $R_t$", "resid"),
    )
    for position, (title, component) in enumerate(panels):
        panel = axs[position]
        values = series if component is None else getattr(sd, component)
        panel.plot(series.index, values)
        panel.set_title(title)

In [3]:
# Read the database connection settings from a JSON file two directories up,
# so no credentials are written into the notebook itself.
with open("../../elections_login.json", "rb") as f:
    login_info = json.loads(f.read())

In [4]:
# Open the PostgreSQL connection using the settings loaded into login_info.
conn = psycopg2.connect(
    host=login_info["ip"],
    dbname=login_info["db"],
    user=login_info["username"],
    password=login_info["password"],
)

In [5]:
# Open a cursor on the connection for the metadata queries that follow.
cursor = conn.cursor()

In [89]:
# List the column names of the transaction_details table from the catalog.
column_names_query = "select column_name from information_schema.columns where table_name='transaction_details'"
cursor.execute(column_names_query)
column_names = [record[0] for record in cursor.fetchall()]

In [6]:
# List every table in the "public" schema; each result row is a 1-tuple.
public_tables_query = """SELECT table_name FROM information_schema.tables
       WHERE table_schema = 'public'"""
cursor.execute(public_tables_query)
tables = [record[0] for record in cursor.fetchall()]

In [7]:
# Display the table names collected from information_schema.
tables

['transaction_details',
 'committee_history',
 'election_activity',
 'django_content_type',
 'auth_group_permissions',
 'auth_group',
 'auth_user_groups',
 'auth_permission',
 'auth_user_user_permissions',
 'auth_user',
 'scraper_date_range',
 'django_admin_log',
 'committees_list',
 'donor',
 'payee',
 'statement_of_org',
 'transactions',
 'django_migrations',
 'ballots',
 'django_session']

In [8]:
# Load every row and column of the transactions table into a DataFrame.
transactions_df = pd.read_sql("SELECT * FROM transactions", conn)

In [9]:
# Close the database connection; the cells below work from transactions_df.
conn.close()

# NEW METHOD

**FINDING ELECTION SEASONS**

In [10]:
# Parse the transaction dates, then split the rows into contributions
# (money in) and expenditures (money out) as independent copies.
transactions_df["transaction_date"] = pd.to_datetime(transactions_df["transaction_date"])

is_contribution = transactions_df["transaction_subtype"] == "Cash Contribution"
is_expenditure = transactions_df["transaction_subtype"] == "Cash Expenditure"

money_in_df = transactions_df.loc[is_contribution].copy()
money_out_df = transactions_df.loc[is_expenditure].copy()

In [11]:
def next_weekday(d):
    """Snap ``d`` to a Monday and return the shifted value.

    Monday through Wednesday move back to the Monday of the same week;
    Thursday through Sunday move forward to the following Monday. Despite
    the name, the result is therefore not always later than ``d``.

    ``d`` must provide ``weekday()`` (Monday == 0) and support adding a
    ``datetime.timedelta``; only whole days are shifted.

    NOTE(review): Thursday is three days after the previous Monday but is
    moved four days forward - confirm this split is intended.
    """
    days_since_monday = d.weekday()
    shift = -days_since_monday if days_since_monday < 3 else 7 - days_since_monday
    return d + datetime.timedelta(shift)

In [12]:
def next_month(d):
    """Round ``d`` to the first day of a month, returned as ``datetime.date``.

    Days 1-14 map to the first of the same month; days 15 onward map to the
    first of the following month, rolling December over into January of the
    next year. Only ``d.year``, ``d.month`` and ``d.day`` are read.
    """
    if d.day < 15:
        return datetime.date(d.year, d.month, 1)
    if d.month == 12:
        # Second half of December belongs to January of the next year.
        return datetime.date(d.year + 1, 1, 1)
    return datetime.date(d.year, d.month + 1, 1)

In [13]:
# Bucket each contribution by its rounded week and rounded month, stored as
# pandas datetimes so the columns can serve as a time index after grouping.
contribution_dates = money_in_df["transaction_date"]
money_in_df["week_round"] = pd.to_datetime(contribution_dates.map(next_weekday))
money_in_df["month_round"] = pd.to_datetime(contribution_dates.map(next_month))

NameError: name 'datetiame' is not defined

In [None]:
# Count the non-null contribution amounts in each month and week bucket.
# Selecting with a list keeps each result a one-column DataFrame.
month_round_df = money_in_df.groupby("month_round")[["amount"]].count()
week_round_df = money_in_df.groupby("week_round")[["amount"]].count()

In [None]:
# Decompose the monthly contribution counts into trend, seasonal and residual
# components using a cycle of 12 observations (one year of monthly buckets).
# `period` replaces the `freq` keyword, which current statsmodels releases no
# longer accept.
month_decomp = sm.tsa.seasonal_decompose(month_round_df, period=12)
fig, axs = plt.subplots(4, figsize=(14, 8))
plot_seasonal_decomposition(axs, month_round_df, month_decomp)

In [None]:
# Decompose the weekly contribution counts into trend, seasonal and residual
# components using a cycle of 52 observations (one year of weekly buckets).
# `period` replaces the `freq` keyword, which current statsmodels releases no
# longer accept.
week_decomp = sm.tsa.seasonal_decompose(week_round_df, period=52)
fig, axs = plt.subplots(4, figsize=(14, 8))
plot_seasonal_decomposition(axs, week_round_df, week_decomp)