In [None]:
# standard libraries
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
import os
import re

# plotting libraries
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# get the datetime library for date & time calcs
from datetime import datetime, timedelta

In [None]:
# Open the Excel workbook with the checking goal data.
# The location can be overridden through the CHECKING_GOALS_XLSX environment
# variable so the notebook is not tied to one user's directory layout; when the
# variable is not set, the original path is used unchanged.
CHECKING_GOALS_XLSX = os.path.normpath(
    os.environ.get(
        'CHECKING_GOALS_XLSX',
        'C:/Users/n846490/Documents/DigitalAnalytics/CheckingAnalysis/CheckingGoals20152016.xlsx'
    )
)

checkingGoals = pd.ExcelFile(CHECKING_GOALS_XLSX)

In [None]:
# Read the 'Dataset2' worksheet of the opened workbook and preview its first rows.
sheet_name = 'Dataset2'
goals = checkingGoals.parse(sheet_name)
goals.head()



In [None]:
# Print a summary of the parsed sheet (columns, non-null counts, dtypes) as a quick check of the load.
goals.info()

In [None]:
# Plot the goal columns of the data frame as two stacked panels:
# the SRC goal on top, and the Basic / Student / Premier goals together below.

# one figure holding two rows and one column of axes
fig, (ax_top, ax_bottom) = plt.subplots(2, 1, figsize=(15, 10))

############################################################################################
#######  TOP PANEL: THE SRC GOAL
ax_top.plot(goals['Day Index'], goals['Goal1SRC'], color='skyblue', label='SRC')

# legend, y label and tick label size for the top panel
ax_top.legend(loc='upper right', fontsize=14)
ax_top.set_ylabel('Checking Goal App Starts', fontsize=16)
ax_top.tick_params(axis='y', labelsize=14)

# hide the x axis of the top panel; x tick labels are shown on the bottom panel only
ax_top.xaxis.set_visible(False)

############################################################################################
#######  BOTTOM PANEL: THE OTHER GOALS ON A SINGLE PLOT
# All three series are drawn on ONE axes object. Calling fig.add_subplot(212)
# once per series is not safe: Matplotlib versions where add_subplot always
# creates a new Axes would stack three overlapping axes with different y scales,
# and the legend would list only the last series.
other_goals = [
    ('Goal2Basic', 'salmon', 'Basic'),
    ('Goal3Student', 'indigo', 'Student'),
    ('Goal4Premier', 'cyan', 'Premier'),
]
for column, color, label in other_goals:
    ax_bottom.plot(goals['Day Index'], goals[column], color=color, label=label)

# legend, y label and tick label size for the bottom panel
ax_bottom.legend(loc='upper right', fontsize=14)
ax_bottom.set_ylabel('Checking Goal App Starts', fontsize=16)
ax_bottom.tick_params(axis='both', labelsize=14)

fig.tight_layout()
plt.show()

In [None]:
# first define r_squared
def r_squared(actual, ideal):
    """Return the r-squared value of a fit as explained / total variation.

    Both sums of squares are taken about the mean of ``actual``::

        sum((ideal - mean(actual)) ** 2) / sum((actual - mean(actual)) ** 2)

    NOTE: this ratio matches the more common ``1 - SS_res / SS_tot`` form only
    when ``ideal`` is a least-squares fit that includes an intercept term
    (such as a degree-1 ``np.polyfit``); for other predictions it can exceed 1.

    Parameters
    ----------
    actual : array-like of numbers
        The observed values.
    ideal : array-like of numbers
        The fitted / predicted values, compared against the mean of ``actual``.

    Returns
    -------
    float
        The ratio described above, or NaN when ``actual`` is empty or has no
        variation (the ratio is undefined because the denominator is zero).
    """
    actual_values = np.asarray(actual, dtype=float)
    ideal_values = np.asarray(ideal, dtype=float)

    # nothing to average: report "undefined" instead of emitting numpy warnings
    if actual_values.size == 0:
        return float('nan')

    actual_mean = actual_values.mean()
    total_dev = np.sum((actual_values - actual_mean) ** 2)

    # a constant series has zero total variation, so the ratio is undefined
    if total_dev == 0:
        return float('nan')

    explained_dev = np.sum((ideal_values - actual_mean) ** 2)
    return explained_dev / total_dev

In [None]:
# combined and fit line: total checking goals per day with a linear trend line

# NOTE: these two imports are not used by this cell; they are kept in place
# because other cells of the notebook may rely on them.
from mpl_toolkits.axes_grid1 import host_subplot
import mpl_toolkits.axisartist as AA

##########################################  Perform a Linear Regression for Fitting
dates = goals['Day Index']
conversions = goals['TotalCheckingGoals']

# regress against the day number counted from year_start (2015-10-01 is day 1)
year_start = datetime(2015, 10, 1)
days = (pd.to_datetime(dates) - year_start).dt.days.values + 1

# make the linear fit; np.polyfit returns the coefficients highest degree first
slope, intercept = np.polyfit(days, conversions, 1)
ideal_convs = intercept + (slope * days)
r_sq = r_squared(conversions, ideal_convs)

############################################   This is all for plotting

# the number shown in the legend entry is the fitted slope (conversions per day)
fit_label = 'Linear fit ({0:.2f})'.format(slope)

fig, ax1 = plt.subplots(figsize=(15, 8))

# Draw the data and the fit on the SAME axes, both against the dates.
# A hidden twin x axis with hand-set limits would not line up with the
# autoscaled date axis, and its legend would omit the 'Conversions' entry.
ax1.plot(dates, conversions, label='Conversions')
ax1.plot(dates, ideal_convs, 'r', label=fit_label)
ax1.set_ylabel('Conversions', fontsize=14)

# annotate and add legend; the annotation sits just below the two-entry legend
ax1.annotate('r^2 = {0:.2f}'.format(r_sq), (0.07, 0.84), xycoords='axes fraction', size=14)
ax1.legend(loc='upper left', fontsize=14)

ax1.tick_params(axis='both', labelsize=14)

plt.show()
