# Visualize
#### By Julien Dhouti

In [1]:
# First we import the necessary libraries
import pandas as pd
import plotly.plotly as py
import plotly.tools as tls
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
import numpy as np

# Render figures offline, inside the notebook, so nothing is uploaded to the
# Plotly cloud. connected=True loads plotly.js from the CDN rather than
# embedding a copy of the library in the notebook file.
init_notebook_mode(connected=True)

In [2]:
# Load the cleaned dataset and discard the 'Unnamed: 0' column
# (presumably a stale index written out by an earlier to_csv -- confirm).
df = pd.read_csv('data/clean_data.csv').drop(columns=['Unnamed: 0'])

Now that the data has been cleaned and stripped of any missing values, we can start visualizing it using the Plotly library.

In [3]:
# Basic line chart: total sales and new sales for each date in the month.
x = df.date
y1 = df.sales
y2 = df.new_sales

# One trace per series; both are drawn against the same date axis.
total_sales = go.Scatter(
    x=x,
    y=y1,
    mode='markers+lines',
    name='Total Sales',
    line=dict(color='rgb(244, 66, 149)'),
)

new_sales = go.Scatter(
    x=x,
    y=y2,
    mode='markers+lines',
    name='New Sales',
    line=dict(color='rgb(77, 244, 65)'),
)

# A plain list of traces replaces the deprecated go.Data wrapper.
data = [total_sales, new_sales]

# Plain dicts replace the deprecated go.XAxis / go.YAxis classes.
layout = go.Layout(
    title='Total Sales for April',
    xaxis=dict(title='Days'),
    yaxis=dict(title='Sales ($)'),
)

# Combine traces and layout, then render inline.
figure = go.Figure(data=data, layout=layout)
iplot(figure)

Interesting: the graph shows that I performed better in the first half of the month and then gradually did worse as the end of the month approached.

One thing that I wanted to find out is whether the number of sales I get a day is correlated with the number of calls that I receive. To answer this, I created a basic scatter plot.

In [4]:
# Scatter plot of new sales (x) against calls per day (y), one point per row.
x = df.new_sales
y = df.calls_per_day

# Single marker-only trace; the date is attached as hover text so each
# point can be traced back to its day.
main = go.Scatter(
    x=x,
    y=y,
    mode='markers',
    text=df.date,
    marker=dict(
        color='rgb(130, 175, 255)',
        size=12,
        line=dict(width=1),
    ),
)

# A plain list of traces replaces the deprecated go.Data wrapper.
data = [main]

# Plain dicts replace the deprecated go.XAxis / go.YAxis classes.
layout = go.Layout(
    title='New Sales vs. Number of Calls',
    xaxis=dict(title='New Sales ($)'),
    yaxis=dict(title='Number of Calls'),
)

# Combine trace and layout, then render inline.
figure = go.Figure(data=data, layout=layout)
iplot(figure)

There doesn't seem to be any clear correlation in the graph. Perhaps I should wait until I have more data.
I can still compute the correlation coefficient to get an exact value, however.

In [8]:
# Pearson correlation matrix for the new_sales and calls_per_day columns;
# the off-diagonal entries hold the coefficient between the two.
np.corrcoef(df['new_sales'], df['calls_per_day'])

array([[ 1.        ,  0.22895739],
       [ 0.22895739,  1.        ]])

Yikes, the correlation is weak: only 0.229.

Another interesting graph would compare three variables at once, which requires a 3D scatter plot. The three variables that I want to compare are customer availability, number of orders, and the amount of new sales. This would allow me to see whether everything else increases when availability increases.

In [5]:
# 3D scatter plot: new sales (x) vs. orders (y) vs. calls per day (z).
# NOTE(review): the accompanying text names customer availability as one of
# the three variables, but the z axis plots calls_per_day -- confirm which
# column is intended.
x = df.new_sales
y = df.orders
z = df.calls_per_day

# Single marker-only trace; the date is attached as hover text.
main = go.Scatter3d(
    x=x,
    y=y,
    z=z,
    text=df.date,
    mode='markers',
    marker=dict(
        color='rgb(164, 244, 66)',
        size=13,
        line=dict(width=1),
    ),
)

# A plain list of traces replaces the deprecated go.Data wrapper.
data = [main]

# 3D axis titles live under layout.scene rather than layout.xaxis/yaxis.
layout = go.Layout(
    scene=dict(
        xaxis=dict(title='New Sales ($)'),
        yaxis=dict(title='# of orders'),
        zaxis=dict(title='# of calls'),
    ),
    width=700,
    title='New Sales vs. Orders vs. Calls',
    margin=dict(
        r=20, b=10,
        l=10, t=10,
    ),
)

# Combine trace and layout, then render inline.
figure = go.Figure(data=data, layout=layout)
iplot(figure)

It would be interesting to generate a simple line graph with all of the important variables graphed. To do this, however, we will need to create a function that can generate graphs for us.

In [6]:
df.columns

Index(['date', 'cust_avail_v3', 'css_count', 'css_score', 'orders', 'new_conv',
       'new_sales', 'new_sales_perc', 'sales', 'calls_per_day'],
      dtype='object')

In [16]:
# Columns to overlay on a single chart. Each name appears once, because a
# repeated entry would only draw the same series a second time.
# NOTE(review): this list used to repeat 'new_conv'; if a different fifth
# column (e.g. 'new_sales') was intended, add it here.
columns = ['cust_avail_v3', 'css_score', 'orders', 'new_conv']
N = len(columns)

# Evenly spaced hues, one per column. endpoint=False keeps 360 out of the
# range: hue 360 is the same colour as hue 0, so including it would give the
# first and last traces identical colours.
colors = [
    'hsl(' + str(h) + ',50%,50%)'
    for h in np.linspace(0, 360, N, endpoint=False)
]


def build_traces(frame, column_names, palette):
    """Build one line trace per column, all plotted against ``frame.date``.

    Parameters
    ----------
    frame : DataFrame with a ``date`` column plus every name in
        ``column_names``.
    column_names : sequence of column names to plot.
    palette : sequence of colour strings, paired with ``column_names`` by
        position.

    Returns
    -------
    list of ``go.Scatter`` traces, in the order of ``column_names``.
    """
    return [
        go.Scatter(
            x=frame.date,
            y=frame[column],
            # The trace's `yaxis` property only accepts axis ids such as
            # 'y' or 'y2', so the column name goes in `name` (the legend
            # label) and every trace stays on the default y axis.
            name=column,
            text=column,
            mode='markers+lines',
            marker=dict(color=color),
        )
        for column, color in zip(column_names, palette)
    ]


data = build_traces(df, columns, colors)

In [17]:
# Assemble the multi-series chart. A title and axis labels are set so the
# figure can be read on its own when the notebook is skimmed.
figure = go.Figure(
    data=data,
    layout=go.Layout(
        title='Key Metrics for April',
        xaxis=dict(title='Days'),
        yaxis=dict(title='Value'),
    ),
)
iplot(figure)