In [1]:
import requests
import pandas as pd
import datetime as dt
import os
import plotly.graph_objects as go
from scipy.stats import pearsonr
from sklearn.linear_model import LinearRegression
import numpy as np

In [2]:
# Paths of the views / clones CSV exports to plot; edit these two names to
# point at the files you want to analyse.
viewsfile, clonesfile = (
    'temp/updated_views_2019-10-19_19-02-05.csv',
    'temp/updated_clones_2019-10-19_18-42-52.csv',
)
# Read each CSV into its own DataFrame (same order as the paths above).
views, clones = (pd.read_csv(csv_path) for csv_path in (viewsfile, clonesfile))

In [3]:
def reg_line_pts(df, col2):
    """Return the least-squares trendline values for ``df[col2]``.

    The x-coordinates are the row positions ``0 .. len(df) - 1``; the
    y-coordinates are the values of column ``col2``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the series to fit.
    col2 : str
        Name of the numeric column used as the y-axis.

    Returns
    -------
    list
        One predicted y-value per row of ``df``. An empty frame yields an
        empty list. When no slope can be estimated (a single row, or a
        constant y-series) a flat line at the mean of y is returned.
    """
    n_points = len(df)
    if n_points == 0:
        return []

    xaxis = np.arange(n_points, dtype=float)
    yaxis = np.asarray(df[col2].values, dtype=float)

    # pearsonr needs at least two points and is undefined (NaN) for a
    # constant series; in both cases the best-fit line is simply flat.
    if n_points < 2 or np.ptp(yaxis) == 0:
        return [np.mean(yaxis)] * n_points

    # Ordinary least-squares slope expressed through Pearson's r:
    #   m = r * (std_y / std_x)
    corr, _ = pearsonr(xaxis, yaxis)
    m = corr * (np.std(yaxis) / np.std(xaxis))

    # The fitted line always passes through the point (mean_x, mean_y).
    yint = np.mean(yaxis) - m * np.mean(xaxis)

    return list(m * xaxis + yint)

def reg_line_pts2(df, col2):
    """Return the linear-regression trendline values for ``df[col2]``.

    Fits scikit-learn's ``LinearRegression`` with the row position
    (``0 .. len(df) - 1``) as the single feature and column ``col2`` as the
    target, then evaluates the fitted line at every row position.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the series to fit.
    col2 : str
        Name of the column used as the regression target.

    Returns
    -------
    numpy.ndarray
        1-D array with one predicted value per row of ``df``.
    """
    # The estimator wants 2-D inputs: one feature column and one target column.
    positions = np.arange(len(df)).reshape(-1, 1)
    targets = np.asarray(df[col2].values).reshape(-1, 1)

    model = LinearRegression().fit(positions, targets)

    # predict() gives an (n, 1) array; take its only column as a flat vector.
    return model.predict(positions)[:, 0]

In [5]:
# Daily view counts, with the fitted linear trendline drawn on top.
fig = go.Figure(data=[
    go.Scatter(
        x=views['yr_mo_day'],
        y=views['count'],
        mode='lines+markers',
        name='Actual Points',
    ),
    go.Scatter(
        x=views['yr_mo_day'],
        y=reg_line_pts2(views, 'count'),
        mode='lines+markers',
        name='Trendline',
    ),
])
fig.update_layout(
    title='Views Per Day',
    xaxis_title='Date',
    yaxis_title='Count',
)
fig.show()

In [6]:
# Daily unique-visitor counts, with the fitted linear trendline drawn on top.
fig = go.Figure(data=[
    go.Scatter(
        x=views['yr_mo_day'],
        y=views['uniques'],
        mode='lines+markers',
        name='Actual Points',
    ),
    go.Scatter(
        x=views['yr_mo_day'],
        y=reg_line_pts2(views, 'uniques'),
        mode='lines+markers',
        name='Trendline',
    ),
])
fig.update_layout(
    title='Unique Views Per Day',
    xaxis_title='Date',
    yaxis_title='Unique Count',
)
fig.show()

In [7]:
# Daily clone counts, with the fitted linear trendline drawn on top.
fig = go.Figure(data=[
    go.Scatter(
        x=clones['yr_mo_day'],
        y=clones['count'],
        mode='lines+markers',
        name='Actual Points',
    ),
    go.Scatter(
        x=clones['yr_mo_day'],
        y=reg_line_pts2(clones, 'count'),
        mode='lines+markers',
        name='Trendline',
    ),
])
fig.update_layout(
    title='Clones Per Day',
    xaxis_title='Date',
    yaxis_title='Counts',
)
fig.show()

In [8]:
# Daily unique-cloner counts, with the fitted linear trendline drawn on top.
fig = go.Figure(data=[
    go.Scatter(
        x=clones['yr_mo_day'],
        y=clones['uniques'],
        mode='lines+markers',
        name='Actual Points',
    ),
    go.Scatter(
        x=clones['yr_mo_day'],
        y=reg_line_pts2(clones, 'uniques'),
        mode='lines+markers',
        name='Trendline',
    ),
])
fig.update_layout(
    title='Unique Clones Per Day',
    xaxis_title='Date',
    yaxis_title='Unique Counts',
)
fig.show()