# Preparation to work with Google Colab

In [None]:
# Mount Google Drive so the notebook can read files stored there.
from os.path import join
from google.colab import drive

# Mount point inside the Colab VM; PROJECT_PATH below is rooted under it.
ROOT = "/content/drive"
drive.mount(ROOT)

In [None]:
# Folder on the mounted drive that holds this assignment's data files (e.g. logins.json).
PROJECT_PATH = '/content/drive/My Drive/Springboard/Take_home_assignment_1/ultimate_challenge/'

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import time
import pickle
import json
import urllib.request

# Part 1 ‐ Exploratory data analysis
The attached logins.json file contains (simulated) timestamps of user logins in a particular
geographic location. Aggregate these login counts based on 15-minute time intervals, and
visualize and describe the resulting time series of login counts in ways that best characterize the
underlying patterns of the demand. Please report/illustrate important features of the demand,
such as daily cycles. If there are data quality issues, please report them.

In [None]:
# Load the raw login records from the project folder.
FILE = 'logins.json'
# The `with` block closes the file on exit, so no explicit close() is needed.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform default encoding.
with open(join(PROJECT_PATH, FILE), encoding='utf-8') as json_file:
    login_dict = json.load(json_file)

In [None]:
# Turn the parsed JSON into a DataFrame for the time-series work below.
login_df = pd.DataFrame(login_dict)

In [None]:
# Size of the raw table as (rows, columns).
login_df.shape

In [None]:
# Parse the raw 'login_time' values into pandas datetimes so they can drive resampling.
login_df['login_time'] = pd.to_datetime(login_df['login_time'])

In [None]:
# Index by timestamp (resample needs a datetime-like index). The 'login_time'
# column is kept so that count() still has a column to tally per bin.
login_df.index = login_df['login_time']

In [None]:
# Count logins in consecutive 15-minute bins.
# 'min' is the offset alias for minutes; the older 'T' spelling is deprecated
# in pandas >= 2.2 and emits a FutureWarning there.
login_df_res = login_df.resample('15min').count()

In [None]:
# Shape of the binned table: one row per 15-minute interval.
login_df_res.shape

In [None]:
# Peek at the first few bins.
login_df_res.head()

In [None]:
# After count() the single remaining column holds the number of logins per bin.
login_df_res.columns = ['qty']

# Break each bin's timestamp into calendar parts for the groupings done later.
bin_start = login_df_res.index
for part in ('year', 'month', 'day'):
    login_df_res[part] = getattr(bin_start, part)
login_df_res['dayofweek'] = bin_start.strftime('%A')  # full weekday name, e.g. 'Monday'
for part in ('hour', 'minute'):
    login_df_res[part] = getattr(bin_start, part)

In [None]:
# Check the renamed count column and the new calendar columns.
login_df_res.head()

In [None]:
# Total number of logins across all bins.
login_df_res.qty.sum()

In [None]:
# Summary statistics, transposed so each column becomes a row.
login_df_res.describe().T

In [None]:
# Show the busiest 15-minute bin(s).
# The peak is looked up from the data rather than hard-coded (it is 73 for
# this dataset), so the cell keeps working if the input changes.
login_df_res.loc[login_df_res.qty == login_df_res.qty.max()]

In [None]:
# Full 15-minute login series in a single wide line chart.
qty_series = login_df_res['qty']
qty_series.plot(style='-', alpha=0.5, figsize=(20, 6))
plt.legend(['quantity of logins'], loc='upper left')
plt.title('Complete time series of logins, 15 minutes intervals')
plt.show()

In [None]:
# Typical day: average each 15-minute time-of-day slot over every date in the data.
slot_of_day = login_df_res.index.time
avg_by_slot = login_df_res.groupby(slot_of_day)['qty'].mean()
avg_by_slot.plot(style='-', alpha=0.5, figsize=(15, 4))
plt.legend(['quantity of logins'], loc='best')
plt.title('Average logins per day, 15 minutes intervals')
plt.show()

In [None]:
# One chart per weekday: mean logins for each 15-minute time-of-day slot, with
# that weekday's overall mean drawn as a dashed reference line.
days_of_week = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
for d in days_of_week:
    day_of_week = login_df_res.loc[login_df_res.dayofweek == d, 'qty']
    # Computed once and reused for both the reference line and the printout.
    overall_avg = day_of_week.mean()
    day_of_week.groupby(day_of_week.index.time).mean().plot(alpha=0.5,
                                                  style='-',
                                                  figsize=(15, 4),
                                                  label='average logins, each 15 min')
    plt.axhline(y=overall_avg, color='r', linestyle='--', label='Overall average')
    plt.legend(loc='best')
    plt.title('Average logins per day, 15 minutes intervals, for {}'.format(d))
    plt.show()
    print('Overall average: {:.2f}'.format(overall_avg))

In [None]:
# Aggregate the 15-minute counts into daily and weekly totals and plot them side by side.
daily = login_df_res['qty'].resample('D').sum()
# NOTE: 'W' bins end on Sunday, so the first and last weekly totals may cover
# only part of a week and can look artificially low.
weekly = login_df_res['qty'].resample('W').sum()
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 6))
fig.suptitle('Resampling at different rates')
ax1.plot(daily, alpha=0.5)
ax2.plot(weekly, alpha=0.5)
ax1.set_title('Daily')
ax2.set_title('Weekly')
plt.show()

### Part 1: Summary of conclusions from exploratory analysis

1. Login data ranges from Jan, 1st, 1970 up to Apr, 15th, 1970
2. The year 1970 is probably wrong, or was deliberately disguised, because this type of service did not exist in 1970
3. The average number of logins in a 15-minute interval is 9.51
4. The maximum number of logins registered in a single 15-minute interval was 73, which happened on March 1st at 04:30, a Sunday
5. Weekdays, from Monday to Friday, have average logins per 15 minutes ranging from 6 to 10, while during weekends this average goes up to between 12 and 13
6. Peak hours during weekdays happen near midday and also from 8 to 11 p.m., while during weekends the busy hours are late at night (from midnight up to 4 a.m.)
7. We observed more activity during March and April compared to January and February


