In [1]:
import os
import datetime
import pandas as pd
from tqdm import tqdm
from utils import (
    get_usage_last_14_days,
    create_connection,
    create_table_from_df,
    delete_table,
)

%load_ext nb_black

<IPython.core.display.Javascript object>

## Get all historical data

In [3]:
# Read the cumulative tracking pickle from the production archive folder
# that is stamped with `today_str`.
today_str = "05-03-2022"
cumulative_path = f"data/prod_archive/{today_str}/cumulative_tracking_{today_str}.pkl"
cumulative_df = pd.read_pickle(cumulative_path)

<IPython.core.display.Javascript object>

In [4]:
# Preview the loaded frame; the rendered output shows rows indexed by date
# with one `repo` column and four clone/view count columns.
cumulative_df

Unnamed: 0,clones_unique,clones_total,views_unique,views_total,repo
2021-02-10,1,1,0,0,Churn_Prediction
2021-02-10,2,2,0,0,Image_Analysis
2021-02-10,0,0,0,0,Anomaly_Detection
2021-02-10,0,0,0,0,NeuralQA
2021-02-10,7,10,8,41,Structural_Time_Series
...,...,...,...,...,...
2022-05-02,0,0,0,0,Train_Gensim_W2V
2022-05-02,1,1,0,0,Tensorboard_on_CML
2022-05-02,0,0,0,0,Continuous_Model_Monitoring
2022-05-02,0,0,0,0,Video_Classification


<IPython.core.display.Javascript object>

In [5]:
# Label-based lookup of every row indexed by 2022-05-02.
cumulative_df.loc["05-02-2022"]

Unnamed: 0,clones_unique,clones_total,views_unique,views_total,repo
2022-05-02,4,5,4,12,Churn_Prediction
2022-05-02,1,2,1,4,Image_Analysis
2022-05-02,1,1,2,2,Anomaly_Detection
2022-05-02,0,0,2,2,NeuralQA
2022-05-02,1,1,3,3,Structural_Time_Series
2022-05-02,0,0,0,0,SpaCy_Entity_Extraction
2022-05-02,0,0,2,3,Explainability_LIME_SHAP
2022-05-02,0,0,3,7,Question_Answering
2022-05-02,0,0,0,0,Active_Learning
2022-05-02,1,1,1,1,MLFlow_Tracking


<IPython.core.display.Javascript object>

## Collect data from daily archives for the missing dates

In [6]:
# One timestamp per calendar day across the gap, both endpoints included.
missing_start, missing_end = "03-13-2022", "05-03-2022"
missing_range = pd.date_range(start=missing_start, end=missing_end)

<IPython.core.display.Javascript object>

In [7]:
# Load the pickled daily archive for every date in `missing_range`.
# Dates with no archive file on disk are reported and skipped.
daily_archives = []
for date in tqdm(missing_range):
    date_str = date.strftime("%m-%d-%Y")
    archive_path = f"data/daily_archive/{date_str}/daily_tracking_{date_str}.pkl"
    try:
        temp_daily_df = pd.read_pickle(archive_path)
    except FileNotFoundError:
        print(f"No archive for {date_str}")
    else:
        daily_archives.append(temp_daily_df)

100%|██████████| 52/52 [00:00<00:00, 397.48it/s]

No archive for 03-13-2022
No archive for 04-15-2022
No archive for 04-23-2022
No archive for 04-24-2022





<IPython.core.display.Javascript object>

In [8]:
# Inspect the last daily frame that was loaded (final element of the list).
daily_archives[-1]

Unnamed: 0,clones_unique,clones_total,views_unique,views_total,repo
2022-05-02,4,5,4,12,Churn_Prediction
2022-05-02,1,2,1,4,Image_Analysis
2022-05-02,1,1,2,2,Anomaly_Detection
2022-05-02,0,0,2,2,NeuralQA
2022-05-02,1,1,3,3,Structural_Time_Series
2022-05-02,0,0,0,0,SpaCy_Entity_Extraction
2022-05-02,0,0,2,3,Explainability_LIME_SHAP
2022-05-02,0,0,3,7,Question_Answering
2022-05-02,0,0,0,0,Active_Learning
2022-05-02,1,1,1,1,MLFlow_Tracking


<IPython.core.display.Javascript object>

In [9]:
# Stack all loaded daily frames row-wise into one frame.
# NOTE(review): pd.concat raises ValueError when the list is empty, i.e. if
# no daily archive file was found for any date.
daily_archives_df = pd.concat(daily_archives)

<IPython.core.display.Javascript object>

## Combine historical with missing records

In [10]:
# Combine the historical frame with the rows recovered from daily archives.
#
# Historical rows are kept only for dates that the daily archives do NOT
# cover; the archive rows are then appended. The overlap is derived from
# daily_archives_df's own index instead of a hard-coded list of date strings:
#   * no date present in both frames ends up duplicated, and
#   * the cell gives the same result when re-run after the cumulative pickle
#     has been overwritten with the merged frame (it is saved back to the
#     path it was loaded from).
# It also avoids matching raw strings against the index, which appears to be
# datetime-like -- TODO confirm both frames share the same index dtype.
historical_only_df = cumulative_df[
    ~cumulative_df.index.isin(daily_archives_df.index)
]
new_cumulative_df = pd.concat([historical_only_df, daily_archives_df])

<IPython.core.display.Javascript object>

In [20]:
# Sanity check: (row count, column count) of the combined frame.
new_cumulative_df.shape

(7143, 5)

<IPython.core.display.Javascript object>

In [21]:
# Peek at the first rows of the combined frame.
new_cumulative_df.head()

Unnamed: 0,clones_unique,clones_total,views_unique,views_total,repo
2021-02-10,1,1,0,0,Churn_Prediction
2021-02-10,2,2,0,0,Image_Analysis
2021-02-10,0,0,0,0,Anomaly_Detection
2021-02-10,0,0,0,0,NeuralQA
2021-02-10,7,10,8,41,Structural_Time_Series


<IPython.core.display.Javascript object>

## Save new cumulative_df

In [16]:
# Persist the merged frame into the date-stamped production archive folder.
# NOTE: this is the same path the cumulative frame was read from earlier in
# the notebook, so the original pickle is overwritten.
today_str = "05-03-2022"
output_path = f"data/prod_archive/{today_str}/cumulative_tracking_{today_str}.pkl"
new_cumulative_df.to_pickle(output_path)

<IPython.core.display.Javascript object>

## Update SQL db

In [17]:
# 4. Connect to the SQLite database file under ./db of the current working
#    directory; the table delete/create calls that follow use this connection.
db_path = f"{os.getcwd()}/db/pythonsqlite.db"
conn = create_connection(db_path)

<IPython.core.display.Javascript object>

In [18]:
# Remove the existing "amp_tracking" table over the open connection
# (presumably a DROP/DELETE inside the `utils.delete_table` helper -- verify).
delete_table("amp_tracking", conn)
# The "amp_referring" table is intentionally left untouched in this run.
# delete_table("amp_referring", conn)

<IPython.core.display.Javascript object>

In [19]:
# 5. Create new tables to refresh data
#    Only "amp_tracking" is rebuilt here, from the merged cumulative frame.

create_table_from_df("amp_tracking", conn, new_cumulative_df)
# The "amp_referring" rebuild is disabled; `amp_referring_df` is not defined
# in the visible part of this notebook.
# create_table_from_df("amp_referring", conn, amp_referring_df)

<IPython.core.display.Javascript object>