In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell executes, so edits to the imported project code take effect without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import sys
sys.path.append('../../')

import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import copy
import datetime

from data.dataloader import AthenaLoader, Covid19IndiaLoader
from data.processing.processing import get_dataframes_cached, get_custom_data_from_db

In [None]:
# Load the project dataframes via get_dataframes_cached, using AthenaLoader as
# the loader class. Only the 'testing_summary' entry is read in the cells below.
dataframes = get_dataframes_cached(loader_class=AthenaLoader)

In [None]:
# Build the Mumbai testing table from the 'testing_summary' frame.
#
# The pipeline is written as a non-mutating chain that ends in an explicit
# .copy(): mutating a .loc-filtered slice in place (dropna(inplace=True),
# column assignment) makes pandas emit SettingWithCopyWarning, and in-place
# edits make the cell awkward to re-run.
df_testing = (
    dataframes['testing_summary']
    .drop(columns=['partition_0', 'new'])        # bookkeeping columns not needed here
    .loc[lambda frame: frame['district'] == 'mumbai']
    .dropna(axis=0, how='any')                   # discard rows with any missing field
    .copy()                                      # own the data before adding columns
)

# Normalise dtypes: parse dates, let pandas infer remaining object columns,
# and force the two count columns to integers.
df_testing['date'] = pd.to_datetime(df_testing['date'])
df_testing = df_testing.infer_objects()
df_testing = df_testing.astype({'positives': 'int64', 'tests': 'int64'})

# Test positivity rate, in percent.
# NOTE(review): a row with tests == 0 would produce inf/NaN here and propagate
# into the rolling means and fits below — confirm such rows cannot occur.
df_testing['tpr'] = df_testing['positives'] * 100 / df_testing['tests']

In [None]:
# Replace the filtered frame's index with a fresh 0..n-1 range; the previous
# index is kept as a regular column.
df_testing = df_testing.reset_index()

In [None]:
# Two stacked panels: tests and positives on top, TPR below.
# Every series is drawn twice: the raw values (dashed line with markers) and a
# centred 7-row rolling mean (solid line). A dotted vertical line marks
# 2020-06-01 on both panels.
fig, axs = plt.subplots(figsize=(18, 12), nrows=2)

dates = df_testing['date']
marker_date = datetime.datetime.strptime('2020-06-01', '%Y-%m-%d')

# (axes, [(column, colour, legend name), ...]) for each panel, in draw order.
panels = [
    (axs[0], [('tests', 'C0', 'Tests'), ('positives', 'orange', 'Positives')]),
    (axs[1], [('tpr', 'red', 'TPR')]),
]

for panel_ax, series_specs in panels:
    for column, colour, name in series_specs:
        values = df_testing[column]
        panel_ax.plot(dates, values, '--o', color=colour, label=name + ' (Actual)')
        panel_ax.plot(dates, values.rolling(7, center=True).mean(), '-', color=colour, label=name + ' (RA)')
    panel_ax.axvline(x=marker_date, ls=':')
    panel_ax.legend()
    panel_ax.grid()

In [None]:
# Smoothed copy of df_testing: tests, positives and TPR are each replaced by
# their centred 7-row rolling mean. Rows left with any NaN (including the
# incomplete windows at both ends) are dropped, and the result is indexed by
# date.
rolling_means = {
    column: df_testing[column].rolling(7, center=True).mean()
    for column in ('tests', 'positives', 'tpr')
}
df_testing_roll = (
    df_testing
    .assign(**rolling_means)
    .dropna(axis=0, how='any')
    .set_index('date')
)

In [None]:
# Scatter of smoothed tests against smoothed TPR from 2020-06-01 onwards.
fig, ax = plt.subplots(figsize=(18, 12))
ax.scatter(
    df_testing_roll.loc['2020-06-01':, 'tests'],
    df_testing_roll.loc['2020-06-01':, 'tpr'],
    # A label is required: ax.legend() with no labelled artists only emits a
    # warning and draws nothing.
    label='True tests vs true TPR',
)
ax.set_xlabel('Tests')
ax.set_ylabel('TPR')
ax.legend()
ax.grid()

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Regression inputs from 2020-06-01 onwards:
#   X - smoothed tests as an (n, 1) column, the 2-D layout passed to reg.fit
#   y - smoothed TPR as a flat (n,) vector
data = df_testing_roll.loc['2020-06-01':, ['tests', 'tpr']].to_numpy()
X = data[:, :1]
y = data[:, 1]

In [None]:
# Least-squares linear model of TPR as a function of tests.
ols = LinearRegression()
reg = ols.fit(X, y)

In [None]:
# R^2 of the linear fit, evaluated on the same points it was fitted to
# (in-sample; there is no held-out data here).
reg.score(X, y)

In [None]:
# Overlay the linear model's fitted TPR on the observed (tests, TPR) points.
# The prediction is evaluated once and reused, rather than as a discarded
# statement followed by a second call.
tests_observed = df_testing_roll.loc['2020-06-01':, 'tests']
tpr_observed = df_testing_roll.loc['2020-06-01':, 'tpr']
tpr_linear = reg.predict(X)

fig, ax = plt.subplots(figsize=(18, 12))
ax.scatter(tests_observed, tpr_observed, label='True tests vs true TPR')
ax.plot(tests_observed, tpr_linear, label='Predicted TPR')
# Axis labels, matching the earlier tests-vs-TPR scatter.
ax.set_xlabel('Tests')
ax.set_ylabel('TPR')
ax.legend()
ax.grid()

In [None]:
# Exponential model TPR ~ exp(a * tests + b), fitted as a degree-1 polynomial
# to log(TPR). `p` evaluates the fitted line in log space, so predictions are
# np.exp(p(tests)).
tests_flat = X.ravel()
log_tpr = np.log(y)
log_linear_coeffs = np.polyfit(tests_flat, log_tpr, 1)
p = np.poly1d(log_linear_coeffs)

In [None]:
# Compare the linear and exponential fits against the observed points.
fig, ax = plt.subplots(figsize=(18, 12))
ax.scatter(df_testing_roll.loc['2020-06-01':, 'tests'], df_testing_roll.loc['2020-06-01':, 'tpr'], label='True tests vs true TPR')
ax.plot(df_testing_roll.loc['2020-06-01':, 'tests'], reg.predict(X), label='Predicted TPR (Linear)')

# The observations are in date order, not in order of test volume. A line
# plot joins consecutive points, so the curved exponential fit must be drawn
# over sorted x values; otherwise it is rendered as a tangle of chords.
tests_sorted = np.sort(X.reshape(-1, ))
ax.plot(tests_sorted, np.exp(p(tests_sorted)), 'r-', label='Predicted TPR (Exp)')

ax.set_xlabel('Tests')
ax.set_ylabel('TPR')
ax.legend()
ax.grid()

In [None]:
# Evaluate both fitted models on an even grid of 500 test volumes between
# 3000 and 10000 and draw them over the observed points, with a black
# reference line at zero.
fig, ax = plt.subplots(figsize=(18, 12))
tests = np.linspace(3000, 10000, 500)

observed = df_testing_roll.loc['2020-06-01':, ['tests', 'tpr']]
ax.scatter(observed['tests'], observed['tpr'], label='True tests vs true TPR')

tpr_linear_grid = reg.predict(tests[:, np.newaxis])
tpr_exp_grid = np.exp(p(tests))
zero_line = np.zeros(len(tests))

ax.plot(tests, tpr_linear_grid, label='Predicted TPR (Linear)')
ax.plot(tests, tpr_exp_grid, 'r-', label='Predicted TPR (Exp)')
ax.plot(tests, zero_line, '-k', label='Y = 0')
ax.legend()
ax.grid()

In [None]:
# Convert the predicted TPR (percent) into predicted positives,
# tests * TPR / 100, over an even grid of test volumes, and compare with the
# observed smoothed positives.
fig, ax = plt.subplots(figsize=(18, 12))
tests = np.linspace(3000, 10000, 500)
# The scatter shows positives, not TPR, so the legend entry says so.
ax.scatter(df_testing_roll.loc['2020-06-01':, 'tests'], df_testing_roll.loc['2020-06-01':, 'positives'], label='True tests vs true positives')
ax.plot(tests, tests*reg.predict(tests.reshape(-1, 1))/100, label='Predicted Positives (Linear)')
ax.plot(tests, tests*np.exp(p(tests))/100, 'r-', label='Predicted Positives (Exp)')
ax.plot(tests, [0]*len(tests), '-k', label='Y = 0')
ax.set_xlabel('Tests')
ax.set_ylabel('Positives')
ax.legend()
ax.grid()

In [None]:
# NOTE(review): `scale_up_acc_to_testing` is neither defined nor imported in the
# cells visible above, so on a fresh kernel this call would raise NameError.
# Confirm where it is meant to come from and which arguments it expects.
scale_up_acc_to_testing()

In [None]:
# Compare the 'total_infected' and 'new_total_infected' columns over time:
# solid line for the old series, dashed line in the same colour for the new one.
# NOTE(review): `df_subset` is not created in any visible cell — confirm which
# step is expected to define it before this cell runs.
fig, ax = plt.subplots(figsize=(12, 12))
subset_dates = df_subset['date']
infected_old = df_subset['total_infected']
infected_new = df_subset['new_total_infected']
ax.plot(subset_dates, infected_old, label='Old')
ax.plot(subset_dates, infected_new, '--', color='C0', label='New')
ax.legend()
ax.grid()