In [None]:
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt

# Import helper functions from the local functions.py module.
import sys, os
# realpath() resolves the relative name against the current working directory,
# so dir_path is normally the kernel's current working directory.
dir_path = os.path.dirname(os.path.realpath('functions.py'))
# Put that directory on the module search path so `functions` is importable.
sys.path.append(str(dir_path)+'/')
# NOTE(review): the star import hides which names come from functions.py;
# the cells visible here only use convert_mins_to_decimal.
from functions import *

### Download the Google Sheet as a CSV file with wget

In [None]:
%%bash
# Fetch the sheet's CSV export (format=csv) and save it as example.csv in the current directory.
wget "https://docs.google.com/spreadsheets/u/1/d/1hTYwrjFdEp5xUxz_LDA3ZmqB3ux1QMvU9GQsWcnbhCA/export?format=csv&id=1hTYwrjFdEp5xUxz_LDA3ZmqB3ux1QMvU9GQsWcnbhCA&gid=1901126947" -O example.csv

In [None]:
# List the working directory (presumably to confirm example.csv was written).
!ls

Load the CSV into a pandas DataFrame

In [None]:
# Read the downloaded CSV into a DataFrame and preview its first rows.
example = pd.read_csv('example.csv')
example.head()

### Convert `Time_Spent` and `Expected_Time_Spent` to decimal format

In [None]:
# Take the Time_Spent entry at index label 0 as a sample value for the conversion.
# NOTE(review): `time` shadows the stdlib module name if that is ever imported.
time = example['Time_Spent'][0]
print(time)

In [None]:
# Convert the sample value, passed as a string, and show the result.
# convert_mins_to_decimal is presumably provided by the star import from functions.py.
decimal = convert_mins_to_decimal(str(time))
print(decimal)

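Why does applying the following lambda function to `example_converted['Time_Spent']` also affect `example`? (Note: because `example_converted` is created with `example.copy()` below, the two DataFrames are independent. Only a plain assignment such as `example_converted = example` would make both names refer to the same object.)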

In [None]:
# Work on an independent copy so the raw `example` frame keeps its original values.
example_converted = example.copy()

# Both time columns get the same treatment: stringify each cell, then convert it.
for column in ('Time_Spent', 'Expected_Time_Spent'):
    example_converted[column] = example_converted[column].map(
        lambda raw: convert_mins_to_decimal(str(raw))
    )

In [None]:
# Preview the original frame again to compare it with the converted copy.
example.head()

In [None]:
# Preview the converted copy.
example_converted.head()

### Plot Expected time vs time spent

In [None]:
import matplotlib
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Scatter of time spent against expected time on a square canvas.
f, ax = plt.subplots(figsize=(6, 6))
ax.plot(example_converted['Expected_Time_Spent'], example_converted['Time_Spent'], 'o')
ax.set_xlabel('Expected Time')
ax.set_ylabel('Time Spent')

# Both axes share one upper limit: the larger of the two column maxima, plus 1.
x_max = max(example_converted['Expected_Time_Spent'])
y_max = max(example_converted['Time_Spent'])
lim = max(x_max, y_max) + 1
ax.set(xlim=(0, lim), ylim=(0, lim))

# Dashed y = x reference line running corner to corner.
diag_line, = ax.plot(ax.get_xlim(), ax.get_ylim(), ls="--", c=".3")

plt.show()

In [None]:
# Overlay semi-transparent histograms of the two time columns.
for column in ('Time_Spent', 'Expected_Time_Spent'):
    example_converted[column].plot.hist(alpha=0.5)
plt.show()

In [None]:
import seaborn as sns

In [None]:
# Overlay shaded kernel density estimates of the two time columns,
# truncated at the data extremes (cut=0).
for column in ('Time_Spent', 'Expected_Time_Spent'):
    ax = sns.kdeplot(example_converted[column], cut=0, shade=True)
ax.set_xlabel('Time (hrs)')
plt.show()

In [None]:
# Joint KDE of expected vs. actual time, with marginal densities.
sns.jointplot(
    data=example_converted,
    x="Expected_Time_Spent",
    y="Time_Spent",
    kind="kde",
)
plt.show()

In [None]:
# Joint KDE of expected vs. actual time in magenta, with the raw points overlaid.
g = sns.jointplot(x="Expected_Time_Spent", y="Time_Spent", data=example_converted, kind="kde", color="m")
# Draw each observation as a grey "+" marker on the joint axes.
g.plot_joint(plt.scatter, c="grey", s=30, linewidth=1, marker="+")
# Make the first collection on the joint axes fully transparent.
# NOTE(review): which artist collections[0] is depends on the seaborn version — confirm.
g.ax_joint.collections[0].set_alpha(0)
# Trailing semicolon suppresses the cell's output repr.
g.set_axis_labels("Expected_Time_Spent", "Time_Spent");

In [None]:
# Bivariate KDE contours of time spent against expected time.
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally,
# so a second positional array raises TypeError (keyword form needs seaborn >= 0.11).
ax = sns.kdeplot(
    x=example_converted['Expected_Time_Spent'],
    y=example_converted['Time_Spent'],
)

In [None]:
# Filled bivariate KDE of time spent against expected time.
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
# `fill` is the current name of the deprecated `shade` option (seaborn >= 0.11).
ax = sns.kdeplot(
    x=example_converted['Expected_Time_Spent'],
    y=example_converted['Time_Spent'],
    fill=True,
)

In [None]:
# Log-log scatter of time spent against expected time, with bivariate KDE
# contours drawn on the same axes.
f, ax = plt.subplots(figsize=(6, 6))
ax.plot(example_converted['Expected_Time_Spent'], example_converted['Time_Spent'], 'o')
ax.set_yscale('log')
ax.set_xscale('log')
ax.set_xlabel('Expected Time')
ax.set_ylabel('Time Spent')

# x/y are passed by keyword because seaborn >= 0.12 accepts only `data`
# positionally; `levels` replaces the deprecated `n_levels`; ax=ax pins the
# contours to this figure instead of relying on the "current axes".
ax = sns.kdeplot(
    x=example_converted['Expected_Time_Spent'],
    y=example_converted['Time_Spent'],
    levels=30,
    cmap="Purples_d",
    ax=ax,
)

# Both axes share one upper limit: the larger of the two column maxima, plus 1.
# The lower limit is 0.1 because a log axis cannot start at 0.
x_max = max(example_converted['Expected_Time_Spent'])
y_max = max(example_converted['Time_Spent'])
lim = max(x_max, y_max) + 1
ax.set(xlim=(0.1, lim), ylim=(0.1, lim))

# Dashed y = x reference line running corner to corner.
diag_line, = ax.plot(ax.get_xlim(), ax.get_ylim(), ls="--", c=".3")

In [None]:
# Filled bivariate KDE of time spent against expected time on log-log axes.
f, ax = plt.subplots(figsize=(6, 6))
ax.set_yscale('log')
ax.set_xscale('log')
ax.set_xlabel('Expected Time')
ax.set_ylabel('Time Spent')

# x/y are passed by keyword because seaborn >= 0.12 accepts only `data`
# positionally; `fill` is the current name of the deprecated `shade` option;
# ax=ax pins the plot to this figure instead of relying on the "current axes".
ax = sns.kdeplot(
    x=example_converted['Expected_Time_Spent'],
    y=example_converted['Time_Spent'],
    fill=True,
    ax=ax,
)

# Both axes share one upper limit: the larger of the two column maxima, plus 1.
# The lower limit is 0.1 because a log axis cannot start at 0.
x_max = max(example_converted['Expected_Time_Spent'])
y_max = max(example_converted['Time_Spent'])
lim = max(x_max, y_max) + 1
ax.set(xlim=(0.1, lim), ylim=(0.1, lim))

# Dashed y = x reference line running corner to corner.
diag_line, = ax.plot(ax.get_xlim(), ax.get_ylim(), ls="--", c=".3")

In [None]:
# Synthetic data for the scipy fit: an exponential decay with a small
# sinusoidal wiggle, sampled at 100 evenly spaced points on [0, 10].
fake_data_x = np.linspace(0, 10, 100)
fake_data_y = np.exp(-fake_data_x) + 0.1*np.sin(10*fake_data_x)

def fit_func(t, r):
    """Model curve handed to ``curve_fit``: ``r * exp(-r * t)``.

    This is the density of an exponential distribution with rate ``r``, but
    any other function of ``t`` and the fit parameters could be used instead.

    Args:
        t: Point(s) at which to evaluate the curve (scalar or NumPy array).
        r: Rate parameter being fitted.

    Returns:
        The curve value(s) at ``t``.
    """
    decay = np.exp(-r * t)
    return r * decay

# Least-squares fit of fit_func to the synthetic data; curve_fit returns the
# parameter estimate(s) and their covariance matrix.
fit_est, fit_cov = curve_fit(fit_func, fake_data_x, fake_data_y)
for fit_result in (fit_est, fit_cov):
    print(fit_result)

# Plot the data and the fitted curve on a fresh figure.
plt.figure()
plt.plot(fake_data_x, fake_data_y)
fitted_curve = fit_func(fake_data_x, fit_est[0])
plt.plot(fake_data_x, fitted_curve)
plt.show()
