In [1]:
import ridgeplot as rp
import plotly
import numpy as np
import pandas as pd
import requests
from dataclasses import dataclass
from enum import Enum
import time



In [2]:
# Each function converts a value expressed in the named unit into DALY/dollar.
class Units(Enum):
    """Namespace of unit conversions whose target unit is DALYs per dollar.

    Every attribute is a one-argument function. Because the attributes are
    functions (descriptors), ``Enum`` does not turn them into enum members:
    ``Units.<name>`` is simply the callable itself.
    """

    @staticmethod
    def daly_over_dollar(x):
        """Value is already in DALY/dollar; return it unchanged."""
        return x

    @staticmethod
    def dollar_over_daly(x):
        """Value is in dollars per DALY; return its reciprocal."""
        return 1 / x

    @staticmethod
    def in_gd_daly_over_dollar(x):
        """Scale ``x`` by GiveDirectly's estimated impact (see constants below)."""
        # GW's estimate of GiveDirectly's impact is 0.00335 doubling of consumption per dollar
        # (https://docs.google.com/spreadsheets/d/18ROI6dRdKsNfXg5gIyBa1_7eYOjowfbw5n65zkrLnvc/edit#gid=1680005064).
        gd_doublings_per_dollar = 0.00335
        # One doubling of consumption is about 0.4 DALYs.
        # NOTE(review): if a doubling is worth 0.4 DALYs, converting doublings to DALYs
        # would multiply by 0.4, yet the formula divides -- confirm which is intended.
        doubling_daly_factor = 0.4
        return x * gd_doublings_per_dollar / doubling_daly_factor

@dataclass
class Spreadsheet:
    """One spreadsheet model plus the location of the cell holding its result.

    Instances are used as dictionary keys, so ``__hash__`` is defined explicitly
    (a plain ``@dataclass`` with ``eq=True`` would otherwise be unhashable).
    """

    # Human-readable label for the model.
    name: str
    # Address of the Google Sheets document.
    url: str
    # Coordinate of the result cell, e.g. "E72".
    result_cell_coordinate: str
    # Callable converting the raw result into DALY/dollar.
    units: Units = Units.daly_over_dollar
    # Name of the worksheet (tab) containing the result cell.
    result_worksheet: str = ""

    def __hash__(self) -> int:
        """Hash the concatenated string fields; ``units`` does not take part."""
        identity = "".join(
            (
                self.name,
                self.url,
                self.result_cell_coordinate,
                self.result_worksheet,
            )
        )
        return hash(identity)

In [3]:
# The models to compare; each entry says where its result lives and in which unit.
spreadsheets = [
    Spreadsheet(
        name="Replacement of Iron supplementation",
        url="https://docs.google.com/spreadsheets/d/1342XLGqjpLXV4pHXINz2SrzAora2xhm-99x6slt64Xs/",
        result_cell_coordinate="E72",
        units=Units.dollar_over_daly,
        result_worksheet="Daggered",
    ),
    Spreadsheet(
        name="GiveWell IFA CEA",
        url="https://docs.google.com/spreadsheets/d/1_ttwAj4rH9rDhqGeil01hBmmrdP2qjIF2RvVMB5BDT8/edit#gid=0",
        result_cell_coordinate="B96",
        units=Units.in_gd_daly_over_dollar,
        result_worksheet="Main",
    ),
]

In [4]:
class Dagger:
    """Submit spreadsheets to the usedagger.com API and collect the simulation output.

    Typical use is ``Dagger(spreadsheets).run()`` followed by ``get_results_df()``.

    Attributes:
        spreadsheets: the spreadsheets to simulate; also fixes the column order
            of ``get_results_df``.
        sensitivity: flag forwarded unchanged to the API as ``sensitivity``.
        runs: per-spreadsheet bookkeeping dict with keys ``id`` (job id returned
            by the API, "" until submitted), ``result`` (DataFrame of simulation
            data, or None) and ``status`` ("NOT_STARTED", a status string
            reported by the API, or "TOO_LONG" when polling gave up).
    """

    def __init__(self, spreadsheets, sensitivity=False):
        """Record the spreadsheets and create an empty run entry for each one.

        Spreadsheets are used as dictionary keys, so they must be hashable.
        """
        self.spreadsheets = spreadsheets
        self.sensitivity = sensitivity
        self.runs = {
            spreadsheet: {'id': "", 'result': None, 'status': "NOT_STARTED"}
            for spreadsheet in spreadsheets
        }

    def run(self):
        """Submit every spreadsheet, then poll until results arrive or polling gives up."""
        self.start_jobs()
        self.get_all_results()

    @staticmethod
    def spreadsheet_to_params(spreadsheet: "Spreadsheet", sensitivity: bool):
        """Build the JSON payload that describes one spreadsheet job."""
        return {
            "url": spreadsheet.url,
            "result_worksheet": spreadsheet.result_worksheet,
            "result_cell_coordinate": spreadsheet.result_cell_coordinate,
            "sensitivity": sensitivity,
        }

    def start_jobs(self):
        """POST one job per spreadsheet and remember the job id the API returns.

        Raises:
            requests.HTTPError: if the API answers the POST with a 4xx/5xx status.
        """
        post_url = 'https://usedagger.com/api/spreadsheet/'
        for spreadsheet in self.spreadsheets:
            params = self.spreadsheet_to_params(spreadsheet, self.sensitivity)
            response = requests.post(post_url, json=params)
            # Surface an HTTP failure directly instead of as a confusing
            # KeyError / JSON decoding error on the next line.
            response.raise_for_status()
            entry = self.runs[spreadsheet]
            entry["id"] = response.json()['id']
            entry["status"] = "STARTED"

    @staticmethod
    def get_results(id):
        """Fetch the current state of job ``id``.

        Returns:
            ``(status, DataFrame)`` when the reported status is "SUCCESS",
            otherwise ``(status, None)``.
        """
        response = requests.get(f'https://usedagger.com/api/sim/{id}')
        payload = response.json()  # decode the body once and reuse it
        status = payload['status']['status']
        if status == "SUCCESS":
            return status, pd.DataFrame(payload['output']['simulation_data'])
        return status, None

    def get_all_results(self, attempts=20, wait=5):
        """Poll every unfinished job until it succeeds or ``attempts`` is used up.

        Args:
            attempts: maximum number of polls per spreadsheet.
            wait: seconds to sleep between two polls of the same job.

        A job that never reports "SUCCESS" within ``attempts`` polls gets the
        status "TOO_LONG". A success on the very last poll counts as a success.
        """
        for spreadsheet in self.spreadsheets:
            entry = self.runs[spreadsheet]
            if entry['status'] == "SUCCESS":
                continue
            print(f"Getting results for {spreadsheet.name}")
            # Defined up front so the error report below also works when attempts == 0.
            status, result = entry['status'], None
            for attempt in range(attempts):
                status, result = self.get_results(entry['id'])
                entry['status'] = status
                if status == "SUCCESS":
                    entry['result'] = result
                    break
                # Pause before polling again, whatever the non-success status is,
                # so the API is never hit in a tight loop. No pause after the
                # final attempt because nothing follows it.
                if attempt < attempts - 1:
                    time.sleep(wait)
            else:
                # Only reached when the loop finished without a `break`,
                # i.e. no poll ever returned SUCCESS.
                print(f"Error: could not get results for {spreadsheet.name} after {attempts} attempts")
                print(f"Error: {status} for {spreadsheet.name}")
                print(f"Error: {result}")
                entry['status'] = "TOO_LONG"
            print(f"Got results for {spreadsheet.name}, status: {entry['status']}")

    def get_results_df(self):
        """Combine all results into one DataFrame with a column per spreadsheet.

        Each column is the spreadsheet's result cell (``<worksheet>!<cell>``),
        renamed to the spreadsheet name and converted with ``spreadsheet.units``.

        Raises:
            TypeError: if a spreadsheet has no result yet (its ``result`` is
                None and cannot be subscripted).
        """
        columns = []
        for spreadsheet in self.spreadsheets:
            cell = spreadsheet.result_worksheet + '!' + spreadsheet.result_cell_coordinate
            samples = self.runs[spreadsheet]['result'][[cell]]
            columns.append(
                samples.rename(columns={cell: spreadsheet.name}).apply(spreadsheet.units)
            )
        return pd.concat(columns, axis="columns")




In [5]:
# Create the client for the spreadsheets listed above (sensitivity left at its default, False).
run = Dagger(spreadsheets)

In [6]:
# Submit every spreadsheet to the API, then poll until each job reports a result or polling gives up.
run.run()

Getting results for Replacement of Iron supplementation
Got results for Replacement of Iron supplementation, status: SUCCESS
Getting results for GiveWell IFA CEA
Got results for GiveWell IFA CEA, status: SUCCESS


In [7]:
# One column per spreadsheet, each converted by that spreadsheet's `units` function; preview the first rows.
df = run.get_results_df()
df.head()

Unnamed: 0,Replacement of Iron supplementation,GiveWell IFA CEA
0,0.088624,0.037614
1,0.056406,0.05697
2,0.038378,0.031059
3,0.021959,0.056079
4,0.070284,0.053238


In [8]:
# Ridgeline plot of the samples: the transpose gives one row of samples per df column, labelled with the column names.
fig = rp.ridgeplot(samples=df.to_numpy().T, labels=df.columns.to_list())


In [9]:
# Bare last expression: the notebook renders the ridgeline figure.
fig

In [10]:
# Show the same figure with "json" as the first argument to show() -- presumably
# selecting plotly's JSON renderer to inspect the figure spec; confirm it is still needed.
fig.show("json")

In [11]:
import plotly.express as px
import plotly.graph_objects as go



In [12]:
# Alternative view: one violin trace per spreadsheet column.
# NOTE: this rebinds `fig`, replacing the ridgeline figure created earlier.
fig = go.Figure()
for column in df.columns:
    # name= labels each trace with its spreadsheet; without it plotly falls back
    # to generic "trace 0", "trace 1", ... labels.
    fig.add_trace(go.Violin(x=df[column], name=column))


In [13]:
# Apply the same styling to every violin trace in the figure.
fig.update_traces(
    points=False,
    meanline_visible=True,
    side='positive',
    orientation='h',
    width=3,
)
# Bare last expression so the notebook renders the styled figure.
fig