In [1]:
from vizier.datastore.histore.base import HistoreDatastore
from vizier.engine.packages.pycell.client.histore import VizierDBClient

from openclean import op
from openclean.function import Sum

# Scratch directory handed to the datastore; it is removed again by the
# clean-up cell at the end of the notebook.
TMP_DIR = './.tmp'

# Client used by every cell below: a HistoreDatastore constructed with
# TMP_DIR and an initially empty dataset mapping.
vizierdb = VizierDBClient(
    datastore=HistoreDatastore(TMP_DIR),
    datasets={},
)

In [2]:
# Read the TSV file through the client under the dataset name
# 'abuse report'; the same name is used for commit/checkout further down.
df = vizierdb.load(
    name='abuse report',
    filename='../data/etnx-8aft.tsv',
)
df

Unnamed: 0,Calendar Year,Gender,Physical Abuse,Sexual Abuse,Risk of Sexual Abuse,Risk of Harm,Emotional/Neglect
0,2018,Male,98,60,21,293,29
1,2018,Female,69,306,54,253,29
2,2017,Male,89,62,47,267,17
3,2017,Female,70,286,36,244,18
4,2016,Male,76,52,47,212,17
5,2016,Female,57,259,68,216,20
6,2015,Male,105,63,61,212,24
7,2015,Female,66,275,89,207,27
8,2014,Male,88,95,82,196,14
9,2014,Female,73,285,134,177,17


In [3]:
# Sort by 'Calendar Year' ascending and, within each year, by 'Gender' in
# reverse order (so the 'Male' row precedes the 'Female' row).
df = op.order_by(
    df,
    columns=['Calendar Year', 'Gender'],
    reversed=[False, True],
)
df

Unnamed: 0,Calendar Year,Gender,Physical Abuse,Sexual Abuse,Risk of Sexual Abuse,Risk of Harm,Emotional/Neglect
8,2014,Male,88,95,82,196,14
9,2014,Female,73,285,134,177,17
6,2015,Male,105,63,61,212,24
7,2015,Female,66,275,89,207,27
4,2016,Male,76,52,47,212,17
5,2016,Female,57,259,68,216,20
2,2017,Male,89,62,47,267,17
3,2017,Female,70,286,36,244,18
0,2018,Male,98,60,21,293,29
1,2018,Female,69,306,54,253,29


In [4]:
# Add 'Abuse Total' = 'Physical Abuse' + 'Sexual Abuse' at column
# position 4, which places it directly after 'Sexual Abuse'.
df = op.inscol(
    df,
    'Abuse Total',
    values=Sum('Physical Abuse', 'Sexual Abuse'),
    pos=4,
)
df

Unnamed: 0,Calendar Year,Gender,Physical Abuse,Sexual Abuse,Abuse Total,Risk of Sexual Abuse,Risk of Harm,Emotional/Neglect
8,2014,Male,88,95,183,82,196,14
9,2014,Female,73,285,358,134,177,17
6,2015,Male,105,63,168,61,212,24
7,2015,Female,66,275,341,89,207,27
4,2016,Male,76,52,128,47,212,17
5,2016,Female,57,259,316,68,216,20
2,2017,Male,89,62,151,47,267,17
3,2017,Female,70,286,356,36,244,18
0,2018,Male,98,60,158,21,293,29
1,2018,Female,69,306,375,54,253,29


In [5]:
# Display the frame again; nothing has modified it since the previous cell,
# so the output is identical to the one above.
df

Unnamed: 0,Calendar Year,Gender,Physical Abuse,Sexual Abuse,Abuse Total,Risk of Sexual Abuse,Risk of Harm,Emotional/Neglect
8,2014,Male,88,95,183,82,196,14
9,2014,Female,73,285,358,134,177,17
6,2015,Male,105,63,168,61,212,24
7,2015,Female,66,275,341,89,207,27
4,2016,Male,76,52,128,47,212,17
5,2016,Female,57,259,316,68,216,20
2,2017,Male,89,62,151,47,267,17
3,2017,Female,70,286,356,36,244,18
0,2018,Male,98,60,158,21,293,29
1,2018,Female,69,306,375,54,253,29


In [6]:
# Add 'Risk Total' = 'Risk of Sexual Abuse' + 'Risk of Harm' at column
# position 7, which places it directly after 'Risk of Harm'.
df = op.inscol(
    df,
    'Risk Total',
    values=Sum('Risk of Sexual Abuse', 'Risk of Harm'),
    pos=7,
)
df

Unnamed: 0,Calendar Year,Gender,Physical Abuse,Sexual Abuse,Abuse Total,Risk of Sexual Abuse,Risk of Harm,Risk Total,Emotional/Neglect
8,2014,Male,88,95,183,82,196,278,14
9,2014,Female,73,285,358,134,177,311,17
6,2015,Male,105,63,168,61,212,273,24
7,2015,Female,66,275,341,89,207,296,27
4,2016,Male,76,52,128,47,212,259,17
5,2016,Female,57,259,316,68,216,284,20
2,2017,Male,89,62,151,47,267,314,17
3,2017,Female,70,286,356,36,244,280,18
0,2018,Male,98,60,158,21,293,314,29
1,2018,Female,69,306,375,54,253,307,29


In [7]:
# Hand the modified frame (sorted, with both total columns) back to the
# client under the name 'abuse report' and keep working with the returned
# frame.
# NOTE(review): presumably this stores a new snapshot of the dataset in the
# datastore -- confirm against the VizierDBClient implementation.
df = vizierdb.commit('abuse report', df)
df

Unnamed: 0,Calendar Year,Gender,Physical Abuse,Sexual Abuse,Abuse Total,Risk of Sexual Abuse,Risk of Harm,Risk Total,Emotional/Neglect
8,2014,Male,88,95,183,82,196,278,14
9,2014,Female,73,285,358,134,177,311,17
6,2015,Male,105,63,168,61,212,273,24
7,2015,Female,66,275,341,89,207,296,27
4,2016,Male,76,52,128,47,212,259,17
5,2016,Female,57,259,316,68,216,284,20
2,2017,Male,89,62,151,47,267,314,17
3,2017,Female,70,286,356,36,244,280,18
0,2018,Male,98,60,158,21,293,314,29
1,2018,Female,69,306,375,54,253,307,29


In [8]:
# Fetch the dataset named 'abuse report' from the client again; the output
# below matches the frame that was committed in the previous cell.
df = vizierdb.checkout('abuse report')
df

Unnamed: 0,Calendar Year,Gender,Physical Abuse,Sexual Abuse,Abuse Total,Risk of Sexual Abuse,Risk of Harm,Risk Total,Emotional/Neglect
8,2014,Male,88,95,183,82,196,278,14
9,2014,Female,73,285,358,134,177,311,17
6,2015,Male,105,63,168,61,212,273,24
7,2015,Female,66,275,341,89,207,296,27
4,2016,Male,76,52,128,47,212,259,17
5,2016,Female,57,259,316,68,216,284,20
2,2017,Male,89,62,151,47,267,314,17
3,2017,Female,70,286,356,36,244,280,18
0,2018,Male,98,60,158,21,293,314,29
1,2018,Female,69,306,375,54,253,307,29


In [9]:
import pandas as pd

# Small two-column example frame, created through the client under the
# dataset name 'test'.
df = pd.DataFrame(data=[[1, 2], [3, 4]], columns=['A', 'B'])
df = vizierdb.create('test', df)


def square(x):
    """Return ``x`` multiplied by itself.

    Named ``square`` rather than ``pow`` so the Python builtin ``pow`` is
    not shadowed for the remainder of the kernel session.
    """
    return x * x


# Replace every value in column 'A' with its square.
df = op.update(df, 'A', square)
df

Unnamed: 0,A,B
0,1,2
1,9,4


In [10]:
# Overwrite column 'A' with the row-wise sum of columns 'A' and 'B'
# (1 + 2 = 3 and 9 + 4 = 13 in the output below).
df = op.update(df, 'A', Sum('A', 'B'))
df

Unnamed: 0,A,B
0,3,2
1,13,4


In [11]:
# Hand the updated frame back to the client under the name 'test'.
# NOTE(review): presumably this stores a new snapshot of the dataset in the
# datastore -- confirm against the VizierDBClient implementation.
df = vizierdb.commit('test', df)
df

Unnamed: 0,A,B
0,3,2
1,13,4


In [12]:
# Clean up the temp directory
import shutil

try:
    shutil.rmtree(TMP_DIR)
except FileNotFoundError:
    # The directory is already gone (e.g. this cell was run a second time),
    # so there is nothing left to remove. Any other error still propagates.
    pass