In [120]:
# import requests
# import re
# import urllib.parse
# import json
# import io
# import functools
# 
# class Project:
#     def __init__(self, id:int, server=None):
#         if not id:
#             raise Exception('Invalid project id')
# 
#         self.id = id
#         self.server: Server = server
# 
#     def __repr__(self):
#         return f"Project(id={self.id}; server={self.server.url})"
# 
#     def get_models(self):
#         return self.server.get(
#             'command/core/get-models',
#             query={'project': self.id}
#         ).json()
# 
#     def get_project_url(self):
#         return f"{self.server.url}/project?project={self.id}"
# 
#     def apply_operations(self, operations_json: list):
#         try:
#             r = self.server.post(
#                 'command/core/apply-operations',
#                 query={'project': self.id},  # the docs say this is where it belongs, but reality says it's in the data
#                 data={
#                     'project': self.id,
#                     'operations': json.dumps(operations_json)
#                 }
#             ).json()
#         except Exception as e:
#             raise Exception(f"Apply Operations failed") from e
#         
#         if not r['code'] == 'ok':
#             raise Exception(f"Apply Operations failed: {r['message']}")
#         
#         return self
# 
#     def get_history(self):
#         return self.server.get(
#             'command/core/get-history',
#             query={'project': self.id}
#         ).json()
# 
#     def get_rows(self, start=0, limit=50):
#         return self.server.get(
#             'command/core/get-rows',
#             query={
#                 'project': self.id,
#                 'start': start,
#                 'limit': limit,
#             }
#         ).json()
# 
#     def compute_facets(self, column: str):
#         r = self.server.post(
#             'command/core/compute-facets',
#             query={'project': self.id},  # the docs say this is where it belongs, but reality says it's in the data
#             data={
#                 "engine": json.dumps({
#                     "facets": [
#                         {
#                             "type": "list",
#                             "name": column,
#                             "columnName": column,
#                             "expression": "value",
#                             "omitBlank": False,
#                             "omitError": False,
#                             "selection": [],
#                             "selectBlank": False,
#                             "selectError": False,
#                             "invert": False
#                         }
#                     ],
#                     "mode": "row-based"
#                 })}
#         ).json()
# 
#         errors = functools.reduce(lambda a, v: [*a, v['error']] if v['error']  else a, r['facets'], [])
# 
#         if len(errors) > 0:
#             raise Exception(f"Compute Facets Failed: {';'.join(errors)}")
#         
#         return self
# 
#     def compute_clusters(self, column: str, function: str = 'fingerprint', type: str = 'binning', params: map = {}):
#         return self.server.post(
#             'command/core/compute-clusters',
#             query={'project': self.id},  # the docs say this is where it belongs, but reality says it's in the data
#             data={
#                 "engine": json.dumps({
#                     "facets": [],
#                     "mode": "row-based"
#                 }),
#                 "clusterer": json.dumps({
#                     "type": type,
#                     "function": function,
#                     "column": column,
#                     "params": params
#                 })
#             }
#         ).json()
# 
#     def mass_edit(self, column: str, edits: list = []):
#         return self.server.post(
#             'command/core/mass-edit',
#             query={'project': self.id},  # the docs say this is where it belongs, but reality says it's in the data
#             data={
#                 "columnName": column,
#                 "expression": "value",
#                 "edits": json.dumps(edits),
#                 "engine": json.dumps({"facets": [], "mode": "row-based"})
#             }
#         ).json()
# 
#     def export_rows(self, filename = 'data.csv', format="csv"):
#         content = self.server.post(
#             'command/core/export-rows',
#             query={
#                 'project': self.id, 
#                 'format': 'csv'
#             },
#             data={
#                 "engine": json.dumps({"facets": [], "mode": "row-based"})
#             }
#         ).text
#         with open(filename, 'w') as file:
#             file.write(content)
#         return content
# 
#     def cluster_column(self, column: str):
#         # [ [{ v: "str VALUE", "c": int count? }, { v: "str VALUE", "c": int count? }, ...], ... ]
#         cluster_data = self.compute_clusters(column)
#         # [ {from: ["str", "str"], "to": "str"}, ...]
#         mass_edit_data = []  
#         # print(f"Cluster Data: {cluster_data}")
#         for cluster_list in cluster_data:
#             # print(f"Cluster List: {cluster_list}")
#             to_value = cluster_list[0]['v']
#             from_values = [value['v'] for value in cluster_list]
#             mass_edit_data.append({'from': from_values, 'to': to_value})
#         
#         r = self.mass_edit(column, edits = mass_edit_data)
#         if not r['code'] == 'ok':
#             # print(r)
#             raise Exception(f"Cluster failed: {r['message']}")
#         
#         return self
#         
# 
# 
# class Server:
#     def __init__(self, url: str = "http://localhost:3333"):
#         self.url = url
#         self.token = None
# 
#     @property
#     def is_connected(self):
#         return self.token is not None
# 
#     def get_csrf_token(self) -> str:
#         response = requests.get(f'{self.url}/command/core/get-csrf-token')
#         j = response.json()
#         if 'token' not in j:
#             raise Exception("Invalid response")
# 
#         return j['token']
# 
#     def connect(self):
#         self.token = self.get_csrf_token()
#         return self
# 
#     def disconnect(self):
#         self.token = None
#         return self
# 
#     def get(self, path, query=None, headers=None, *args, **kwargs):
#         self.connect()
# 
#         if query is None:
#             query = {}
#         q = urllib.parse.urlencode({**query, 'csrf_token': self.token})
# 
#         if headers is None:
#             headers = {}
# 
#         headers = {
#             # 'Accept': 'application/xml,*/*;0.8',
#             'Accept': 'application/json, text/javascript, */*; q=0.01',
#             **headers
#         }
# 
#         r = requests.get(f"{self.url}/{path}?{q}", *args, headers=headers, **kwargs)
# 
#         if r.status_code >= 400 and r.status_code < 500:
#             raise Exception(f"Invalid Request: Status Code {r.status_code}")
#         if r.status_code >= 500 and r.status_code < 600:
#             raise Exception(f"Internal Server Error: Status Code {r.status_code}")
# 
#         return r
# 
#     def post(self, path, query=None, headers=None, *args, **kwargs):
#         self.connect()
# 
#         if query is None:
#             query = {}
#         q = urllib.parse.urlencode({**query, 'csrf_token': self.token})
# 
#         if headers is None:
#             headers = {}
# 
#         headers = {
#             'Accept': 'application/json, text/javascript, */*; q=0.01',
#             **headers
#         }
# 
#         req = requests.Request('POST', f"{self.url}/{path}?{q}", *args, headers=headers, **kwargs)
#         prepared_req = req.prepare()
#         # print(f"URL: {prepared_req.url}")
#         # print(f"Headers: {prepared_req.headers}")
#         # print(f"Body: {prepared_req.body}")
# 
#         with requests.Session() as session:
#             r = session.send(prepared_req)
#             # print(r.text)
# 
#         if r.status_code >= 400 and r.status_code < 500:
#             print(r.text)
#             raise Exception(f"Invalid Request: Status Code {r.status_code}")
#         if r.status_code >= 500 and r.status_code < 600:
#             print(r.text)
#             raise Exception(f"Internal Server Error: Status Code {r.status_code}")
# 
#         return r
# 
#     def create_project_from_file(self, file_path: str, name: str = 'New Project') -> Project:
#         path = f'command/core/create-project-from-upload'
#         files = {'project-file': open(file_path, 'rb')}
#         data = {
#             'project-name': 'New Project',
#             'format': 'text/line-based/*sv',
#             'options': {
#                 # "encoding":"UTF-8",
#                 # "separator":",",
#                 # "ignoreLines":-1,
#                 # "headerLines":1,
#                 # "skipDataLines":0,
#                 # "limit":-1,
#                 # "storeBlankRows": True,
#                 # "guessCellValueTypes": True,
#                 # "processQuotes": True,
#                 # "quoteCharacter": "\"",
#                 # "storeBlankCellsAsNulls": True,
#                 # "includeFileSources": False,
#                 # "includeArchiveFileName": False,
#                 # "trimStrings": False,
#                 # "disableAutoPreview": False,
#                 # "projectName": name,
#                 # "projectTags":[]
#             }
#         }
#         headers = {
#             'Accept': 'application/xml,*/*;0.8',
#         }
#         r = self.post(path, data=data, files=files, headers=headers)
# 
#         if not 'project=' in r.url:
#             print(r.text)
#             raise Exception('Project Creation Failure')
# 
#         v = re.search(r'^.+project=(\d+).*$', r.url, re.I | re.S)
# 
#         return Project(int(v.group(1)), server=self)
# 
#     def get_metadata(self):
#         return self.get(
#             'command/core/get-all-project-metadata',
#         ).json()
# 
#     def get_project(self, id: int):
#         return Project(int(id), server=self)
# 
#     def get_all_projects(self):
#         metadata = self.get_metadata();
#         r = set()
#         for id, proj in metadata['projects'].items():
#             r.add(Project(int(id), server=self))
#         return r
# 
#     def delete_project(self, project_id):
#         data = {'project': project_id};
#         return self.post(
#             'command/core/delete-project',
#             data=json.dumps(data),
#             query=data,
#         ).json()
# 
#     def __repr__(self):
#         return f"Server(url={self.url}; token={self.token})"

In [121]:
# Create the client handle used by every cell below; no arguments are passed,
# so whatever defaults `Server` defines apply.
# NOTE(review): the only `Server` definition visible in this notebook is the
# commented-out class in the first cell, so on a fresh kernel this line raises
# NameError unless `Server` is defined or imported somewhere not shown — confirm.
server = Server()

In [122]:

# Upload the CSV and keep the returned project handle for the cells below.
# NOTE(review): the commented-out `Server.create_project_from_file` in the first
# cell hardcodes 'project-name' to 'New Project' and never reads its `name`
# argument, so 'Dish' may be silently ignored — confirm against the `Server`
# implementation actually in scope.
project = server.create_project_from_file('data/Dish_sm.csv', 'Dish')

In [123]:
# One "core/column-addition" operation: derive a `norm_name` column from `name`.
# The GREL expression trims, lowercases, rewrites " & " as " and ", and replaces
# matches of the punctuation character class with the empty string.
# Keyword order below matches the serialised key order of the operation.
op = [
    dict(
        op="core/column-addition",
        engineConfig=dict(facets=[], mode="row-based"),
        baseColumnName="name",
        expression='grel:value.trim().toLowercase().replace(\" & \",\" and \").replace(/[\\;\\:\\.\\,\\>\\<\\/\\?\\[\\]\\{\\}\\(\\)\\*\\&\\^\\%\\$\\#\\@\\!\\-\\+\\=\\_]/, \"\")',
        onError="set-to-blank",
        newColumnName="norm_name",
        columnInsertIndex=2,
        description="Create column norm_name",
    ),
]

# Bare call as the last expression so the returned value is shown as cell output.
project.apply_operations(op)


Project(id=2598948882127; server=http://localhost:3333)

In [125]:
# project.compute_facets('norm_name')
# Cluster the normalised names and merge the variants found in each cluster.
# NOTE(review): going by the commented-out `Project.cluster_column` in the first
# cell, every value in a cluster is mass-edited to the cluster's first value and
# the project itself is returned — confirm against the implementation in scope.
project.cluster_column('norm_name')

Project(id=2598948882127; server=http://localhost:3333)

In [126]:
# Display the browser URL of the project (the bare expression is the cell output).
project.get_project_url()

'http://localhost:3333/project?project=2598948882127'

In [None]:
# Export the project's rows using the method's default arguments.
# NOTE(review): per the commented-out `Project.export_rows` in the first cell,
# this writes 'data.csv' into the current working directory and returns the full
# CSV text, which the notebook would then echo as this cell's output — confirm,
# and consider suppressing the output for large exports.
project.export_rows()

In [7]:
def delete_all_projects(server):
    """Delete every project the server reports, printing one line per deletion.

    Args:
        server: Object exposing ``get_all_projects()`` (an iterable of items
            with an ``id`` attribute) and ``delete_project(project_id)``.

    Returns:
        None. Each result is printed as ``Deleting <id>: <result>``.
    """
    known_projects = server.get_all_projects()
    for proj in known_projects:
        # Build the label before issuing the delete, then print both together.
        label = f"Deleting {proj.id}:"
        outcome = server.delete_project(proj.id)
        print(label, outcome)


# delete_all_projects(server)

Deleting 2052714969294: {'code': 'ok'}
Deleting 2366856242127: {'code': 'ok'}
Deleting 2212409039842: {'code': 'ok'}
Deleting 1794677370548: {'code': 'ok'}


In [1]:
# Scratch check of `or` semantics: the left operand None is falsy, so the
# expression evaluates to the right operand, 10.
None or 10

10