In [9]:
# Packages

import json
import os

import pandas as pd
import requests
from dotenv import load_dotenv

# Kept ahead of the rapid imports, matching the original cell order.
load_dotenv()

from rapid import Rapid
from rapid import RapidAuth
from rapid.exceptions import AuthenticationErrorException
from rapid.exceptions import ColumnNotDifferentException, DataFrameUploadValidationException
from rapid.items.schema import SchemaMetadata, SensitivityLevel, Owner, Column
from rapid.patterns import dataset

ImportError: cannot import name 'AuthenticationErrorException' from 'rapid' (/Users/alexandra.pop/opt/anaconda3/envs/rapid/lib/python3.10/site-packages/rapid/__init__.py)

In [None]:
# Load the RAPID_* settings from the .env file and check that they authenticate.
load_dotenv()

# Retrieve the variables from the environment (None when a variable is unset)
client_id = os.getenv("RAPID_CLIENT_ID")
client_secret = os.getenv("RAPID_CLIENT_SECRET")
api_url = os.getenv("RAPID_URL")

# Confirm the values are loaded. The secret itself is never printed: cell
# output is saved with the notebook and would expose it to every reader.
print(f"CLIENT_ID: {client_id}")
print(f"CLIENT_SECRET: {'<set>' if client_secret else '<missing>'}")
print(f"API_URL: {api_url}")

# Initialize the RapidAuth class with the environment variables.
# AuthenticationErrorException is imported from rapid.exceptions in the first
# cell; `from rapid import AuthenticationErrorException` fails with ImportError.
try:
    auth = RapidAuth(client_id=client_id, client_secret=client_secret, url=api_url)
    auth.validate_credentials()
    print("Authentication successful")
except AuthenticationErrorException as e:
    # `auth` may be unbound here if the constructor itself raised.
    print(f"Authentication failed: {e}")

In [8]:
# Connect to the API.
# RapidAuth() receives no explicit credentials here; presumably it falls back
# to the RAPID_* environment variables -- TODO confirm against the rapid-sdk docs.
rapid_authentication = RapidAuth()
rapid = Rapid(auth=rapid_authentication)

AuthenticationErrorException: Auth not configured, could not connect to instance of rAPId

In [3]:
# Create the default/test/test_dummy schema and upload the first rows.

# Two sample rows with integer columns a, b and c.
raw_data = [
    {"a": 1, "b": 2, "c": 3},
    {"a": 10, "b": 20, "c": 30},
]
df = pd.DataFrame(raw_data)

# Metadata identifying the target dataset, its owner and its sensitivity.
dataset_owner = Owner(name="Alex P", email="alexandra.pop@digital.cabinet-office.gov.uk")
metadata = SchemaMetadata(
    layer="default",
    domain="test",
    dataset="test_dummy",
    owners=[dataset_owner],
    sensitivity=SensitivityLevel.PUBLIC.value,
)

# upgrade_schema_on_fail stays False for this call.
try:
    dataset.upload_and_create_dataset(
        rapid=rapid, df=df, metadata=metadata, upgrade_schema_on_fail=False
    )
except ColumnNotDifferentException:
    print("Columns not different.")

In [9]:
# Upload a second batch of rows to default/test/test_dummy.
# The frame has the same columns (a, b, c) as the first upload.

raw_data = [
    {"a": 6, "b": 7, "c": 2},
    {"a": 11, "b": 24, "c": 43},
]
df = pd.DataFrame(raw_data)

# NOTE(review): DataFrameUploadValidationException is imported in the first
# cell but never handled -- confirm which exceptions upload_dataframe raises.
try:
    response = rapid.upload_dataframe(
        layer="default", domain="test", dataset="test_dummy", df=df, wait_to_complete=True
    )
except ColumnNotDifferentException:
    print("Columns not different.")

In [11]:
# Fetch the dataset info for default/test/test_dummy from the API

data = rapid.fetch_dataset_info(layer="default", domain="test", dataset="test_dummy")


In [12]:
# Show the value currently bound to `data` as the cell output
data

{'metadata': {'layer': 'default',
  'domain': 'test',
  'dataset': 'test_dummy',
  'version': 1,
  'sensitivity': 'PUBLIC',
  'description': '',
  'key_value_tags': {},
  'key_only_tags': [],
  'owners': [{'name': 'Alex P',
    'email': 'alexandra.pop@digital.cabinet-office.gov.uk'}],
  'update_behaviour': 'APPEND',
  'is_latest_version': True,
  'number_of_rows': 8,
  'number_of_columns': 3,
  'last_updated': 'Never updated'},
 'columns': [{'name': 'a',
   'partition_index': None,
   'data_type': 'int',
   'allow_null': True,
   'format': None,
   'statistics': None},
  {'name': 'b',
   'partition_index': None,
   'data_type': 'int',
   'allow_null': True,
   'format': None,
   'statistics': None},
  {'name': 'c',
   'partition_index': None,
   'data_type': 'int',
   'allow_null': True,
   'format': None,
   'statistics': None}]}

In [None]:
# TODO: delete the test_dummy dataset (this cell is still an empty placeholder)




In [15]:
# Download version 1 of default/test/test_dummy.
# NOTE: this rebinds `data`, which an earlier cell used for the dataset-info dict.

data = rapid.download_dataframe(layer="default", domain="test", dataset="test_dummy", version=1)


<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 0 to 7
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   a       8 non-null      int64
 1   b       8 non-null      int64
 2   c       8 non-null      int64
dtypes: int64(3)
memory usage: 256.0 bytes


  return pd.read_json(json.dumps(data), orient="index")


In [18]:
# Summarise the downloaded frame: index, columns, non-null counts and dtypes
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 0 to 7
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   a       8 non-null      int64
 1   b       8 non-null      int64
 2   c       8 non-null      int64
dtypes: int64(3)
memory usage: 256.0 bytes


In [19]:
# Display the first rows as the cell's last expression instead of print();
# head(10) keeps the output bounded as repeated uploads add rows.
data.head(10)

    a   b   c
0   1   2   3
1  10  20  30
2   6   7   2
3  11  24  43
4   6   7   2
5   1   2   3
6  10  20  30
7  11  24  43
