In [15]:
from pandera.typing import Series
from hypothesis import given
import pandera as pa
import pandas as pd
import os
import requests

In [13]:
class OutputSchema(pa.SchemaModel):
    """Pandera schema describing the expected columns of the output data.

    Columns:
        year: non-null integer, between 1996 and 2016 inclusive.
        total: float, may be null, between 0 and 1e10 inclusive.
        protected: float, may be null, between 0 and 1e10 inclusive.
        location_id: non-null string; repeated values are allowed and each
            value must pass ``location_id_check``.
    """
    # Required integer year, bounded on both sides (ge/le are inclusive).
    year: Series[int] = pa.Field(nullable=False, ge=1996, le=2016)
    # Optional non-negative float capped at 1e10.
    total: Series[float] = pa.Field(nullable=True,ge=0., le=10000000000.)
    # Optional non-negative float capped at 1e10.
    protected: Series[float] = pa.Field(nullable=True, ge=0., le=10000000000.)
    # Required string identifier; the same id may appear on many rows.
    location_id: Series[str] = pa.Field(nullable=False, allow_duplicates=True)
    # NOTE: generating a strategy / an example from this schema seems to fail
    # when the check below is present; this needs further investigation.
    @pa.check("location_id")
    def location_id_check(cls, series: Series[str]) -> Series[bool]:
        """Flag, element-wise, whether each location_id is in the allowed list.

        Returns a boolean Series that is True where the value is one of the
        accepted ids. The current list is a placeholder (see TODO).
        """
        # TODO: replace the placeholder list with the real array of location ids
        return series.isin(['cow', 'lama'])

def transformationPipe(filePath: str, sheet: str = 'Sheet3') -> str:
    """
    Read one sheet of an Excel workbook, validate it against ``OutputSchema``
    and write the validated rows to a CSV file.

    Args:
        filePath (str): path to the excel file
        sheet (str, optional): name of the sheet in the excel file. Defaults to 'Sheet3'.

    Returns:
        str: path to the CSV file holding the validated data

    Raises:
        ValueError: if the sheet does not satisfy ``OutputSchema``. The
            underlying pandera ``SchemaError`` is chained as ``__cause__`` so
            the failing column/check is still visible in the traceback.
    """
    client_provided = pd.read_excel(filePath, sheet_name=sheet)
    # do any transformation here

    # validate() returns the validated DataFrame and reports invalid data by
    # raising; a DataFrame must never be used in a boolean test (pandas raises
    # "truth value of a DataFrame is ambiguous"), so failure is detected via
    # the exception rather than via `if not validated`.
    try:
        validated = OutputSchema.validate(client_provided)
    except pa.errors.SchemaError as err:
        raise ValueError('The data is not valid') from err

    # save the validated data, creating the output folder on first use
    outputPath = 'data/output/test-mangroves.csv'
    os.makedirs(os.path.dirname(outputPath), exist_ok=True)
    validated.to_csv(outputPath, index=False)

    return outputPath

def uploadDataApi(filePath: str, endpoint: str) -> str:
    """
    Upload a file to an API endpoint with an HTTP POST.

    Args:
        filePath (str): path to the file to upload
        endpoint (str): endpoint to upload to

    Returns:
        str: body of the server's response as text (presumably the url of
            the uploaded file -- confirm against the API)
    """
    # Open the file in a context manager so the handle is closed as soon as
    # the request finishes, including when the request raises.
    with open(filePath, 'rb') as fileHandle:
        r = requests.post(endpoint, files={'file': fileHandle})
    # NOTE(review): the HTTP status is not checked, so an error response body
    # is returned to the caller as-is.
    return r.text

In [12]:
# Generate a random dataframe with our defined schema.
# df_test = OutputSchema.example(size=100)
# df_test.head()

In [None]:
# Executes the transformation pipe.
# cleanedFile = transformationPipe('', '')
# uploadDataApi(cleanedFile, '')