In [None]:
%run ../python_classes/Emulator.ipynb

# Emulator class

Here we inspect methods of the Emulator class.

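Create an emulator. A default instance is built first so that the available input names can be looked up; for this example, the emulator is then restricted to a short time window on July 1st, 2017 (see `start_time` and `end_time` below).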

In [None]:
# Default emulator, built with no arguments; the next cell queries it for the
# full list of input names.
a = Emulator()

# Features to keep: every other input name is put on the exclusion list when
# the emulator is rebuilt in the next cell.
myFeaturesNames = [
    'acure_sea_spray',
    'acure_dry_dep_acc',
    'acure_bvoc_soa',
]

In [None]:
# Exclusion list = every input name reported by the current emulator, minus
# the features we want to keep.
all_input_names = set(a.__get_input_names__())
excluded_features = list(all_input_names - set(myFeaturesNames))

# Rebuild the emulator for one region and a four-hour window on 2017-07-01.
# NOTE(review): this cell reads `a` and then rebinds it, so re-running it
# queries the restricted emulator instead of the default one.
a = Emulator(
    region=[-1, 0, -1, 0],
    excludeFeats=excluded_features,
    start_time="2017-07-01T10:00:00",
    end_time="2017-07-01T13:59:59",
    centerTarget=True,
)

In [None]:
# Inspect every attribute currently stored on the emulator instance.
a.__dict__

# Pixelwise

## Train

Train a model for each pixel.

In [None]:
# Start training with the default arguments; the returned handle exposes
# .status(), which is polled in the next cell.
job = a.train()

In [None]:
# Check on the training job returned by a.train().
job.status()

In [None]:
# Inspect the emulator's attributes again, now that train() has been called.
a.__dict__

Collect the parameters from each of the models.

In [None]:
# Retrieve the pixelwise emulator. The result exposes .status() (see the next
# cell), so it appears to be a job handle rather than the data itself
# (TODO confirm against the Emulator class).
params = a.__retrieve_emulator_pixelwise__()

In [None]:
# Check on the retrieval started in the previous cell.
params.status()

Display the learned parameters, excluding duplicates if necessary.

In [None]:
# Convert the retrieved result into the learned-parameters table and show it.
# NOTE(review): the name `df` is rebound several times further down this
# notebook, so later cells depend on which assignment ran last.
df = a.__retrieve_pixelwise_parameters__(params)
df

## Emulate

In [None]:
# Start pixelwise emulation with nVariants=50; the returned handle is polled
# with .status() in the next cell.
emu = a.__emulate_pixelwise__(nVariants=50)

In [None]:
# Check on the emulation started in the previous cell.
emu.status()

In [None]:
# Show the feature-exclusion list stored on the emulator.
a.excludeFeats

In [None]:
# Show the filter the emulator builds; the same call is passed as an argument
# to the C3 jobs created further down.
a.__make_filter__()

In [None]:
# Show the target name stored on the emulator.
a.targetName

In [None]:
# Build and display a synthetic dataset. The argument 50 matches the
# nVariants=50 used for pixelwise emulation above; presumably it is the number
# of variants to generate (TODO confirm).
a.__make_synth_dataset__(50)

In [None]:
# Describe the GPR technique using the emulator's own kernel, then upsert it
# and keep the handle returned by upsert().
gpr_technique_spec = c3.GaussianProcessRegressionTechnique(
    randomState=42, kernel=a.GPR_kernel, centerTarget=True
)
my_GPR_technique = gpr_technique_spec.upsert()

In [None]:
# NOTE(review): the technique was already constructed with centerTarget=True,
# so this assignment looks redundant. It also runs after .upsert(), so it is
# unclear whether the change reaches the stored object — confirm it is needed.
my_GPR_technique.centerTarget=True

In [None]:
# Display the technique object.
my_GPR_technique

In [None]:
# Build the two computed arguments up front so the job call reads as a flat
# list of inputs.
prediction_filter = a.__make_filter__()
synthetic_inputs = a.__make_synth_dataset__(50)

# Submit the prediction job using the emulator's settings and the technique
# defined above.
job = c3.PredictAODGPR.makePredictionsJob(
    a.excludeFeats,
    prediction_filter,
    a.targetName,
    synthetic_inputs,
    my_GPR_technique,
    a.batchSize,
)

In [None]:
# Submit the job that extracts the learned parameters. Unlike the prediction
# job, no synthetic dataset is passed.
# NOTE(review): this rebinds `job`, so the handle of the prediction job from
# the previous cell is no longer reachable afterwards.
extraction_filter = a.__make_filter__()
job = c3.AODGPRModelFinder.extractLearnedParametersJob(
    a.excludeFeats,
    extraction_filter,
    a.targetName,
    my_GPR_technique,
    a.batchSize,
)

In [None]:
# Status of whichever job was assigned to `job` last (the parameter-extraction
# job when the cells are run top to bottom).
job.status()

## Fit manually

In [None]:
# Time window and latitude/longitude bounds for the averaged response.
response_window = dict(
    time1="2017-07-01T00:00:00",
    time2="2017-07-01T02:59:59",
    lat1=-1.0,
    lat2=0.0,
    lon1=-22.0,
    lon2=-21.0,
)

# Training target and training inputs for the manual fit below.
y_train = get_average_response(**response_window)
X_train = get_inputs()

In [None]:
# Matern kernel with one unit length scale per selected feature.
n_selected_features = len(myFeaturesNames)
matern_spec = c3.SklearnGPRKernelMatern(
    lengthScale=[1.0] * n_selected_features,
    nu=0.5,
    coefficient=1.0,
)
GPR_kernel = matern_spec.build().kernel.upsert()

# GPR technique wrapping that kernel, upserted so the handle can be reused.
GPR_technique = c3.GaussianProcessRegressionTechnique(
    randomState=42, kernel=GPR_kernel, centerTarget=True
).upsert()

In [None]:
# Untrained GPR pipe configured with the technique defined above.
GPR_pipe = c3.GaussianProcessRegressionPipe(technique=GPR_technique)

In [None]:
# Wrap the Python-side training data as C3 datasets, then fit the pipe.
train_features = c3.Dataset.fromPython(X_train)
train_target = c3.Dataset.fromPython(pd.DataFrame(y_train))

GPR_trained = GPR_pipe.train(input=train_features, targetOutput=train_target)

In [None]:
# Recover the fitted model object and read the learned length scales from the
# 'k2__length_scale' entry of its kernel parameters.
fitted_model = c3.PythonSerialization.deserialize(serialized=GPR_trained.trainedModel.model)
learned_length_scales = fitted_model.kernel_.get_params()['k2__length_scale']

# Pair each selected feature name with its learned length scale.
x = dict(zip(myFeaturesNames, learned_length_scales))

# Display the pairs ordered from smallest to largest length scale.
dict(sorted(x.items(), key=lambda pair: pair[1]))

In [None]:
# Show the first row of `df2`.
# NOTE(review): `df2` is never assigned anywhere in the visible notebook, so
# this cell fails on a fresh kernel unless the %run'd notebook defines it —
# confirm where it is supposed to come from.
df2.iloc[0, :]

## Get parameters manually

In [None]:
# Build the data filter that is passed to the parameter-extraction job below.
# NOTE(review): `a2` is not defined in the visible notebook (only `a` is), and
# it is called via make_filter() whereas `a` uses __make_filter__() — confirm
# which emulator object and method are intended.
gstpFilter = a2.make_filter()

In [None]:
# Matern kernel with one unit length scale per emulator input. Unlike the
# kernel built in the "Fit manually" section, this one is not upserted.
n_inputs = len(a2.inputs)
GPR_kernel = c3.SklearnGPRKernelMatern(
    lengthScale=[1.0] * n_inputs, nu=0.5, coefficient=1.0
).build().kernel

In [None]:
# Technique built from the kernel above; target centring follows the
# emulator's own centerTarget setting.
GPR_technique = c3.GaussianProcessRegressionTechnique(
    randomState=42,
    kernel=GPR_kernel,
    centerTarget=a2.centerTarget,
)

# Submit the parameter-extraction job with target name "all" and a batch size
# of 10.
new_job = c3.AODGPRModelFinder.extractLearnedParametersJob(
    a2.excludeFeats,
    gstpFilter,
    "all",
    GPR_technique,
    10,
)

In [None]:
# Check on the manually submitted parameter-extraction job.
new_job.status()

In [None]:
# Pull the results of the extraction job into a pandas DataFrame.
df = c3.AODGPRModelFinder.getDataframeFromJob(new_job)

# Cast both coordinate columns to float in a single vectorised step.
# (DataFrame.applymap is deprecated in recent pandas; astype does the same
# element-wise float conversion without the per-cell lambda.)
df[["latitude", "longitude"]] = df[["latitude", "longitude"]].astype(float)

# Convert each time value to a pandas Timestamp via its string form. Series.map
# works on the column directly, so no temporary DataFrame is needed.
df["time"] = df["time"].map(lambda value: pd.Timestamp(str(value)))

In [None]:
# Map the table's columns, in order, onto the emulator's input names.
# NOTE(review): this uses `a.inputs`, while the rest of this section works
# with `a2` — confirm which emulator the names should come from.
column_renames = dict(zip(df.columns, a.inputs))

# Display one row per (latitude, longitude, time) with a fresh index and the
# renamed columns. The result is only shown, not stored.
(
    df.drop_duplicates(subset=["latitude", "longitude", "time"])
    .reset_index(drop=True)
    .rename(columns=column_renames)
)

## In development: Regional emulator

In [None]:
# Train a single (non-pixelwise) emulator on the inputs from index 53 onward,
# giving every row of `df` the same label 1.
# NOTE(review): the keyword is spelled `lengthscales` here but `lengthScales`
# in the emulate_variant call and in the later a2.train call — confirm which
# spelling Emulator.train accepts. The offset 53 is also unexplained.
emulator = a.train(pixelwise=False, featureNames=a.inputs[53:], lengthscales=df, labels=[1]*df.shape[0])

In [None]:
# Draw one random query point with a value in [0, 1) per regional feature.
# NOTE(review): the draw is unseeded, so the result changes between runs.
random_query = np.random.rand(1, len(a.regionalFeatureNames))[0]

# Emulate that variant with the regional emulator trained above.
df_c = a.emulate_variant(
    pipe=emulator,
    queryVariant=random_query,
    lengthScales=df,
)

In [None]:
# Display the emulated variant.
df_c

In [None]:
# Plot the emulated variant using the emulator's own plotting helper.
a.plot_variant(df_c)

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load a stored length-scale table from disk; the file name encodes the time
# range it covers.
# NOTE(review): this rebinds `df`, replacing the table built earlier.
lengthscale_table_path = "lengthscale_tables/2017-07-01T00:00:00_2017-07-07T23:59:59"
with open(lengthscale_table_path, "r") as table_file:
    df = pd.read_csv(table_file)

In [None]:
# Display the loaded length-scale table.
df

In [None]:
# Collect the model ids for row labels 0 through 10. `.loc` slicing is
# label-based and includes the end label, so this selects up to 11 rows.
model_ids = df.loc[0:10, "modelId"].tolist()

In [None]:
# Stage the features for the selected model ids; the pipe built below reads
# them through its "StagedFeatures" data source.
c3.StagedFeatures.stageFromAODGPRModelIdsList(model_ids)

In [None]:
# Stage the targets for the same model ids; the pipe built below reads them
# through its "StagedTargets" data source.
c3.StagedTargets.stageFromAODGPRModelIdsList(model_ids)

In [None]:
# Number of kernel length scales. 19 matches the query variant built near the
# end of this notebook (17 sampled values plus 2 fixed ones).
kernelLen = 19

# Matern kernel with unit length scales, upserted for reuse.
matern_spec = c3.SklearnGPRKernelMatern(
    lengthScale=[1.0] * kernelLen,
    nu=0.5,
    coefficient=1.0,
)
GPR_kernel = matern_spec.build().kernel.upsert()

# Technique wrapping that kernel.
GPR_technique = c3.GaussianProcessRegressionTechnique(
    randomState=42, kernel=GPR_kernel, centerTarget=True
).upsert()

# Data source spec pointing the pipe at the staged feature and target types.
staged_features_ref = c3.TypeRef(typeName="StagedFeatures")
staged_targets_ref = c3.TypeRef(typeName="StagedTargets")
GPR_dataspec = c3.GPRDataSourceSpec(
    featuresType=staged_features_ref,
    targetType=staged_targets_ref,
).upsert()

# Untrained pipe combining the technique with the staged data source.
GPR_pipe = c3.GaussianProcessRegressionPipe(
    technique=GPR_technique,
    dataSourceSpec=GPR_dataspec,
)

In [None]:
# Fetch the features the pipe resolves from its data source and convert them
# to a pandas object for inspection. `features` is reused as the training
# input further down.
features = GPR_pipe.getFeatures()
df_feats = c3.Dataset.toPandas(features)
df_feats

In [None]:
# Fetch the target the pipe resolves from its data source and convert it to a
# pandas object for inspection. `targets` is reused as the training target
# further down.
targets = GPR_pipe.getTarget()
df_targets = c3.Dataset.toPandas(targets)
df_targets

In [None]:
# Fit the pipe on the staged features and targets fetched above.
# NOTE(review): this rebinds `GPR_trained` from the "Fit manually" section.
GPR_trained = GPR_pipe.train(input=features, targetOutput=targets)

In [None]:
# Recover the fitted model object, then pair each feature column with the
# learned length scale read from its kernel parameters.
staged_fit_model = c3.PythonSerialization.deserialize(serialized=GPR_trained.trainedModel.model)
dict(zip(df_feats, staged_fit_model.kernel_.get_params()['k2__length_scale']))

In [None]:
# First three rows of the length-scale table, each with its own label
# (0, 1, 2).
lengthScales = df.iloc[:3]
labels = list(range(3))

In [None]:
# Train the regional (non-pixelwise) emulators from the three labelled rows.
# The result is iterated and indexed by key in the final cell.
# NOTE(review): `a2` is not defined in the visible notebook — confirm where it
# is created.
regem = a2.train(pixelwise=False, lengthScales=lengthScales, labels=labels)

In [None]:
# Display the trained regional emulators.
regem

In [None]:
# One query point: 17 values drawn uniformly from [0, 1) followed by two fixed
# values, laid out as a single row of 19 columns.
# NOTE(review): the draw is unseeded, so the query point changes between runs.
sampled_inputs = list(np.random.uniform(0, 1, 17))
fixed_inputs = [-0.625, -0.9375]
queryVariant = pd.DataFrame(sampled_inputs + fixed_inputs).transpose()

# Wrap the query point as a C3 dataset so the pipes can process it.
synth = c3.Dataset.fromPython(pythonData=queryVariant)

In [None]:
# For every regional emulator: reload its pipe by id, print the prediction for
# the synthetic query point, then print the fitted kernel.
for emulator_key in regem:
    pipe = c3.GaussianProcessRegressionPipe.get(regem[emulator_key].id)

    # Prediction for the query variant, shown as a pandas object.
    prediction = pipe.process(input=synth)
    print(c3.Dataset.toPandas(prediction))

    # Fitted kernel of the underlying deserialized model.
    fitted_model = c3.PythonSerialization.deserialize(serialized=pipe.trainedModel.model)
    print(fitted_model.kernel_)