In [85]:
# Pandas is used for data manipulation
import pandas as pd


In [86]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [87]:
import pandas as pd
import numpy as np

# Read the data in CSV format
soil_data = pd.read_csv('/content/drive/MyDrive/JingyiHuang/soil.txt', header=0, sep=",")

soil_df = soil_data.dropna()


# Convert column 16 (now column 12 after dropping columns) to categorical type
soil_df.iloc[:, 12] = soil_df.iloc[:, 12].astype('category')

# Number of data
nd = soil_df.shape[0]

# Split Calibration: 75% and Validation 25%
np.random.seed(111)  # Set a random seed
ic = np.random.choice(nd, size=round(nd * 0.75), replace=False)  # Generate a random permutation of data

# Form the calibration set
cali = soil_df.iloc[ic, :]

# Form the validation set by selecting the indices not in the calibration set
vali_indices = soil_df.index.difference(ic)
vali = soil_df.loc[vali_indices]

# Print the number of samples in each set to verify
print(f'Number of samples in calibration set: {cali.shape[0]}')
print(f'Number of samples in validation set: {vali.shape[0]}')


Number of samples in calibration set: 2206
Number of samples in validation set: 876


In [88]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

# Define the features and target variable
features = ['Depth', 'tmax', 'tmin', 'prcp', 'lc', 'clay', 'silt', 'sand', 'dem', 'slope', 'aspect', 'hillshade', 'twi', 'mrvbf']
target = 'SOC'

# Extract features and target variable for calibration set
X_cali = cali[features]
y_cali = cali[target]

# Train the Random Forest model
rf_model = RandomForestRegressor(n_estimators=500, max_features=10, random_state=42)
rf_model.fit(X_cali, y_cali)

# Predict on the calibration data
rf_predict = rf_model.predict(X_cali)

# Calculate goodness of fit
gof_rf_predict = r2_score(y_cali, rf_predict)

# Calibration
print("Goodness of fit:", gof_rf_predict)


Goodness of fit: 0.9222235623515054


In [89]:
X_cali.columns

Index(['Depth', 'tmax', 'tmin', 'prcp', 'lc', 'clay', 'silt', 'sand', 'dem',
       'slope', 'aspect', 'hillshade', 'twi', 'mrvbf'],
      dtype='object')

In [90]:
from joblib import dump

# Saving the model
dump(rf_model, '/content/drive/MyDrive/JingyiHuang/rf_model.joblib')


['/content/drive/MyDrive/JingyiHuang/rf_model.joblib']

In [92]:
#This is my model
joblib.dump(rf_model, '/content/drive/MyDrive/JingyiHuang/rf_model.pkl')

['/content/drive/MyDrive/JingyiHuang/rf_model.pkl']

In [93]:
from joblib import load

# Calling the model to run an example
rf_model = load('/content/drive/MyDrive/JingyiHuang/rf_model.pkl')

predictions = rf_model.predict(X_cali)  # Por ejemplo, X_test es tu conjunto de datos de prueba


In [94]:
predictions

array([0.2800792 , 0.42556075, 2.37493076, ..., 1.78812623, 1.88244104,
       0.85083708])

### Testing the pickle and API

In [18]:
example1 = {"Depth": 5,
            "tmax": 10.702739716,
            "tmin": 0.5561643839,
            "prcp": 753.0,
            "lc": 9.0,
            "clay": 10.0,
            "silt": 35.0,
            "sand": 55.0,
            "dem": 189,
            "slope": 5.69661e-05,
            "aspect": 6.283185482,
            "hillshade": 0.7853578925,
            "twi": 11.223488808,
            "mrvbf": 2.5688176155
      }

In [104]:
## TEsting the pickle model

features = ['Depth', 'tmax', 'tmin', 'prcp', 'lc', 'clay', 'silt', 'sand', 'dem', 'slope', 'aspect', 'hillshade', 'twi', 'mrvbf']
#dictionary = {key: [value] for key, value in class_model.dict().items()}
df = pd.DataFrame([example])

print(df)

new=rf_model.predict(df[features])[0]
print(new)

   Depth      tmax      tmin   prcp   lc  clay  silt  sand  dem     slope  \
0      5  10.70274  0.556164  753.0  9.0  10.0  35.0  55.0  189  0.000057   

     aspect  hillshade        twi     mrvbf  
0  6.283185   0.785358  11.223489  2.568818  
5.626214404736811


In [25]:
import requests
import json

def build_url(base, path):
    return f"{base}{path}"


In [26]:
def api_request(data):
  data_json = json.dumps(data)

  headers = {
    "Authorization": f"Key {connect_api_key}",
    "accept": "application/json",
    "Content-Type": "application/json"
  }

  # Maybe customize this url
  url_path = "/content/37ea4362-968a-4899-bc03-244f4d92d826/v1/prediction"
  request_url = build_url(connect_server, url_path)

  # Make the POST request with verification disabled
  response = requests.post(request_url, headers=headers, data=data_json, verify=False)

  # Response
  print(f"Status Code: {response.status_code}")
  try:
      response_json = response.json()
      print(response_json)
  except json.JSONDecodeError:
      print("Response content is not in JSON format.")
      print(response.text)

In [27]:
# Define the JSON payload
api_request(example1)


Status Code: 200
{'soil_organic_carbon_stock': 5.626214404736811}


In [28]:
data = {
      "Depth": 60,
      "tmax": 12.42,
      "tmin": 0.45,
      "prcp": 954,
      "lc": 13,
      "clay": 2.0,
      "silt": 10.0,
      "sand": 88.0,
      "dem": 331,
      "slope": 0.03,
      "aspect": 6.28,
      "hillshade": 0.7588,
      "twi": 5.87,
      "mrvbf": 4.72
}

api_request(data)

Status Code: 200
{'soil_organic_carbon_stock': 0.156434471283}
