In [1]:
import pandas as pd
import numpy as np

In [2]:
df1 = pd.read_csv('https://raw.githubusercontent.com/arinaaandreeva/Datasets/main/CO2_emission_2008_13.csv')
df2 = pd.read_csv('https://raw.githubusercontent.com/arinaaandreeva/Datasets/main/CO2_emission_2014.csv')
df = pd.concat([df1, df2], axis=0)
print(df1.shape, df2.shape, df.shape)

(1584, 13) (264, 13) (1848, 13)


In [3]:
df.columns = df.columns.str.lower().str.replace(' ', '_')

In [4]:
strings = list(df.dtypes[df.dtypes == 'object'].index)
strings = strings[1:len(strings)]
strings

['gdp_growth',
 'co2_emission_metrics_per_capita',
 'electricity_access',
 'urban_population',
 'population_growth',
 'urban_population_growth_annual',
 'live_stock_production',
 'forest_area',
 'renewable_energy_use',
 'energy_use_kg_of_oil_per_capita',
 'population_total']

In [5]:
for i in strings:
  df[i]=df[i].apply(lambda x:x.replace('..',''))
  df[i]=df[i].apply(lambda x:x.replace(',',''))
  df[i]=df[i].apply(pd.to_numeric)

df['year']=df['year'].astype(object)

In [6]:
df = df.fillna(0)

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import r2_score

In [8]:
ohe = OneHotEncoder()
transformed = ohe.fit_transform(df[['country', 'year']])
print(transformed.toarray())

[[1. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]]


In [9]:
df_full_train, df_test = train_test_split(df, test_size=0.2, random_state=1)
df_train, df_val = train_test_split(df_full_train, test_size=0.25, random_state=1)

In [10]:
y_train = df_train.co2_emission_metrics_per_capita.values
y_val = df_val.co2_emission_metrics_per_capita.values
y_test = df_test.co2_emission_metrics_per_capita.values

del df_train['co2_emission_metrics_per_capita']
del df_val['co2_emission_metrics_per_capita']
del df_test['co2_emission_metrics_per_capita']

In [11]:
from sklearn.feature_extraction import DictVectorizer
dv = DictVectorizer(sparse=False)

X_train = df_train.reset_index(drop=True)
X_train_dict = X_train.to_dict(orient='records')
X_train = dv.fit_transform(X_train_dict)

X_val = df_val.reset_index(drop=True)
X_val_dict = X_val.to_dict(orient='records')
X_val = dv.fit_transform(X_val_dict)

X_test = df_test.reset_index(drop=True)
X_test_dict = X_test.to_dict(orient='records')
X_test = dv.fit_transform(X_test_dict)

In [12]:
y_full_train = df_full_train['co2_emission_metrics_per_capita'].astype(int)
del df_full_train['co2_emission_metrics_per_capita']

In [13]:
dicts_full_train = df_full_train.to_dict(orient='records')

dv = DictVectorizer(sparse=False)
X_full_train = dv.fit_transform(dicts_full_train)

dicts_test = df_test.to_dict(orient='records')
X_test = dv.transform(dicts_test)

In [14]:
import xgboost as xgb
from xgboost import XGBClassifier
from xgboost.sklearn import XGBClassifier

In [15]:
dfulltrain = xgb.DMatrix(X_full_train, label=y_full_train,
                    feature_names=dv.get_feature_names())

dtest = xgb.DMatrix(X_test, feature_names=dv.get_feature_names())



In [16]:
model = xgb.XGBRegressor(learning_rate = 0.5, n_estimators = 200, seed = 25)
 
model.fit(X_train, y_train)
 
pred = model.predict(X_test)
 
r2 = r2_score(y_test, pred)
rmse= MSE(y_test, pred, squared=False)
print(f"r2 score: {r2.round(3)}   RMSE: {rmse.round(3)}")

r2 score: 0.954   RMSE: 1.191


In [17]:
import bentoml

In [18]:
bentoml.xgboost.save_model('co2_emission_model', model)

Model(tag="co2_emission_model:ghqcetk57oojsehh", path="C:\Users\user\bentoml\models\co2_emission_model\ghqcetk57oojsehh\")

In [19]:
import bentoml
from bentoml.io import JSON

model_ref = bentoml.xgboost.get('co2_emission_model:latest')

#dv = model_ref.custom_objects['DictVectorizer']
# Create the model runner (it can also scale the model separately)
model_runner = model_ref.to_runner()

# Create the service 'credit_risk_classifier' and pass the model
svc = bentoml.Service('co2_emission_classifier', runners=[model_runner])


# Define an endpoint on the BentoML service
@svc.api(input=JSON(), output=JSON()) # decorate endpoint as in json format for input and output
def classify(application_data):
    # transform data from client using dictvectorizer
    vector = dv.transform(application_data)
    # make predictions using 'runner.predict.run(input)' instead of 'model.predict'
    prediction = model_runner.predict.run(vector)
    
    result = prediction[0] # extract prediction from 1D array
    print('Prediction:', result)

    if result > 0.5:
        return {'Status': 'DECLINED'}
    elif result > 0.3:
        return {'Status': 'MAYBE'}
    else:
        return {'Status': 'APPROVED'}

In [27]:
bentoml models get co2_emission_model:ghqcetk57oojsehh

SyntaxError: invalid syntax (Temp/ipykernel_7288/2243472118.py, line 1)

In [25]:
service: "service.py:svc" # Specify entrypoint and service name
labels:  
  owner: bentoml-team
  project: gallery
include:
- "*.py" # A pattern for matching which files to include in the bento build
python:
  packages: # Additional pip packages required by the service
    - xgboost
    - sklearn

SyntaxError: invalid syntax (Temp/ipykernel_7288/3546988627.py, line 2)

In [26]:
Successfully built Bento(tag="co2_emission_model:ghqcetk57oojsehh")

SyntaxError: invalid syntax (Temp/ipykernel_7288/3804905371.py, line 1)

In [28]:
!pip install pydantic



In [29]:
from pydantic import BaseModel

ImportError: cannot import name dataclass_transform