# In [None]:
# for exporting the model for the API
import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Load the raw dataset; the CSV is read relative to the current working directory.
df = pd.read_csv("CO2Emissions.csv")
df  # view data frame (only renders as the last expression of a notebook cell)

# Handling missing values
pd.isnull(df).any()  # per-column flag: True where a column contains NaN
# author's note: data does not have any nan values!!

# Independent variables: engine size, cylinders, transmission, fuel type and
# combined fuel consumption. This is the only feature selection the rest of
# the script relies on.
x = df.loc[:, ['Engine Size(L)', 'Cylinders', 'Transmission', 'Fuel Type', 'Fuel Consumption Comb (L/100 km)']]
x

# Dependent variable (CO2 emissions). Selecting with a list keeps it as a
# one-column DataFrame rather than a Series.
y = df.loc[:, ['CO2 Emissions(g/km)']]
y

# Categorical feature columns (transmission & fuel type) that get one-hot encoded.
categorical_columns = ['Transmission', 'Fuel Type']

# Single encoding step: (name, transformer, columns it applies to).
one_hot_step = ('cat', OneHotEncoder(), categorical_columns)

# Column transformer that one-hot encodes the categorical columns and passes
# every other column through unchanged.
transformed_data = ColumnTransformer(
    transformers=[one_hot_step],
    remainder="passthrough",
)

# Fit the transformer on the selected features and replace x with the encoded matrix.
x = transformed_data.fit_transform(x)
print(x)

# Split into training and testing groups: 20% of the rows are held out and
# the fixed random_state makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, df['CO2 Emissions(g/km)'], test_size=0.2, random_state=0
)

# Scale features to unit variance without centering. with_mean=False is
# presumably used because the encoded matrix may be sparse, which
# StandardScaler cannot center.
st_x = StandardScaler(with_mean=False)

# Fit the scaler on the training rows only, then reuse it for the test rows.
x_train = st_x.fit_transform(x_train)
x_test = st_x.transform(x_test)

model = RandomForestRegressor(n_estimators=100, random_state=0)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)
y_pred

# For a regressor, score() is the coefficient of determination (R^2), not a
# classification accuracy. The variable name is kept for compatibility.
accuracy = model.score(x_test, y_test)
print("R^2 score: ", accuracy)

# The model was trained on encoded AND scaled features, so the exported
# preprocessor must apply both steps. Exporting only the encoder would make a
# consumer feed unscaled features to the model. Both steps are already
# fitted; the Pipeline just chains their transform() calls.
# NOTE(review): the loaded object is now a Pipeline instead of a bare
# ColumnTransformer - confirm the API only calls .transform() on it.
preprocessor = Pipeline([("encode", transformed_data), ("scale", st_x)])

joblib.dump(model, "vehicle_emissions_model.pkl")
joblib.dump(preprocessor, "transformed_data.pkl")