In [118]:
import pandas as pd
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import joblib

In [119]:
Dataset = pd.read_csv('Dataset_modelo.csv')
Dataset.head()

Unnamed: 0,Country,Country Code,Net migration,Total poblation,PBI per capita,Remesas,Homicidios
0,Aruba,ABW,1004.0,106537.0,29342.1,111331600.0,0.0
1,Angola,AGO,32066.0,34503774.0,1953.53,445400100.0,0.0
2,Albania,ALB,-69998.0,2811666.0,6492.87,140344300.0,2.12
3,Argentina,ARG,24000.0,45808747.0,10636.12,588682500.0,5.35
4,Armenia,ARM,-24989.0,2790974.0,4966.51,352775600.0,1.75


In [120]:
Dataset = pd.concat([Dataset, Dataset], ignore_index=True)
shuffled_data = Dataset.sample(frac=1).reset_index(drop=True)

In [121]:
scaler = StandardScaler()
columns_to_standardize = ["Net migration", "Total poblation", "PBI per capita", "Remesas", "Homicidios"]
Dataset[columns_to_standardize] = scaler.fit_transform(Dataset[columns_to_standardize])

In [122]:
X = Dataset[["Total poblation", "PBI per capita", "Remesas", "Homicidios"]]
y = Dataset['Net migration']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=26)

In [123]:
model = GradientBoostingRegressor()
model.fit(X_train, y_train)

In [124]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Mean Squared Error:", mse)
print("R^2 Score:", r2)

Mean Squared Error: 0.006793047857430309
R^2 Score: 0.9179116383165411


In [125]:
cv_scores = cross_val_score(model, X, y, cv=5)
print("Cross Validation Scores:", cv_scores)
print("Cross Validation Accuracy:", cv_scores.mean())

Cross Validation Scores: [0.99341015 0.99241382 0.99572689 0.99764907 0.99241546]
Cross Validation Accuracy: 0.9943230781745692


In [126]:
joblib.dump(model, "modelo_entrenado.joblib")
joblib.dump(scaler, "scaler_entrenado.joblib")

['scaler_entrenado.joblib']