In [84]:
import pandas as pd
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import joblib

In [85]:
# Load the modelling table from the CSV that sits next to the notebook.
# The recorded preview shows one row per country and year, plus an
# "Unnamed: 0" column that looks like a row index saved with the file.
Dataset = pd.read_csv(filepath_or_buffer='Dataset_modelo.csv')

# Bare last expression: the notebook renders pandas' truncated preview.
Dataset

Unnamed: 0,Country Name,Year,Homicides,Poblacion,Remesas,Net migration,Expulsor,Receptor
0,Afghanistan,1990,1538.0,10694796,1.439791e+08,-1525351.0,1,0
1,Afghanistan,1991,2001.0,10745167,1.439791e+08,-605004.2,1,0
2,Afghanistan,1992,2299.0,12057433,1.439791e+08,315342.6,0,1
3,Afghanistan,1993,2589.0,14003760,1.439791e+08,1235689.4,0,1
4,Afghanistan,1994,2849.0,15455555,1.439791e+08,2156036.2,0,1
...,...,...,...,...,...,...,...,...
4910,Zimbabwe,2015,1302.0,14154937,1.135044e+07,-622997.0,1,0
4911,Zimbabwe,2016,1342.0,14452704,1.384487e+07,-622997.0,1,0
4912,Zimbabwe,2017,1363.0,14751101,1.857500e+07,-622997.0,1,0
4913,Zimbabwe,2018,1396.0,15052184,2.927889e+07,-622997.0,1,0


In [86]:
# Rebind `Dataset` to a fresh frame with a clean 0..n-1 row index.
# This is what the former single-frame pd.concat(..., ignore_index=True)
# amounted to, spelled with the direct pandas call.
Dataset = Dataset.reset_index(drop=True)

# Row-shuffled copy of the whole table (frac=1 keeps every row).
# The shuffle is seeded with the same value the train/test split uses, so a
# Restart & Run All reproduces the same ordering instead of a new one each run.
# NOTE(review): no later cell visible in this notebook reads `shuffled_data`;
# scaling, the split and cross-validation all operate on `Dataset`. Confirm
# whether the shuffled frame was meant to be used or can be dropped.
shuffled_data = Dataset.sample(frac=1, random_state=26).reset_index(drop=True)

In [87]:
# Columns that get standardized: the target ("Net migration"), the three
# numeric predictors, and the Expulsor / Receptor columns (which appear as
# 0/1 indicators in the preview above).
columns_to_standardize = ["Net migration", "Poblacion", "Homicides", "Remesas", "Expulsor", "Receptor"]

# Learn per-column mean / standard deviation from every row of the table...
scaler = StandardScaler().fit(Dataset[columns_to_standardize])

# ...and overwrite those columns in `Dataset` with their standardized values.
# NOTE(review): the scaler is fit on all rows before the train/test split and
# it includes the target column, so test-set statistics influence the scaling
# and the fitted scaler expects all six columns (target included) as input.
# Confirm this is intended before reusing the scaler outside this notebook.
Dataset[columns_to_standardize] = scaler.transform(Dataset[columns_to_standardize])

In [88]:
# Predictors and target, both taken from the (already standardized) table.
feature_columns = ["Homicides", "Poblacion", "Remesas", "Expulsor", "Receptor"]
X = Dataset[feature_columns]
y = Dataset['Net migration']

# Hold out 30% of the rows for evaluation; the seed makes the split repeatable.
# NOTE(review): rows are split at random, so different years of the same
# country can land on both sides of the split - verify that this is acceptable
# for the question being asked.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=26,
)

In [89]:
# Gradient-boosted regression trees with scikit-learn's default
# hyperparameters. random_state is pinned (same seed as the train/test split)
# because the estimator uses it to seed each tree and the feature permutation
# at every split; without it, repeated runs are not guaranteed to rebuild the
# same ensemble or reproduce the same scores.
model = GradientBoostingRegressor(random_state=26)

# Fit on the training portion only; the fitted estimator is also the cell's
# displayed value.
model.fit(X_train, y_train)

In [90]:
# Score the fitted model on the held-out rows.
y_pred = model.predict(X=X_test)

# Both metrics compare the held-out target with the predictions. The target
# was standardized earlier in the notebook, so the MSE is in standardized
# units rather than in people.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print("Mean Squared Error:", mse)
print("R^2 Score:", r2)

Mean Squared Error: 0.17806111170523733
R^2 Score: 0.8153232438916365


In [91]:
# 5-fold cross-validation over the full feature matrix and target.
# For a regressor the default score is R^2, so the metric is now named
# explicitly (same values as before) and the summary line is labelled as a
# mean R^2 rather than an "accuracy", which is a classification metric.
# NOTE(review): the fold scores recorded below (about 0.27-0.47) are far lower
# than the hold-out R^2 printed above (about 0.82). With an integer `cv` the
# folds are presumably contiguous, unshuffled blocks of a table that is
# ordered by country - confirm which of the two estimates reflects the
# intended use of the model.
cv_scores = cross_val_score(model, X, y, cv=5, scoring="r2")
print("Cross Validation Scores:", cv_scores)
print("Mean Cross Validation R^2 Score:", cv_scores.mean())

Cross Validation Scores: [0.30763645 0.47488059 0.38647917 0.27462061 0.34418563]
Cross Validation Accuracy: 0.35756049077083807


In [92]:
# Persist the fitted estimator and the fitted scaler next to the notebook.
# NOTE(review): the scaler saved here was fit on six columns, including the
# target "Net migration"; whoever loads it must supply all six columns to
# transform() - confirm that downstream code does so.
joblib.dump(value=model, filename="modelo_entrenado.joblib")

# Kept as the last expression so the cell still echoes the written path.
joblib.dump(value=scaler, filename="scaler_entrenado.joblib")

['scaler_entrenado.joblib']