In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import pickle
import warnings
import re

In [None]:
import multiprocessing

# Number of CPUs reported by multiprocessing for the current runtime.
cores = multiprocessing.cpu_count() # Count the number of cores in a computer
cores  # bare expression: shown as the cell output

8

In [None]:
pip install flaml



In [None]:
# Mount Google Drive at /content/drive; the data CSVs are read from, and the
# fitted models are written to, folders under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from sklearn.metrics import r2_score
from sklearn.model_selection import KFold,cross_val_score
from sklearn.linear_model import LassoCV,LassoLarsIC,Lasso,LinearRegression

In [None]:
def lasso_moran_select(data,s):
    """Pick the Moran columns that a Lasso fit keeps, directly or via interactions.

    The design matrix consists of every column of ``data`` except the last
    three, followed by the element-wise product of each Moran column with
    ``X1`` and then with ``X2``. A Moran column is reported as selected when
    the fitted model assigns a non-zero coefficient to the column itself or to
    either of its two interaction terms (i.e. the union of the three sets).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``X1``, ``X2`` and ``y``. The columns at
        positions ``2:-3`` are treated as the Moran columns.
        NOTE(review): interaction terms are numbered by position, and the
        result is rebuilt as ``'moran_<number>'``, so this presumably expects
        the Moran columns to be named ``moran_0 .. moran_{k-1}`` in order --
        confirm against the input CSVs.
        NOTE(review): assumes ``y`` is among the last three columns, otherwise
        the target would leak into the design matrix -- confirm.
    s : str
        Model-selection criterion: ``'mse'`` fits ``LassoCV`` with a shuffled
        5-fold split; ``'bic'`` or ``'aic'`` fits ``LassoLarsIC`` with that
        information criterion.

    Returns
    -------
    list of str
        Names ``'moran_<number>'`` sorted by ascending number.

    Raises
    ------
    ValueError
        If ``s`` is not one of ``'mse'``, ``'bic'`` or ``'aic'``.
    """
    if s == 'mse':
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        model = LassoCV(cv=kf, alphas=np.logspace(-4, -2, 20))
    elif s in ('bic', 'aic'):
        model = LassoLarsIC(criterion=s)
    else:
        # Previously an unknown criterion fell through and failed later with
        # an unbound `model`; fail early with a clear message instead.
        raise ValueError(
            "Unknown selection criterion {!r}; expected 'mse', 'bic' or 'aic'".format(s)
        )

    base = data.iloc[:, :-3]
    y = data['y']

    X1 = data['X1'].to_numpy().reshape(-1, 1)
    X2 = data['X2'].to_numpy().reshape(-1, 1)
    moran = data.iloc[:, 2:-3].to_numpy()
    n_moran = moran.shape[1]

    # Build the interaction blocks as separate frames and concatenate once,
    # so nothing is ever written into a slice of the caller's DataFrame.
    inter_X1 = pd.DataFrame(
        moran * X1,
        columns=["moran_X1_" + str(i) for i in range(n_moran)],
        index=base.index,
    )
    inter_X2 = pd.DataFrame(
        moran * X2,
        columns=["moran_X2_" + str(i) for i in range(n_moran)],
        index=base.index,
    )
    X_coords = pd.concat([base, inter_X1, inter_X2], axis=1)

    # The original also ran cross_val_score here, but its result was never
    # used; it only refit the model five more times, so it is dropped.
    model.fit(X_coords, y)

    selected_feature = list(X_coords.columns[np.where(model.coef_ != 0)[0]])

    # Union of the Moran numbers selected as a main effect or through an
    # interaction with X1 / X2. Other features (e.g. X1, X2) are ignored.
    selected_numbers = set()
    for feature in selected_feature:
        if feature.startswith(('moran_X1_', 'moran_X2_')):
            selected_numbers.add(int(feature.split('_')[2]))
        elif re.match(r"^moran_\d+$", feature):
            selected_numbers.add(int(feature.split('_')[1]))

    moran_selected = ['moran_' + str(num) for num in sorted(selected_numbers)]

    return moran_selected

In [None]:
# Spatial-weight variants; each one selects the input file us_moran_<weight>.csv.
weights = [
    'exp',
    'queen',
]  # 2 variants

# Estimator names handed to AutoML one at a time via `estimator_list`.
models = [
    'xgboost',
    'lgbm',
    'rf',
]  # 3 estimators

# Feature-selection strategies evaluated for every weight/estimator pair.
selection = [
    'xy',
    'all',
    'mse',
    'bic',
]  # 4 strategies

In [None]:
from flaml import AutoML

# Single AutoML instance; the training loop calls model.fit on it repeatedly
# and pickles it after every fit.
model = AutoML()

# Keyword arguments that are unpacked into every model.fit(...) call.
settings = dict(
    time_budget=30 * 60,  # seconds
    # max_iter=200,       # disabled
    metric='r2',
    task='regression',
    n_splits=5,
    eval_method='cv',
    seed=111,  # random seed
    verbose=2,
)

In [None]:
# NOTE: this silences *every* warning raised below (pandas, FLAML, ...).
warnings.filterwarnings('ignore')

# For every spatial-weight file, feature-selection strategy and estimator:
# build the feature frame, run AutoML, pickle the result and print its CV R2.
for w in weights:
    data = pd.read_csv('/content/drive/MyDrive/MEM Colab Runs/data/us_moran_' + w + '.csv',index_col=0)

    for s in selection:

        # Exactly one branch must match; previously an unknown key silently
        # reused `selected` from the previous iteration.
        if s == 'all':
            selected = list(data.columns[2:-3])
        elif s == 'xy':
            selected = list(data.columns[-3: -1])
        elif s in ('mse', 'bic', 'aic'):
            selected = lasso_moran_select(data,s)
        else:
            raise ValueError("Unknown selection strategy {!r}".format(s))

        # Copy so the standardised values are written to a private frame
        # rather than to a slice of `data`.
        X_coords = data[['X1','X2']  + selected].copy()

        # Standardise the selected columns to zero mean / unit variance
        # (column-wise, population standard deviation).
        moran = X_coords[selected].to_numpy()
        X_coords[selected] = (moran - moran.mean(axis=0)) / moran.std(axis=0)

        y = data['y']

        for m in models:
            model.fit(X_coords, y, estimator_list = [m], **settings)

            name = "/content/drive/MyDrive/MEM Colab Runs/models/us_" + m + "_moran_" + s + "_lasso_" + w + ".model"

            # Context manager guarantees the file is flushed and closed even
            # if pickling fails.
            with open(name, 'wb') as model_file:
                pickle.dump(model, model_file)

            # The printed CV-R2 is derived as 1 - best_loss.
            print(name + ' CV-R2 = {0:.3g}'.format(1-model.best_loss))

/content/drive/MyDrive/MEM Colab Runs/models/us_xgboost_moran_xy_lasso_exp.model CV-R2 = 0.931
/content/drive/MyDrive/MEM Colab Runs/models/us_lgbm_moran_xy_lasso_exp.model CV-R2 = 0.931
/content/drive/MyDrive/MEM Colab Runs/models/us_rf_moran_xy_lasso_exp.model CV-R2 = 0.842
/content/drive/MyDrive/MEM Colab Runs/models/us_xgboost_moran_all_lasso_exp.model CV-R2 = 0.894
/content/drive/MyDrive/MEM Colab Runs/models/us_lgbm_moran_all_lasso_exp.model CV-R2 = 0.895
/content/drive/MyDrive/MEM Colab Runs/models/us_rf_moran_all_lasso_exp.model CV-R2 = 0.787
/content/drive/MyDrive/MEM Colab Runs/models/us_xgboost_moran_mse_lasso_exp.model CV-R2 = 0.91
/content/drive/MyDrive/MEM Colab Runs/models/us_lgbm_moran_mse_lasso_exp.model CV-R2 = 0.909
/content/drive/MyDrive/MEM Colab Runs/models/us_rf_moran_mse_lasso_exp.model CV-R2 = 0.809
/content/drive/MyDrive/MEM Colab Runs/models/us_xgboost_moran_bic_lasso_exp.model CV-R2 = 0.931
/content/drive/MyDrive/MEM Colab Runs/models/us_lgbm_moran_bic_lasso