Skip to content

Commit

Permalink
Updated file paths
Browse files Browse the repository at this point in the history
  • Loading branch information
grsharman committed Apr 28, 2022
1 parent 829b1fa commit 5d6b5ed
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 98 deletions.
61 changes: 35 additions & 26 deletions 01_Make_models.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "03d50317",
"id": "successful-responsibility",
"metadata": {},
"source": [
"## Code accompaniment to \"Machine learning applied to a modern-Pleistocene petrographic dataset: The global prediction of sand modal composition (GloPrSM) model\"\n",
Expand All @@ -13,15 +13,15 @@
},
{
"cell_type": "markdown",
"id": "61b900a0",
"id": "stone-headset",
"metadata": {},
"source": [
"## Step 1: Load sand modal composition data and make random forests models"
]
},
{
"cell_type": "markdown",
"id": "8fd11344",
"id": "differential-exposure",
"metadata": {},
"source": [
"### Import required modules"
Expand All @@ -30,7 +30,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "1163c19f",
"id": "tender-heather",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -44,12 +44,13 @@
"from sklearn.metrics import *\n",
"import time\n",
"import pickle\n",
"import pathlib"
"import pathlib\n",
"import os"
]
},
{
"cell_type": "markdown",
"id": "ca3bdcdc",
"id": "hollow-contribution",
"metadata": {},
"source": [
"### Load dependent variable data"
Expand All @@ -58,7 +59,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "86b7b6be",
"id": "inclusive-moral",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -68,7 +69,7 @@
},
{
"cell_type": "markdown",
"id": "9fde66b0",
"id": "prescription-furniture",
"metadata": {},
"source": [
"### Feature selection"
Expand All @@ -77,7 +78,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "bf001f1e",
"id": "pointed-blink",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -89,7 +90,7 @@
},
{
"cell_type": "markdown",
"id": "4219c186",
"id": "suffering-black",
"metadata": {},
"source": [
"### Feature correlation (optional)"
Expand All @@ -98,7 +99,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "408ed37d",
"id": "voluntary-accuracy",
"metadata": {
"scrolled": true
},
Expand All @@ -114,7 +115,7 @@
},
{
"cell_type": "markdown",
"id": "d2edafd3",
"id": "selective-major",
"metadata": {},
"source": [
"### Model training\n",
Expand All @@ -124,16 +125,16 @@
{
"cell_type": "code",
"execution_count": null,
"id": "bed73d58",
"id": "spectacular-formation",
"metadata": {},
"outputs": [],
"source": [
"# Specify output folder\n",
"base_path = r'Z:\\Sharman\\GloPrSM_git\\v1.0'\n",
"# Specify output folder (current directory used by default)\n",
"base_path = os.getcwd() + '/v1.0'\n",
"\n",
"model_path = base_path + '\\\\' + 'models'\n",
"val_path = base_path + '\\\\' + 'validation'\n",
"test_path = base_path + '\\\\' + 'test_labels'\n",
"model_path = base_path + '/' + 'models'\n",
"val_path = base_path + '/' + 'validation'\n",
"test_path = base_path + '/' + 'test_labels'\n",
"\n",
"# Recursively creates the directory and does not raise an exception if the directory already exists\n",
"pathlib.Path(model_path).mkdir(parents=True, exist_ok=True)\n",
Expand All @@ -144,13 +145,13 @@
{
"cell_type": "code",
"execution_count": null,
"id": "e727d4dd",
"id": "technological-villa",
"metadata": {},
"outputs": [],
"source": [
"labels = ['FQ_QFL_IJ', 'LQ_QFL_IJ', 'QmQch_QmQpQch_IJ', 'QpQch_QmQpQch_IJ', 'FkFp_FpFk_IJ', 'LsLv_LvLsLm_IJ', 'LmLv_LvLsLm_IJ']\n",
"\n",
"splits = 10 # Note, 100 splits are used in the article\n",
"splits = 1 # Note, 100 splits are used in the article\n",
"rs = ShuffleSplit(n_splits=splits, test_size=.2, random_state=0)\n",
"stats = np.zeros(shape=(splits,len(labels)))\n",
"\n",
Expand Down Expand Up @@ -184,27 +185,35 @@
" tst_df.loc[:,'{}_label_{}'.format(label, i)] = test_labels\n",
"\n",
" # Export the validation results\n",
" val_df.to_csv(val_path+'\\\\'+'{}_validation_rlf.csv'.format(label),index=False)\n",
" tst_df.to_csv(test_path+'\\\\'+'{}_label_rlf.csv'.format(label),index=False)\n",
" val_df.to_csv(val_path+'/'+'{}_validation_rlf.csv'.format(label),index=False)\n",
" tst_df.to_csv(test_path+'/'+'{}_label_rlf.csv'.format(label),index=False)\n",
" \n",
" # Save the model\n",
" model_filename = 'model_'+str(i)+'.sav'\n",
" model_filepath = model_path+'\\\\'+str(label)\n",
" model_filepath = model_path+'/'+str(label)\n",
" pathlib.Path(model_filepath).mkdir(parents=True, exist_ok=True) # Recursively creates the directory and does not raise an exception if the directory already exists\n",
" pickle.dump(model, open(model_filepath+'\\\\'+model_filename, 'wb'))\n",
" pickle.dump(model, open(model_filepath+'/'+model_filename, 'wb'))\n",
" \n",
" print(i, 'R2: {}, {} sec'.format(round(r2,6), round(time.time()-start,1))) \n",
" i += 1\n",
" print()\n",
"\n",
"r2_df = pd.DataFrame(stats, columns=[x+'_R2' for x in labels])\n",
"r2_df.to_csv(base_path+'\\\\'+'R2_stats_rlf.csv',index=False)\n",
"r2_df.to_csv(base_path+'/'+'R2_stats_rlf.csv',index=False)\n",
"\n",
"# Save the list of features used in the models, so you know what is going on\n",
"features = pd.DataFrame()\n",
"features['Inputs'] = feature_list\n",
"features.to_csv(base_path+'\\\\'+'feature_list.csv', index=False)"
"features.to_csv(base_path+'/'+'feature_list.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "referenced-friend",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
Loading

0 comments on commit 5d6b5ed

Please sign in to comment.