<a href="https://colab.research.google.com/github/cconsta1/age-est-notebook/blob/main/age_estimation_dataset_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Importing and installing all the necessary libraries**

In [1]:
!pip install scikit-optimize git+https://github.com/hyperopt/hyperopt-sklearn.git

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/hyperopt/hyperopt-sklearn.git
  Cloning https://github.com/hyperopt/hyperopt-sklearn.git to /tmp/pip-req-build-xcxqkjb4
  Running command git clone --filter=blob:none --quiet https://github.com/hyperopt/hyperopt-sklearn.git /tmp/pip-req-build-xcxqkjb4
  Resolved https://github.com/hyperopt/hyperopt-sklearn.git to commit 4b3f6fde3a1ded2e71e8373d52c1b51a0239ef91
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting scikit-optimize
  Downloading scikit_optimize-0.9.0-py2.py3-none-any.whl (100 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.3/100.3 KB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting pyaml>=16.9
  Downloading pyaml-21.10.1-py2.py3-none-any.whl (24 kB)
Collecting hyperopt>=0.2.6
  Downloadin

In [None]:
# !pip freeze

In [2]:
# Google colab

from google.colab import data_table
from google.colab import files

data_table.enable_dataframe_formatter()

# hyperopt

import hyperopt

from hyperopt import tpe
from hpsklearn import HyperoptEstimator, any_classifier, any_preprocessing
from hpsklearn.components import all_classifiers, all_preprocessing, any_classifier, any_preprocessing, \
any_regressor, all_regressors


# Hyperparameter optimization

import skopt
from skopt import BayesSearchCV

# system

import os
import io

# data analysis and plotting

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns

from scipy.stats import zscore, shapiro
from random import randint

# data processing and model validation

from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder, Normalizer, MinMaxScaler
from sklearn.decomposition import PCA, KernelPCA
from sklearn.utils import shuffle
from sklearn import preprocessing
from sklearn.metrics import r2_score, explained_variance_score, confusion_matrix, \
accuracy_score, classification_report, log_loss, mean_absolute_error, mean_squared_error
from math import sqrt
from sklearn.model_selection import cross_val_score, train_test_split, RepeatedStratifiedKFold, KFold, \
LeaveOneOut, GridSearchCV, RandomizedSearchCV, RepeatedStratifiedKFold

# classification libraries

from sklearn.linear_model import LinearRegression, LogisticRegression, SGDClassifier, LogisticRegressionCV
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF, DotProduct, WhiteKernel, Matern, RationalQuadratic
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, AdaBoostClassifier, \
ExtraTreesRegressor, ExtraTreesClassifier, RandomForestRegressor
from sklearn.naive_bayes import GaussianNB, BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from xgboost import XGBClassifier, plot_importance

import lightgbm as lgb

# Importing imputation libs. 

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, KNNImputer

# Missing data models

from itertools import combinations
from joblib import parallel_backend

# Export models into pickle
import pickle

# Various parameter settings

#%matplotlib inline

# To install sklearn type "pip install numpy scipy scikit-learn" to the anaconda terminal

# To change scientific numbers to float
#np.set_printoptions(formatter={'float_kind':'{:f}'.format})

# Increases the size of sns plots
#sns.set(rc={'figure.figsize':(12,10)})

# import sys
# !conda list Check the packages installed

# Displaying all the rows/columns in a data set (the default option is not to show them)

pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

# **Importing and preparing the data for the analysis**

In [3]:
uploaded = files.upload()

Saving age_dataset.csv to age_dataset.csv


In [4]:
raw_data = pd.read_csv(io.BytesIO(uploaded['age_dataset.csv']))
# Dataset is now stored in a Pandas Dataframe

In [5]:
raw_data.head()



Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Suchey-Brooks 1990,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Meindl and Lovejoy,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Unnamed: 30,Unnamed: 31,Unnamed: 32,Unnamed: 33,Unnamed: 34,Unnamed: 35,Unnamed: 36,Unnamed: 37,Lovejoy et al.1985,Unnamed: 39,Unnamed: 40,Unnamed: 41,Unnamed: 42,Unnamed: 43,Buckberry-Chamberlain,Unnamed: 45,Unnamed: 46,Unnamed: 47,Unnamed: 48,Unnamed: 49,Unnamed: 50,Unnamed: 51,Unnamed: 52,Unnamed: 53,Unnamed: 54,Unnamed: 55,Unnamed: 56,Unnamed: 57,Unnamed: 58,Unnamed: 59,Transition analysis,Unnamed: 61,Unnamed: 62,Unnamed: 63,Unnamed: 64,Unnamed: 65,Unnamed: 66,Unnamed: 67,Unnamed: 68,Unnamed: 69,Unnamed: 70,Unnamed: 71,Unnamed: 72,Unnamed: 73,Unnamed: 74,Unnamed: 75,Unnamed: 76,Unnamed: 77,Unnamed: 78,Unnamed: 79,Unnamed: 80,Unnamed: 81,Unnamed: 82,Unnamed: 83,Unnamed: 84,Unnamed: 85,Unnamed: 86,Unnamed: 87,Unnamed: 88,Unnamed: 89,Unnamed: 90,Unnamed: 91,Unnamed: 92,Unnamed: 93,Unnamed: 94,Unnamed: 95,Unnamed: 96,Unnamed: 97,Unnamed: 98,Unnamed: 99,Unnamed: 100,Unnamed: 101,Unnamed: 102,Unnamed: 103,Unnamed: 104,Unnamed: 105,Unnamed: 106,Unnamed: 107,Unnamed: 108,Unnamed: 109,Unnamed: 110,Unnamed: 111,Unnamed: 112,Unnamed: 113,Unnamed: 114,Unnamed: 115,Unnamed: 116,Unnamed: 117,Unnamed: 118,Unnamed: 119,Unnamed: 120,Unnamed: 121,Unnamed: 122,Unnamed: 123,Unnamed: 124,Unnamed: 125,RESULTS,Unnamed: 127,Unnamed: 128,Unnamed: 129,Unnamed: 130,Unnamed: 131,Unnamed: 132,Unnamed: 133,Unnamed: 134,Unnamed: 135,Unnamed: 136,Unnamed: 137,Unnamed: 138,Unnamed: 139,Unnamed: 140,Unnamed: 141,Unnamed: 142,Unnamed: 143,Unnamed: 144,Unnamed: 145,Unnamed: 146,Unnamed: 147,Unnamed: 148,Unnamed: 149,Unnamed: 150,Unnamed: 151,Unnamed: 152,Unnamed: 153,Unnamed: 154,Unnamed: 155,Unnamed: 156,Unnamed: 157,Unnamed: 158,Unnamed: 159,Unnamed: 160,Unnamed: 161,Unnamed: 162,Unnamed: 163,Unnamed: 164,Unnamed: 165,Unnamed: 166,Unnamed: 167,Unnamed: 168,Unnamed: 169,Unnamed: 170,Unnamed: 171,Unnamed: 172,Unnamed: 173,Unnamed: 174,Unnamed: 175,Unnamed: 176,Unnamed: 177,Unnamed: 178,Unnamed: 179,Unnamed: 180,Unnamed: 181,Unnamed: 182,Unnamed: 183,Unnamed: 184,Unnamed: 185,Unnamed: 186,Unnamed: 187,Unnamed: 188,Unnamed: 189,Unnamed: 190,Unnamed: 191,Unnamed: 192,Unnamed: 193,Unnamed: 194,Unnamed: 195,Unnamed: 196,Unnamed: 197,Unnamed: 198,Unnamed: 199,Unnamed: 200,Unnamed: 201,Unnamed: 202,Unnamed: 203,Unnamed: 204,Unnamed: 205,Unnamed: 206,Unnamed: 207,Unnamed: 208,Unnamed: 209,Unnamed: 210,Unnamed: 211,Unnamed: 212,Unnamed: 213,Unnamed: 214,Unnamed: 215,Without cranial data,Unnamed: 217,Unnamed: 218,Unnamed: 219,Unnamed: 220,Unnamed: 221,Unnamed: 222,Unnamed: 223,Unnamed: 224,Unnamed: 225,Unnamed: 226,Unnamed: 227,Unnamed: 228,Unnamed: 229,Unnamed: 230,Unnamed: 231,Unnamed: 232,Unnamed: 233,Unnamed: 234,Unnamed: 235,Unnamed: 236,Unnamed: 237,Unnamed: 238,Unnamed: 239,Unnamed: 240,Unnamed: 241,Unnamed: 242,Unnamed: 243,Unnamed: 244,Unnamed: 245,Unnamed: 246,Unnamed: 247,Unnamed: 248,Unnamed: 249,Unnamed: 250,Unnamed: 251,Unnamed: 252
0,,,,,,,,,,,,,,,,,,,,,,,,,,,Vault System(1-7),,,,,,Lateral Anterior System(6-10),,,,,,,,,,,,Auricular surface,,,,,,,,,,,,,,,,Cranial Sutures,,,,,,,,,,Auricular Area,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Pubic Symphysis,,,,,,,,,,,,,,,,,,,,Male Forensic,,,,,,,,,,,,,,,Female forensic,,,,,,,,,,,,,,,Unknown forensic,,,,,,,,,,,,,,,Male archeological,,,,,,,,,,,,,,,Female archeological,,,,,,,,,,,,,,,Unknown archeological,,,,,,,,,,,,,,,Male Forensic,,,,,,Female forensic,,,,,,Unknown forensic,,,,,,Male archeological,,,,,,Female archeological,,,,,,Unknown archeological,,,,,,
1,,,,,Left,Left,Left,Right,Right,Right,Left,Right,,,,,Left,Right,Left,Right,Left,Right,Left,Right,Left,Right,Left,Left,Left,Right,Right,Right,Left,Left,Left,Right,Right,Right,Left,Left,,Right,Right,,Left,Right,Left,Right,Left,Right,Left,Right,Left,Right,Left,Left,Left,Right,Right,Right,Coronal Pterica,,Sagittal Obelica,,Lambdoidal Asterica,,Interpalatine suture,,Zygomaticomaxilary suture,,Superior Topography,,,,Inferior Topography,,,,Superior Caracteristics,,,,Apical Caracteristics,,,,Inferior Caracteristics,,,,Inferior texture,,,,Superior Exostoses,,,,Inferior Exostoses,,,,Posterior Exostoses,,,,Topography,,,,Texture,,,,Superior Protuberance,,,,Ventral Margin,,,,Dorsal margin,,,,Corrected,,,Uncorrected,,,Cranial sutures,,,Pubic symphysis,,,Auricular area,,,Corrected,,,Uncorrected,,,Cranial sutures,,,Pubic symphysis,,,Auricular area,,,Corrected,,,Uncorrected,,,Cranial sutures,,,Pubic symphysis,,,Auricular area,,,Corrected,,,Uncorrected,,,Cranial sutures,,,Pubic symphysis,,,Auricular area,,,Corrected,,,Uncorrected,,,Cranial sutures,,,Pubic symphysis,,,Auricular area,,,Corrected,,,Uncorrected,,,Cranial sutures,,,Pubic symphysis,,,Auricular area,,,Corrected,,,Uncorrected,,,Corrected,,,Uncorrected,,,Corrected,,,Uncorrected,,,Corrected,,,Uncorrected,,,Corrected,,,Uncorrected,,,Corrected,,,Uncorrected,,,
2,Skeleton Number,Sex,sex,Age,Left Phase Suchey,Mean Age,SD,Right Phase Suchey,Mean Age,SD,Left 1-midlamdoid,Right 1-midlamdoid,2-lambda,3-obelion,4-anterior sagital,5-bregma,Left 6-midcoronal,Right 6-midcoronal,Left 7-pterion,Right 7-pterion,Left 8-sphenofrontal,Right 8-sphenofrontal,Left 9-inferior sphenotemporal,Right 9-inferior sphenotemporal,Left 10-superior sphenotemporal,Right 10-superior sphenotemporal,Score,Mean,SD,Score,Mean,SD,Score,Mean,SD,Score,Mean,SD,Left Phase,Age min,Age max,Right Phase,Age min,Age max,Left Transverse organization,Right Transverse organization,Left Surface texture,Right Surface texture,Left Microposity,Right Microposity,Left Macroporositty,Right Macroporositty,Left Apical changes,Right Apical changes,Composite score,SD,Mean age,Composite score,SD,Mean age,min,max,min,max,min,max,min,max,min,max,Left-min,Left-max,Right-min,Right-max,Left-min,Left-max,Right-min,Right-max,Left-min,Left-max,Right-min,Right-max,Left-min,Left-max,Right-min,Right-max,Left-min,Left-max,Right-min,Right-max,Left-min,Left-max,Right-min,Right-max,Left-min,Left-max,Right-min,Right-max,Left-min,Left-max,Right-min,Right-max,Left-min,Left-max,Right-min,Right-max,Left-min,Left-max,Right-min,Right-max,Left-min,Left-max,Right-min,Right-max,Left-min,Left-max,Right-min,Right-max,Left-min,Left-max,Right-min,Right-max,Left-min,Left-max,Right-min,Right-max,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,L95%,Point est,U95%,
3,WLH 012,F,2,84,6,60,12.4,6,60,12.4,0,0,1,2,1,0,1,1,2,3,2,3,0,0,0,0,7,39.4,9.1,8,39.4,9.1,5,41.1,10,7,45.5,8.9,8,60,,8,60,,4,5,4,4,3,3,3,2,3,3,17,72.25,12.73,17,72.25,12.73,4,5,4,4,1,1,1,3,1,1,2,3,2,3,2,3,3,3,4,4,3,4,4,4,3,4,4,4,4,4,2,2,2,2,5,6,5,6,4,5,3,4,2,3,2,3,4,6,6,6,3,3,3,4,4,4,4,4,6,7,7,7,5,5,5,5,47.8,69.8,90.7,57.6,81.2,105.7,17.1,31.7,61.3,70.9,110,110,53.9,85.1,110,49.5,68.8,88.8,57,78.7,102,15,30.2,64.2,71.3,110,110,51.6,80.2,110,49.5,68.8,88.8,57,78.7,102,15,30.2,64.2,71.3,110,110,51.6,80.2,110,60,78.4,91.1,57.6,81.2,105.7,17.1,31.7,61.3,70.9,110,110,53.9,85.1,110,59.4,77.4,90.4,57,78.7,102,15,30.2,64.2,71.3,110,110,51.6,80.2,110,59.4,77.4,90.4,57,78.7,102,15,30.2,64.2,71.3,110,110,51.6,80.2,110,58,79.6,100.7,70.2,94.5,110,57.5,77.7,98.2,67.7,90.7,110,57.5,77.7,98.2,67.7,90.7,110,67.4,82.8,93.7,70.2,94.5,110,66.5,81.9,93.1,67.7,90.7,110,66.5,81.9,93.1,67.7,90.7,110,
4,WLH 001,M,1,85,5,45.6,10.4,5,45.6,10.4,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,21,,,21,,,14,56.2,8.5,14,56.2,8.5,7,50,59,7,50,59,4,4,5,5,3,3,1,1,2,3,15,66.71,11.88,16,66.71,11.88,5,5,5,5,5,5,3,4,4,5,3,3,3,3,3,3,2,3,4,4,4,4,3,4,3,5,5,5,5,5,1,2,2,2,,,,,,,,,,,,,5,6,5,6,2,3,2,3,4,4,3,4,6,7,6,7,4,4,4,5,44.6,68.1,92.2,56.1,84.1,110,38.1,110,110,34.7,74.8,110,47.6,82.6,110,44.7,66.2,89.5,53.9,80.2,108.8,40.9,110,110,37.1,76.8,110,39.9,76.2,110,44.7,66.2,89.5,53.9,80.2,108.8,40.9,110,110,37.1,76.8,110,39.9,76.2,110,58.7,79,91.9,56.1,84.1,110,38.1,110,110,34.7,74.8,110,47.6,82.6,110,57.1,77.7,91.1,53.9,80.2,108.8,40.9,110,110,37.1,76.8,110,39.9,76.2,110,57.1,77.7,91.1,53.9,80.2,108.8,40.9,110,110,37.1,76.8,110,39.9,76.2,110,39.7,63.2,89.2,50.3,80.8,110,39,60.9,86.2,47.6,76.3,106.6,39,60.9,86.2,47.6,76.3,106.6,54.6,77.7,91.4,50.3,80.8,110,52,76.2,90.6,47.6,76.3,106.6,52,76.2,90.6,47.6,76.3,106.6,


In [6]:
df = raw_data.iloc[:,[2, 4, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 38, 41, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 3]]

In [7]:
df = pd.DataFrame(df.values[3:], columns=df.iloc[2])

df = df.astype(int)

df



2,sex,Left Phase Suchey,Right Phase Suchey,Left 1-midlamdoid,Right 1-midlamdoid,2-lambda,3-obelion,4-anterior sagital,5-bregma,Left 6-midcoronal,Right 6-midcoronal,Left 7-pterion,Right 7-pterion,Left 8-sphenofrontal,Right 8-sphenofrontal,Left 9-inferior sphenotemporal,Right 9-inferior sphenotemporal,Left 10-superior sphenotemporal,Right 10-superior sphenotemporal,Left Phase,Right Phase,Left Transverse organization,Right Transverse organization,Left Surface texture,Right Surface texture,Left Microposity,Right Microposity,Left Macroporositty,Right Macroporositty,Left Apical changes,Right Apical changes,Age
0,2,6,6,0,0,1,2,1,0,1,1,2,3,2,3,0,0,0,0,8,8,4,5,4,4,3,3,3,2,3,3,84
1,1,5,5,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,7,7,4,4,5,5,3,3,1,1,2,3,85
2,2,6,6,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,8,7,4,4,5,5,3,3,3,2,3,2,79
3,1,6,6,2,2,2,3,2,3,2,2,3,3,3,3,2,2,3,3,8,8,5,4,5,5,3,3,2,2,2,2,64
4,2,6,6,0,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,8,8,5,5,5,5,3,3,3,2,3,3,67
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,1,6,6,1,2,2,3,2,1,1,1,3,2,3,3,1,1,0,0,8,8,5,5,5,5,3,3,3,3,2,2,66
136,1,6,6,1,1,1,3,3,2,0,1,1,1,1,1,0,0,0,0,8,8,5,5,5,5,3,3,2,3,3,3,65
137,1,6,6,2,2,3,3,3,1,0,1,2,2,3,3,0,1,1,1,8,8,4,4,5,5,3,3,2,2,3,2,73
138,2,6,6,2,1,2,1,3,3,1,1,3,3,3,3,1,1,0,0,8,8,5,5,5,5,3,3,3,3,3,2,81


In [8]:
df.describe()



2,sex,Left Phase Suchey,Right Phase Suchey,Left 1-midlamdoid,Right 1-midlamdoid,2-lambda,3-obelion,4-anterior sagital,5-bregma,Left 6-midcoronal,Right 6-midcoronal,Left 7-pterion,Right 7-pterion,Left 8-sphenofrontal,Right 8-sphenofrontal,Left 9-inferior sphenotemporal,Right 9-inferior sphenotemporal,Left 10-superior sphenotemporal,Right 10-superior sphenotemporal,Left Phase,Right Phase,Left Transverse organization,Right Transverse organization,Left Surface texture,Right Surface texture,Left Microposity,Right Microposity,Left Macroporositty,Right Macroporositty,Left Apical changes,Right Apical changes,Age
count,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0
mean,1.421429,5.142857,5.142857,0.957143,0.985714,1.392857,1.785714,1.457143,0.921429,0.507143,0.535714,1.564286,1.528571,1.464286,1.435714,0.45,0.464286,0.421429,0.464286,6.25,6.328571,4.071429,4.021429,4.407143,4.414286,2.8,2.778571,1.835714,1.942857,2.25,2.307143,55.785714
std,0.495561,1.166508,1.160324,0.928148,0.921257,1.036711,1.097975,1.061907,0.92967,0.694236,0.723806,1.012232,1.041878,1.171672,1.182673,0.649571,0.661486,0.814154,0.876641,1.964157,1.950624,0.902776,0.87711,0.98109,0.996293,0.497476,0.495561,0.754943,0.665398,0.62473,0.598516,18.880459
min,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,1.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,18.0
25%,1.0,5.0,5.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0,4.0,4.0,4.0,4.0,3.0,3.0,1.0,1.75,2.0,2.0,43.0
50%,1.0,6.0,6.0,1.0,1.0,1.0,2.0,1.0,1.0,0.0,0.0,2.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,7.0,7.0,4.0,4.0,5.0,5.0,3.0,3.0,2.0,2.0,2.0,2.0,57.5
75%,2.0,6.0,6.0,2.0,2.0,2.0,3.0,2.0,1.0,1.0,1.0,2.0,2.0,3.0,3.0,1.0,1.0,1.0,1.0,8.0,8.0,5.0,5.0,5.0,5.0,3.0,3.0,2.0,2.0,3.0,3.0,72.0
max,2.0,6.0,6.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,8.0,8.0,5.0,5.0,5.0,5.0,4.0,3.0,3.0,3.0,3.0,3.0,94.0


In [9]:
# Add a new target vector called age groups

df['Age_groups'] = pd.cut(df['Age'], bins=[10,35,50,100], labels=False)

df = df.astype(int)

df



2,sex,Left Phase Suchey,Right Phase Suchey,Left 1-midlamdoid,Right 1-midlamdoid,2-lambda,3-obelion,4-anterior sagital,5-bregma,Left 6-midcoronal,Right 6-midcoronal,Left 7-pterion,Right 7-pterion,Left 8-sphenofrontal,Right 8-sphenofrontal,Left 9-inferior sphenotemporal,Right 9-inferior sphenotemporal,Left 10-superior sphenotemporal,Right 10-superior sphenotemporal,Left Phase,Right Phase,Left Transverse organization,Right Transverse organization,Left Surface texture,Right Surface texture,Left Microposity,Right Microposity,Left Macroporositty,Right Macroporositty,Left Apical changes,Right Apical changes,Age,Age_groups
0,2,6,6,0,0,1,2,1,0,1,1,2,3,2,3,0,0,0,0,8,8,4,5,4,4,3,3,3,2,3,3,84,2
1,1,5,5,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,7,7,4,4,5,5,3,3,1,1,2,3,85,2
2,2,6,6,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,8,7,4,4,5,5,3,3,3,2,3,2,79,2
3,1,6,6,2,2,2,3,2,3,2,2,3,3,3,3,2,2,3,3,8,8,5,4,5,5,3,3,2,2,2,2,64,2
4,2,6,6,0,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,8,8,5,5,5,5,3,3,3,2,3,3,67,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,1,6,6,1,2,2,3,2,1,1,1,3,2,3,3,1,1,0,0,8,8,5,5,5,5,3,3,3,3,2,2,66,2
136,1,6,6,1,1,1,3,3,2,0,1,1,1,1,1,0,0,0,0,8,8,5,5,5,5,3,3,2,3,3,3,65,2
137,1,6,6,2,2,3,3,3,1,0,1,2,2,3,3,0,1,1,1,8,8,4,4,5,5,3,3,2,2,3,2,73,2
138,2,6,6,2,1,2,1,3,3,1,1,3,3,3,3,1,1,0,0,8,8,5,5,5,5,3,3,3,3,3,2,81,2


In [10]:
# View the data as a table

data_table.DataTable(df, include_index=False, num_rows_per_page=10, max_columns=40)

2,sex,Left Phase Suchey,Right Phase Suchey,Left 1-midlamdoid,Right 1-midlamdoid,2-lambda,3-obelion,4-anterior sagital,5-bregma,Left 6-midcoronal,Right 6-midcoronal,Left 7-pterion,Right 7-pterion,Left 8-sphenofrontal,Right 8-sphenofrontal,Left 9-inferior sphenotemporal,Right 9-inferior sphenotemporal,Left 10-superior sphenotemporal,Right 10-superior sphenotemporal,Left Phase,Right Phase,Left Transverse organization,Right Transverse organization,Left Surface texture,Right Surface texture,Left Microposity,Right Microposity,Left Macroporositty,Right Macroporositty,Left Apical changes,Right Apical changes,Age,Age_groups
0,2,6,6,0,0,1,2,1,0,1,1,2,3,2,3,0,0,0,0,8,8,4,5,4,4,3,3,3,2,3,3,84,2
1,1,5,5,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,7,7,4,4,5,5,3,3,1,1,2,3,85,2
2,2,6,6,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,8,7,4,4,5,5,3,3,3,2,3,2,79,2
3,1,6,6,2,2,2,3,2,3,2,2,3,3,3,3,2,2,3,3,8,8,5,4,5,5,3,3,2,2,2,2,64,2
4,2,6,6,0,0,0,1,0,0,0,0,1,1,1,1,0,0,0,0,8,8,5,5,5,5,3,3,3,2,3,3,67,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,1,6,6,1,2,2,3,2,1,1,1,3,2,3,3,1,1,0,0,8,8,5,5,5,5,3,3,3,3,2,2,66,2
136,1,6,6,1,1,1,3,3,2,0,1,1,1,1,1,0,0,0,0,8,8,5,5,5,5,3,3,2,3,3,3,65,2
137,1,6,6,2,2,3,3,3,1,0,1,2,2,3,3,0,1,1,1,8,8,4,4,5,5,3,3,2,2,3,2,73,2
138,2,6,6,2,1,2,1,3,3,1,1,3,3,3,3,1,1,0,0,8,8,5,5,5,5,3,3,3,3,3,2,81,2


# **Variables dictionary**

In [11]:
df.columns

Index(['sex', 'Left Phase Suchey', 'Right Phase Suchey', 'Left 1-midlamdoid',
       'Right 1-midlamdoid', '2-lambda', '3-obelion', '4-anterior sagital',
       '5-bregma', 'Left 6-midcoronal', 'Right 6-midcoronal', 'Left 7-pterion',
       'Right 7-pterion', 'Left 8-sphenofrontal', 'Right 8-sphenofrontal',
       'Left 9-inferior sphenotemporal', 'Right 9-inferior sphenotemporal',
       'Left 10-superior sphenotemporal', 'Right 10-superior sphenotemporal',
       'Left Phase', 'Right Phase', 'Left Transverse organization',
       'Right Transverse organization', 'Left Surface texture',
       'Right Surface texture', 'Left Microposity', 'Right Microposity',
       'Left Macroporositty', 'Right Macroporositty', 'Left Apical changes',
       'Right Apical changes', 'Age', 'Age_groups'],
      dtype='object', name=2)

In [12]:
set_of_variables = {
    "Suchey Brooks 1990": [
        'Right Phase Suchey'
        ],
    "Meindl and Lovejoy": [
        'Right 1-midlamdoid',
        '2-lambda', 
        '3-obelion', 
        '4-anterior sagital',
        '5-bregma', 
        'Right 6-midcoronal', 
        'Right 7-pterion',
        'Right 8-sphenofrontal', 
        'Right 9-inferior sphenotemporal', 
        'Right 10-superior sphenotemporal'
        ],
    "Lovejoy et al": [
        "Right Phase"
    ],
    "Buckberry and Chamberlain": [
        'Right Transverse organization',
        'Right Surface texture',
        'Right Microposity', 
        'Right Macroporositty', 
        'Right Apical changes'
        ],
    "Suchey Brooks 1990 and Lovejoy et al": [
        'Right Phase Suchey',
        'Right Phase' 
    ],
    "Suchey Brooks 1990 and Buckberry Chamberlain": [
        'Right Transverse organization',
        'Right Surface texture',
        'Right Microposity', 
        'Right Macroporositty', 
        'Right Apical changes',
        'Right Phase Suchey'
    ],
    "All": [
        'Right Phase Suchey',
        'Right 1-midlamdoid',
        '2-lambda', 
        '3-obelion', 
        '4-anterior sagital',
        '5-bregma', 
        'Right 6-midcoronal', 
        'Right 7-pterion',
        'Right 8-sphenofrontal', 
        'Right 9-inferior sphenotemporal', 
        'Right 10-superior sphenotemporal',
        "Right Phase",
        'Right Transverse organization',
        'Right Surface texture',
        'Right Microposity', 
        'Right Macroporositty', 
        'Right Apical changes'
    ]
} 


In [13]:
set_of_variables

{'Suchey Brooks 1990': ['Right Phase Suchey'],
 'Meindl and Lovejoy': ['Right 1-midlamdoid',
  '2-lambda',
  '3-obelion',
  '4-anterior sagital',
  '5-bregma',
  'Right 6-midcoronal',
  'Right 7-pterion',
  'Right 8-sphenofrontal',
  'Right 9-inferior sphenotemporal',
  'Right 10-superior sphenotemporal'],
 'Lovejoy et al': ['Right Phase'],
 'Buckberry and Chamberlain': ['Right Transverse organization',
  'Right Surface texture',
  'Right Microposity',
  'Right Macroporositty',
  'Right Apical changes'],
 'Suchey Brooks 1990 and Lovejoy et al': ['Right Phase Suchey', 'Right Phase'],
 'Suchey Brooks 1990 and Buckberry Chamberlain': ['Right Transverse organization',
  'Right Surface texture',
  'Right Microposity',
  'Right Macroporositty',
  'Right Apical changes',
  'Right Phase Suchey'],
 'All': ['Right Phase Suchey',
  'Right 1-midlamdoid',
  '2-lambda',
  '3-obelion',
  '4-anterior sagital',
  '5-bregma',
  'Right 6-midcoronal',
  'Right 7-pterion',
  'Right 8-sphenofrontal',
  'Rig

# **Classification (sklearn)** 

In [17]:
dff = df[df["sex"]==1]
y = dff['Age_groups'].values

for key, value in set_of_variables.items():

  X = dff[value].values

  X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.75, test_size=0.25, stratify=y)

  filename = 'classification_right_men_'+key.replace(" ","_")+".dat"
  infofilename = 'classification_right_men_'+key.replace(" ","_")+".txt"

  file = open(infofilename, "w")

  model = HyperoptEstimator(classifier=any_classifier('cla'), preprocessing=any_preprocessing('pre'), \
                          algo=tpe.suggest, max_evals=75, trial_timeout=30, continuous_loss_fn=False, loss_fn=mean_absolute_error)

  model.fit(X_train, y_train)
  # summarize performance
  acc = model.score(X_test, y_test)

  cnfm = confusion_matrix(y_test, model.predict(X_test))

  pipe = Pipeline([('scaler', model.best_model()['preprocs'][0] ), ('clf', model.best_model()['learner'] )])

  pipe.fit(X, y)

  result_loocv = cross_val_score(estimator=pipe, X=X, y=y, scoring='accuracy', cv=LeaveOneOut(), error_score='raise')

  pickle.dump(pipe, open(filename, "wb"))

  file.write("---------------------------------\n")
  file.write(key + '\n')

  file.write("Dataset size: "+ str(len(X))+' '+ str(len(y))+'\n')

  file.write("Best classifier: " + str(model.best_model()) + '\n')


  file.write("\nAccuracy: "+ str(acc) +'\n')
  
  file.write("\nConfusion matrix: \n" + str(cnfm) + '\n')

  file.write("LOOCV accuracy: " + str(result_loocv.mean()) + '\n')

  # Close the file
  file.close()


100%|██████████| 1/1 [00:00<00:00,  1.00trial/s, best loss: 1.5833333333333333]
100%|██████████| 2/2 [00:00<00:00,  6.21trial/s, best loss: 1.5833333333333333]
100%|██████████| 3/3 [00:00<00:00,  1.40trial/s, best loss: 0.25]
100%|██████████| 4/4 [00:00<00:00,  3.55trial/s, best loss: 0.25]
100%|██████████| 5/5 [00:00<00:00,  4.74trial/s, best loss: 0.25]
100%|██████████| 6/6 [00:00<00:00,  2.73trial/s, best loss: 0.25]
100%|██████████| 7/7 [00:00<00:00,  6.40trial/s, best loss: 0.25]
100%|██████████| 8/8 [00:00<00:00,  2.98trial/s, best loss: 0.25]
100%|██████████| 9/9 [00:00<00:00,  1.04trial/s, best loss: 0.25]
100%|██████████| 10/10 [00:00<00:00, 12.65trial/s, best loss: 0.25]
100%|██████████| 11/11 [00:00<00:00, 11.84trial/s, best loss: 0.25]
100%|██████████| 12/12 [00:00<00:00,  3.54trial/s, best loss: 0.25]
100%|██████████| 13/13 [00:00<00:00, 11.47trial/s, best loss: 0.25]
100%|██████████| 14/14 [00:00<00:00,  2.22trial/s, best loss: 0.25]
100%|██████████| 15/15 [00:00<00:00,  




100%|██████████| 49/49 [00:00<00:00,  3.29trial/s, best loss: 0.16666666666666666]
100%|██████████| 50/50 [00:00<00:00,  7.29trial/s, best loss: 0.16666666666666666]
100%|██████████| 51/51 [00:00<00:00,  1.44trial/s, best loss: 0.16666666666666666]
100%|██████████| 52/52 [00:00<00:00,  6.18trial/s, best loss: 0.16666666666666666]
100%|██████████| 53/53 [00:00<00:00,  3.66trial/s, best loss: 0.16666666666666666]
100%|██████████| 54/54 [00:00<00:00,  5.88trial/s, best loss: 0.16666666666666666]
100%|██████████| 55/55 [00:00<00:00,  6.83trial/s, best loss: 0.16666666666666666]
100%|██████████| 56/56 [00:00<00:00,  3.00trial/s, best loss: 0.16666666666666666]
100%|██████████| 57/57 [00:00<00:00,  1.18trial/s, best loss: 0.16666666666666666]
100%|██████████| 58/58 [00:00<00:00,  7.10trial/s, best loss: 0.16666666666666666]
100%|██████████| 59/59 [00:00<00:00,  3.65trial/s, best loss: 0.16666666666666666]
100%|██████████| 60/60 [00:00<00:00,  7.02trial/s, best loss: 0.16666666666666666]
100%




100%|██████████| 16/16 [00:00<00:00,  4.18trial/s, best loss: 0.08333333333333333]
100%|██████████| 17/17 [00:00<00:00,  7.07trial/s, best loss: 0.08333333333333333]
100%|██████████| 18/18 [00:00<00:00, 11.76trial/s, best loss: 0.08333333333333333]
100%|██████████| 19/19 [00:00<00:00,  1.13trial/s, best loss: 0.08333333333333333]
100%|██████████| 20/20 [00:00<00:00, 12.00trial/s, best loss: 0.08333333333333333]
100%|██████████| 21/21 [00:00<00:00,  7.19trial/s, best loss: 0.08333333333333333]
100%|██████████| 22/22 [00:00<00:00,  5.81trial/s, best loss: 0.08333333333333333]
100%|██████████| 23/23 [00:00<00:00,  3.40trial/s, best loss: 0.08333333333333333]
100%|██████████| 24/24 [00:00<00:00,  6.80trial/s, best loss: 0.08333333333333333]
100%|██████████| 25/25 [00:00<00:00,  6.08trial/s, best loss: 0.08333333333333333]
100%|██████████| 26/26 [00:00<00:00,  3.56trial/s, best loss: 0.08333333333333333]
100%|██████████| 27/27 [00:00<00:00,  6.51trial/s, best loss: 0.08333333333333333]
100%




100%|██████████| 32/32 [00:00<00:00,  5.75trial/s, best loss: 0.08333333333333333]
100%|██████████| 33/33 [00:00<00:00,  3.60trial/s, best loss: 0.08333333333333333]
100%|██████████| 34/34 [00:00<00:00,  6.80trial/s, best loss: 0.08333333333333333]
100%|██████████| 35/35 [00:00<00:00,  6.18trial/s, best loss: 0.08333333333333333]
100%|██████████| 36/36 [00:00<00:00,  3.44trial/s, best loss: 0.08333333333333333]
100%|██████████| 37/37 [00:00<00:00,  7.49trial/s, best loss: 0.08333333333333333]
100%|██████████| 38/38 [00:00<00:00,  3.28trial/s, best loss: 0.08333333333333333]
100%|██████████| 39/39 [00:01<00:00,  1.81s/trial, best loss: 0.08333333333333333]
100%|██████████| 40/40 [00:00<00:00,  2.04trial/s, best loss: 0.08333333333333333]
100%|██████████| 41/41 [00:00<00:00,  6.44trial/s, best loss: 0.08333333333333333]
100%|██████████| 42/42 [00:00<00:00,  5.02trial/s, best loss: 0.08333333333333333]
100%|██████████| 43/43 [00:00<00:00,  3.52trial/s, best loss: 0.08333333333333333]
100%




100%|██████████| 64/64 [00:00<00:00,  5.74trial/s, best loss: 0.0]
 98%|█████████▊| 64/65 [00:00<?, ?trial/s, best loss=?]




100%|██████████| 65/65 [00:00<00:00,  5.96trial/s, best loss: 0.0]
100%|██████████| 66/66 [00:00<00:00,  3.49trial/s, best loss: 0.0]
100%|██████████| 67/67 [00:00<00:00,  6.37trial/s, best loss: 0.0]
100%|██████████| 68/68 [00:00<00:00,  6.44trial/s, best loss: 0.0]
100%|██████████| 69/69 [00:00<00:00,  3.40trial/s, best loss: 0.0]
100%|██████████| 70/70 [00:00<00:00,  6.63trial/s, best loss: 0.0]
100%|██████████| 71/71 [00:00<00:00,  5.36trial/s, best loss: 0.0]
100%|██████████| 72/72 [00:00<00:00,  3.65trial/s, best loss: 0.0]
100%|██████████| 73/73 [00:00<00:00,  2.65trial/s, best loss: 0.0]
100%|██████████| 74/74 [00:00<00:00,  6.75trial/s, best loss: 0.0]
100%|██████████| 75/75 [00:00<00:00,  1.32trial/s, best loss: 0.0]
100%|██████████| 1/1 [00:00<00:00,  1.79trial/s, best loss: 0.9166666666666666]
100%|██████████| 2/2 [00:00<00:00,  7.42trial/s, best loss: 0.9166666666666666]
100%|██████████| 3/3 [00:00<00:00, 13.12trial/s, best loss: 0.9166666666666666]
100%|██████████| 4/4 [0




100%|██████████| 51/51 [00:00<00:00,  5.90trial/s, best loss: 0.3333333333333333]
100%|██████████| 52/52 [00:00<00:00,  6.64trial/s, best loss: 0.3333333333333333]
100%|██████████| 53/53 [00:00<00:00,  3.30trial/s, best loss: 0.3333333333333333]
100%|██████████| 54/54 [00:00<00:00,  5.70trial/s, best loss: 0.3333333333333333]
100%|██████████| 55/55 [00:01<00:00,  1.11s/trial, best loss: 0.3333333333333333]
100%|██████████| 56/56 [00:00<00:00,  6.32trial/s, best loss: 0.3333333333333333]
100%|██████████| 57/57 [00:00<00:00,  1.42trial/s, best loss: 0.3333333333333333]
100%|██████████| 58/58 [00:00<00:00,  6.82trial/s, best loss: 0.3333333333333333]
100%|██████████| 59/59 [00:00<00:00,  6.69trial/s, best loss: 0.3333333333333333]
100%|██████████| 60/60 [00:00<00:00,  3.40trial/s, best loss: 0.3333333333333333]
100%|██████████| 61/61 [00:00<00:00,  5.69trial/s, best loss: 0.3333333333333333]
100%|██████████| 62/62 [00:00<00:00,  6.08trial/s, best loss: 0.3333333333333333]
100%|██████████|

In [15]:
!ls

age_dataset.csv
classification_right_women_All.dat
classification_right_women_All.txt
classification_right_women_Buckberry_and_Chamberlain.dat
classification_right_women_Buckberry_and_Chamberlain.txt
classification_right_women_Lovejoy_et_al.dat
classification_right_women_Lovejoy_et_al.txt
classification_right_women_Meindl_and_Lovejoy.dat
classification_right_women_Meindl_and_Lovejoy.txt
classification_right_women_Suchey_Brooks_1990_and_Buckberry_Chamberlain.dat
classification_right_women_Suchey_Brooks_1990_and_Buckberry_Chamberlain.txt
classification_right_women_Suchey_Brooks_1990_and_Lovejoy_et_al.dat
classification_right_women_Suchey_Brooks_1990_and_Lovejoy_et_al.txt
classification_right_women_Suchey_Brooks_1990.dat
classification_right_women_Suchey_Brooks_1990.txt
sample_data


In [16]:
with open('classification_right_women_All.txt', 'r') as f:
    print(f.read())

---------------------------------
All
Dataset size: 59 59
Best classifier: {'learner': XGBClassifier(colsample_bylevel=0.7759588377825762,
              colsample_bytree=0.7497963493327989, gamma=0.3794299509090961,
              learning_rate=0.022921738580900908, max_depth=9,
              min_child_weight=4, missing=nan, n_estimators=400,
              objective='multi:softprob', reg_alpha=0.13586284557097217,
              reg_lambda=1.0278691494007828, seed=3,
              subsample=0.6565812625309405, use_label_encoder=False), 'preprocs': (Normalizer(norm='max'),), 'ex_preprocs': ()}

Accuracy: 0.8

Confusion matrix: 
[[1 1 0]
 [0 2 1]
 [0 1 9]]
LOOCV accuracy: 0.7966101694915254



# **Regression (sklearn)**

In [None]:
dff = df[df["sex"]==2]
y = dff['Age'].values

for key, value in set_of_variables.items():

  X = dff[value].values

  X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.75, test_size=0.25)

  filename = 'regression_right_women_'+key.replace(" ","_")+'.dat'
  infofilename = 'regression_right_women_'+key.replace(" ","_")+".txt"

  file = open(infofilename, "w")

  model = HyperoptEstimator(regressor=any_regressor('reg'), preprocessing=any_preprocessing('pre'), \
                          algo=tpe.suggest, max_evals=75, loss_fn=mean_absolute_error, trial_timeout=30,continuous_loss_fn=False)

  model.fit(X_train, y_train)
  # summarize performance
  acc = model.score(X_test, y_test)

  pipe = Pipeline([('scaler', model.best_model()['preprocs'][0] ), ('clf', model.best_model()['learner'] )])

  pipe.fit(X, y)

  pickle.dump(pipe, open(filename, "wb"))

  file.write("---------------------------------\n")
  file.write(key + '\n')

  file.write("Dataset size: "+ str(len(X))+' '+ str(len(y))+'\n')

  file.write("Best classifier: " + str(model.best_model()) + '\n')

  file.write("\nAccuracy: "+ str(acc) +'\n')

  # Close the file
  file.close()


In [None]:
!ls

In [None]:
plt.plot(model.predict(X_test),'ro')
plt.plot(y_test,'b*')

# **Neural Networks**

In [None]:
import tensorflow as tf
from tensorflow import keras

# **Classification (tensorflow)**

In [None]:
from re import VERBOSE
dff = df[df["sex"]==2]
y = dff['Age_groups'].values

for key, value in set_of_variables.items():

  X = dff[value].values

  X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.75, test_size=0.25, stratify=y)

  modelfilename = 'ann_classification_right_women_'+key.replace(" ","_")+".dat"

  infofilename = 'ann_classification_right_women_'+key.replace(" ","_")+".txt"

  file = open(infofilename, "w")

  model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(20, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(3, activation='softmax')
    ])
  
  model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

  model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=500, verbose = 0)

  # summarize performance
  acc = model.evaluate(X_test, y_test)

  cnfm = confusion_matrix(y_test, model.predict(X_test).argmax(axis=-1)) 

  #result_loocv = cross_val_score(estimator=model, X=X, y=y, scoring='accuracy', cv=LeaveOneOut(), error_score='raise')

  pickle.dump(model, open(filename, "wb"))

  
  file.write('\n\n')
  file.write("---------------------------------\n")
  file.write(key + '\n')

  file.write("Dataset size: "+ str(len(X))+' '+ str(len(y))+'\n')

  file.write("\nAccuracy: "+ str(acc) +'\n')
  
  file.write("\nConfusion matrix: \n" + str(cnfm))

  #print("\nLOOCV accuracy: ", result_loocv.mean())

  # Close the file
  file.close()
  


In [None]:
!ls

In [None]:
with open('ann_classification_right_women_All.txt', 'r') as f:
    print(f.read())

# **Regression**

In [None]:
dff = df[df["sex"]==2]
y = dff['Age'].values

for key, value in set_of_variables.items():

  X = dff[value].values

  X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.75, test_size=0.25)

  modelfilename = 'ann_regression_right_women_'+key.replace(" ","_")+".dat"

  infofilename = 'ann_regression_right_women_'+key.replace(" ","_")+".txt"

  file = open(infofilename, "w")

  model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(20, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1)
    ])
  
  model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['accuracy']
)

  model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=500)

  # summarize performance
  acc = model.evaluate(X_test, y_test)

  pickle.dump(model, open(filename, "wb"))

  file.write('\n\n')
  file.write("---------------------------------\n")
  file.write(key + '\n')

  file.write("Dataset size: "+ str(len(X))+' '+ str(len(y))+'\n')

  file.write("\nAccuracy: "+ str(acc) +'\n')

  # Close the file
  file.close()

In [None]:
!ls

In [None]:
with open('ann_regression_right_men_Lovejoy_et_al.txt', 'r') as f:
    print(f.read())

In [None]:
files.view

In [None]:
!pwd

In [None]:
!zip -r /content/file.zip /content/*


In [None]:
files.download("/content/file.zip")
