<a href="https://colab.research.google.com/github/cornelius152/Computational-Cognitive-Science-3-Eye-Tracking-Project/blob/main/Computational_Cognitive_Science_3_Eye_Tracking_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Import Libraries**

In [1]:
# Import global libraries and packages
import pandas as pd
import numpy as np
import scipy
from scipy import stats
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings('ignore', message='.*', category=UserWarning, module='sklearn') # To suppress precision score warning

In [2]:
# Mounting drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **Importing eye-tracking data**

## Building Dictionary
Dataframe of participant IDs (participant_x) as keys, and x, y eye-tracking coordinate pairs as values

In [3]:
# Allows for 'operating system' dependent functionality
import os

# Allows us to manipulate strings more robustly
import string

# Set the directory where the CSV files are located
directory = '/content/drive/MyDrive/Computational Cognitive Science 3 Project/Cognitive Science 3/Project/Data/dataverse_files'

# Use os.listdir() to get a list of all the file names in the directory
# Return a list containing the names of the entries in the directory given by path
file_list = os.listdir(directory)
 
# Instantiating subject IDs
i = 0

# Building an empty dictionary to store all participants' IDs and x, y coordinate pairs as dataframes
# Keys = 'participant_1, 'participant_2...'; values = x, y coordinates as two separate columns
data = {}

# Use a for loop to iterate through the list of file names
for file_name in file_list:
    # Check if the file is a CSV file
    if file_name.endswith('.csv'):
        # Creating a new name for each participant
        name = 'participant' + '_' + str(file_name.split('.')[0])
        # Use pandas.DataFrame to create new dataframe from pandas.read_csv() which reads the CSV file
        data[name] = pd.DataFrame(pd.read_csv(os.path.join(directory, file_name)))
        i += 1
        if i == 111: # Total number of participants
            break

# Sorting diciontary of eye-tracking data numerically by looking at the number after "participant_'x'"
data = dict(sorted(data.items(), key=lambda x: int(x[0][12:])))

# Creating a dataframe from data
data_df = pd.concat(data.values(), axis = 1)

In [4]:
data_df.head(3)

Unnamed: 0,GazeX,GazeY,GazeX.1,GazeY.1,GazeX.2,GazeY.2,GazeX.3,GazeY.3,GazeX.4,GazeY.4,...,GazeX.5,GazeY.5,GazeX.6,GazeY.6,GazeX.7,GazeY.7,GazeX.8,GazeY.8,GazeX.9,GazeY.9
0,939.0,528.0,946.0,502.0,976.0,568.0,1012.0,532.0,966.0,530.0,...,940.0,532.0,962.0,547.0,938.0,492.0,966.0,480.0,917.0,532.0
1,935.0,516.0,920.0,408.0,962.0,546.0,989.0,556.0,1048.0,629.0,...,940.0,543.0,958.0,527.0,914.0,504.0,958.0,439.0,916.0,530.0
2,934.0,524.0,944.0,508.0,973.0,584.0,1014.0,536.0,986.0,552.0,...,936.0,543.0,962.0,534.0,909.0,502.0,944.0,452.0,929.0,542.0


# **Import Clinical Data**

In [5]:
# Reading in clinical scores data
clinical_scores = pd.read_csv('/content/drive/MyDrive/Computational Cognitive Science 3 Project/Cognitive Science 3/dataverse_files/clinical_scores.csv')
clinical_scores = clinical_scores.drop(columns='Name')
# All 'numerical' values are of type string

In [6]:
clinical_scores.head(3)

Unnamed: 0,AQ_SUM,bdi_sum,wurs_sum
0,18.0,18.0,41.0
1,25.0,14.0,47.0
2,25.0,33.0,12.0


In [7]:
# Rename columns
clinical_scores = clinical_scores.rename(columns={'Name':'name', 'AQ_SUM':'aq', 'bdi_sum': 'bdi', 'wurs_sum': 'wurs'})

#**Processing Data**

### Checking for any nan

In [8]:
# Running a for loop through each variabls in clinical scores
for var in clinical_scores:
    # Checking whether there are any nan values
        check_nan = clinical_scores[var].isnull().values.any()
        if check_nan == True:
            print(f'{var} --- has null values')

# The values in each column are of type <class 'numpy.float64'>

print()

# Check how many nans there are in each dataframe
for var in clinical_scores:
    # Reset count to 0 for each dataframe
    count = 0
    for i in clinical_scores[var]:
        # Checking whether there are any nan values in the column
        if str(i) == 'nan':
            count += 1
    print(f'nans in {var}: {count}')

aq --- has null values
bdi --- has null values
wurs --- has null values

nans in aq: 10
nans in bdi: 7
nans in wurs: 6


### Removing nan

In [9]:
# Clean data by removing nans and builidng indvidual dataframes for each 
def cleaner(var, data = clinical_scores):
    # Convert all values to string as even the nans were of type float
    data[var] = pd.DataFrame(data[var].values.astype('str'))
    # Build new dataframe for each individual variable by locating only those values that are not nan
    var = pd.DataFrame(data[var].loc[data[var] != 'nan'])
    # Reinstate float type to all values for the individual variables dataframe
    var = var.astype('float')
    
    return var

In [10]:
aq_sum = cleaner('aq')
bdi_sum = cleaner('bdi')
wurs_sum = cleaner('wurs')

print(f'aq length: {len(aq_sum)}')
print(f'bdi length: {len(bdi_sum)}')
print(f'wurs length: {len(wurs_sum)}')

# After removing nans, each dataframe now has different lengths

aq length: 101
bdi length: 104
wurs length: 105


## Cleaned Clinical Data

In [11]:
# merge dataframes on index
clinical_cleaned_df = pd.merge(aq_sum, bdi_sum, how='inner', left_index=True, right_index=True)
clinical_cleaned_df = pd.merge(clinical_cleaned_df, wurs_sum, how='inner', left_index=True, right_index=True)

print(f'new length of clinical dataframe: {len(clinical_cleaned_df)}')
# All values are now of type float and

new length of clinical dataframe: 100


In [12]:
# Find indices of new dataframe to be used later
indices = clinical_cleaned_df.index

# **Correlation coefficient**

In [13]:
from scipy.stats import pearsonr, spearmanr

## Pearsons correlation coefficient

In [14]:
# Calculate Pearson's correlation coefficient
def pearson_corr(var1, var2, data=clinical_cleaned_df):
    alpha = 0.05
    corr_value, p_value, = pearsonr(data[var1], data[var2])
    if p_value <= alpha:
        print(f"Pearson's correlation coefficient & p-value for {var1} & {var2}: {corr_value} | {p_value}")

## Spearmans correlation coefficient

In [15]:
# Calculate Spearman's correlation coefficient
def spearman_corr(var1, var2, data=clinical_cleaned_df): # var as type string
    alpha = 0.05
    corr_value, p_value, = spearmanr(data[var1], data[var2])
    if p_value <= alpha:
        print(f"Spearman's correlation coefficient & p-value for {var1} & {var2}: {corr_value} | {p_value}")

# Both pearson and spearman are implemented in python's scipy library, and take two arrays as input,
# returns correlation and p-value as output

### AQ vs. BDI

In [16]:
aq_bdi_pearson = pearson_corr('aq', 'bdi')
aq_bdi_spearman = spearman_corr('aq', 'bdi')

### AQ vs. WURS

In [17]:
aq_wurs_pearson = pearson_corr('aq', 'wurs')
aq_wurs_spearman = spearman_corr('aq', 'wurs')

Pearson's correlation coefficient & p-value for aq & wurs: 0.22224855847352243 | 0.026255151542577754


### BDI vs. WURS

In [18]:
bdi_wurs_pearson = pearson_corr('bdi', 'wurs')
bdi_wurs_spearman = spearman_corr('bdi', 'wurs')

Pearson's correlation coefficient & p-value for bdi & wurs: 0.36237922967493963 | 0.00021139852994281658
Spearman's correlation coefficient & p-value for bdi & wurs: 0.32629316767677363 | 0.0009227610023327782


# **Feature Extraction & Engineering**

## x, y coordinates viwed the MOST and LEAST

In [19]:
from collections import Counter

### Dictionaries

In [20]:
# Instantiating empty dictionaries: keys = participant ID; values = most viewed and least viewed x, y coordinate pairs
max_coordinates = {}
all_max_coordinates = {}

min_coordinates = {}
all_min_coordinates = {}

# Run for loop through each participant's x, y coordinate data in data_df
for participant in data:
    participant_counts = {} # Instantiate empty dictionary to hold each individual participant's eye-tracking data
    
    # Zip a single individual's x: (GazeX), and y: (GazeY) data
    coordinates = zip(data[participant]['GazeX'], data[participant]['GazeY'])
    # Transform zipped data into a list where each individual x, y coordinate is a tuple [(x,y), (x,y)...]
    coordinates = list(coordinates)
    
    # Now loop through each x, y coordinate tuple pair
    for xy in coordinates:
        if xy not in participant_counts: # If the pair is not in the earlier instantited participant dict
            participant_counts[xy] = 1 # Add this pais as a key with a value of 1
        else:
            participant_counts[xy] += 1 # If the pair is already in the dictionary, then add a count of 1 to its value

    # Find the maximum count in the participant_counts dictionary
    max_count = max(participant_counts.values())

    # Find all coordinate tuples with the maximum count
    max_coordinates[participant] = (max(participant_counts, key=participant_counts.get), max_count)

    # Add the participant ID and the list of coordinate tuples with the maximum count to all_max_coordinates
    all_max_coordinates[participant] = [coord for coord, count in participant_counts.items() if count == max_count]

    # Find the minimum count in the participant_counts dictionary
    min_count = min(participant_counts.values())

    # Find all coordinate tuples with the minimum count
    min_coordinates[participant] = (min(participant_counts, key=participant_counts.get), min_count)

    # Add the participant ID and the list of coordinate tuples with the minimum count to all_min_coordinates
    all_min_coordinates[participant] = [coord for coord, count in participant_counts.items() if count == min_count]

### Lists

In [21]:
# Create lists from the dictionaries
max_coordinates_list = [i[0] for i in max_coordinates.values()]
all_max_coordinates_list = [coords for sublist in all_max_coordinates.values() for coords in sublist]
min_coordinates_list = [i[0] for i in min_coordinates.values()]
all_min_coordinates_list = [coords for sublist in all_min_coordinates.values() for coords in sublist]

### Filtered Lists

In [22]:
# Filtering lists with common indices to make sure only those with available clinical scores are kept
filtered_max_coordinates_list = [max_coordinates_list[i] for i in indices if i < len(max_coordinates_list)]
filtered_min_coordinates_list = [min_coordinates_list[i] for i in indices if i < len(min_coordinates_list)]

## Neighborhood surrounding max x, y coordinate pairs

In [23]:
# Instantite empty list to hold all the neighborhood surrounding of coordinates surrounding max x, y coordinate pairs
range_neighborhood_dic = {}

# Iterate over the range_neighborhood_dic
for participant, (coordinate, count) in max_coordinates.items():
    x, y = coordinate

    # Create a list to hold the coordinate pairs within our range
    pairs_within_range = []

    # Iterate from (x-5, y-5) to (x+5, y+5) and append each pair to the list
    for i in range(x-5, x+5):
        for j in range(y-5, y+5):
            pairs_within_range.append((i, j))

    # Add the list of coordinate pairs to the range_neighborhood_dic
    range_neighborhood_dic[participant] = pairs_within_range

### List

In [24]:
# Instantiate an empty list to hold all the max x, y coordinate pairs
range_neighborhood_list = []

# Iterate over the range_neighborhood_dic dictionary
for pairs_within_range in range_neighborhood_dic.values():
    # Extend the range_neighborhood_list with the coordinate pairs within range
    range_neighborhood_list.extend(pairs_within_range)

### Dataframe

In [25]:
# Convert the dictionary to a dataframe
range_neighborhood_df = pd.DataFrame.from_dict(range_neighborhood_dic, orient='index')

# Reset the index to make the participant column a normal column
range_neighborhood_df = range_neighborhood_df.reset_index()

# Rename columns
range_neighborhood_df = range_neighborhood_df.rename(columns={'index': 'participant', 0: 'main_coordinate'})

# Drop columns so we only get values
range_neighborhood_df = range_neighborhood_df.drop({'participant', 'main_coordinate'}, axis=1)

In [26]:
range_neighborhood_df.head(3)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,90,91,92,93,94,95,96,97,98,99
0,"(938, 526)","(938, 527)","(938, 528)","(938, 529)","(938, 530)","(938, 531)","(938, 532)","(938, 533)","(938, 534)","(939, 525)",...,"(947, 525)","(947, 526)","(947, 527)","(947, 528)","(947, 529)","(947, 530)","(947, 531)","(947, 532)","(947, 533)","(947, 534)"
1,"(938, 589)","(938, 590)","(938, 591)","(938, 592)","(938, 593)","(938, 594)","(938, 595)","(938, 596)","(938, 597)","(939, 588)",...,"(947, 588)","(947, 589)","(947, 590)","(947, 591)","(947, 592)","(947, 593)","(947, 594)","(947, 595)","(947, 596)","(947, 597)"
2,"(968, 592)","(968, 593)","(968, 594)","(968, 595)","(968, 596)","(968, 597)","(968, 598)","(968, 599)","(968, 600)","(969, 591)",...,"(977, 591)","(977, 592)","(977, 593)","(977, 594)","(977, 595)","(977, 596)","(977, 597)","(977, 598)","(977, 599)","(977, 600)"


## Euclidean Distance
Between most and least viewed x, y coordinate pairs

In [27]:
# Initialize a list to store the Euclidean distances
ed_distances = []

# Iterate over the participants' data
for participant in min_coordinates:
    # Grab least and most viewed x, y coordinate pairs
    least_viewed_pair = min_coordinates[participant][0]
    most_viewed_pair = max_coordinates[participant][0]

    # Convert the coordinate pairs to arrays
    least_viewed_array = np.array(least_viewed_pair)
    most_viewed_array = np.array(most_viewed_pair)

    # Calculate Euclidean distance
    distance = np.linalg.norm(least_viewed_array - most_viewed_array)
    
    # Append distances to the list
    ed_distances.append(distance)

# for i, distance in enumerate(distances):
#     print(f"Euclidean distance for participant {i+1}: {distance}")

In [28]:
# Convert list to dataframe and then rename columns
ed_distances_pd = pd.DataFrame(ed_distances)
ed_distances_pd = ed_distances_pd.rename(columns={0:'ed_distances'})

# **Statistics**
1. Mean
2. Standard deviation
3. Median
4. Pearsons correlation coefficient
5. Range
6. Unique values

In [29]:
# Building empty lists to hold statistics

# Mean x, y coordinates
xval_mean = []
yval_mean = []

# Standard deviation of x, y coordinates
xval_std = []
yval_std = []

# Median of x, y coordinates
xval_med = []
yval_med = []

# Pearson correlation coefficient of x, y coordinates
xy_pearson_cc = []

# Spearman correlation coefficient of x, y coordinates
xy_spearman_cc = []

# Range of x, y coordinates
x_range = []
y_range = []

# Unique values of x, y coordinates
unique_vals_mean = []

for name in data:
    xval_mean.append(data[name]['GazeX'].sum()/len(data[name]))
    xval_std.append(np.std(data[name]['GazeX']))
    xval_med.append(data[name]['GazeX'].median())
    xy_pearson_cc.append(data[name]['GazeX'].corr(data[name]['GazeY'])) # pearsons correlation coefficient
    xy_spearman_cc.append(data[name]['GazeX'].corr(data[name]['GazeY'], method='spearman')) # spearmans correlation coefficient
    x_range.append(data[name]['GazeX'].max() - data[name]['GazeX'].min())
    unique_vals_mean.append(data[name].drop_duplicates(subset=['GazeX', 'GazeY']).sum()/len(data[name]))
    
for name in data:
    yval_mean.append(data[name]['GazeY'].sum()/len(data[name]))
    yval_std.append(np.std(data[name]['GazeY']))
    yval_med.append(data[name]['GazeY'].median())
    y_range.append(data[name]['GazeY'].max() - data[name]['GazeY'].min())

## **All statistics variables**

In [30]:
# Mean
meanvals_df = pd.DataFrame({'x': xval_mean, 'y': yval_mean})

# Standard deviation
stdvals_df = pd.DataFrame({'x': xval_std, 'y': yval_std})

# Median
medvals_df = pd.DataFrame({'x': xval_med, 'y': yval_med})

# Range
xy_range_df = pd.DataFrame({'x': x_range, 'y': y_range})

# Pearsons CC
xy_pearson_df = pd.DataFrame({'pearsons_cc': xy_pearson_cc})

# Spearmans CC
xy_spearman_df = pd.DataFrame({'spearmans_cc': xy_spearman_cc})

# Unique values mean
unique_vals_mean_df = pd.DataFrame(unique_vals_mean)

In [31]:
unique_vals_mean_df.head(3)

Unnamed: 0,GazeX,GazeY
0,916.421785,525.912913
1,940.963047,540.806378
2,914.122155,582.386697


# **Train-Test Split for Regressors**

In [32]:
from sklearn.model_selection import train_test_split

In [33]:
# Building train-test split function for regressors

def split(input_feature, clinical_var, indices=indices, clinical_data=clinical_cleaned_df):

    # Creating x input feature
    x = input_feature

    # Creating y target variable
    y = clinical_cleaned_df[clinical_var]
    
    # If the length of input variable x is not equal to label y, find only common indices
    if len(x) != len(y):
        x = x.loc[indices]

    # Convert to arrays
    x_arr = np.array(x)
    y_arr = np.array(y)
    
    # Perform train-test split
    x_train, x_test, y_train, y_test = train_test_split(x_arr, y_arr, test_size=0.2, random_state=42)

    return x_train, x_test, y_train, y_test

## Least viewed x, y coordinate pairs

In [34]:
# least viwed = lv

# AQ
lv_x_train_aq, lv_x_test_aq, lv_y_train_aq, lv_y_test_aq = split(filtered_min_coordinates_list, 'aq')

# BDI
lv_x_train_bdi, lv_x_test_bdi, lv_y_train_bdi, lv_y_test_bdi = split(filtered_min_coordinates_list, 'bdi')

# WURS
lv_x_train_wurs, lv_x_test_wurs, lv_y_train_wurs, lv_y_test_wurs = split(filtered_min_coordinates_list, 'wurs')

## Most viewed x, y coordinate pairs

In [35]:
# most viewed = mv

# AQ
mv_x_train_aq, mv_x_test_aq, mv_y_train_aq, mv_y_test_aq = split(filtered_max_coordinates_list, 'aq')

# BDI
mv_x_train_bdi, mv_x_test_bdi, mv_y_train_bdi, mv_y_test_bdi = split(filtered_max_coordinates_list, 'bdi')

# WURS
mv_x_train_wurs, mv_x_test_wurs, mv_y_train_wurs, mv_y_test_wurs = split(filtered_max_coordinates_list, 'wurs')

## Neighborhood surrounding max x, y coordinate pairs

In [36]:
# AQ
range_neighborhood_x_train_aq, range_neighborhood_x_test_aq, range_neighborhood_y_train_aq, range_neighborhood_y_test_aq = split(range_neighborhood_df, 'aq')

# BDI
range_neighborhood_x_train_bdi, range_neighborhood_x_test_bdi, range_neighborhood_y_train_bdi, range_neighborhood_y_test_bdi = split(range_neighborhood_df, 'bdi')

# WURS
range_neighborhood_x_train_wurs, range_neighborhood_x_test_wurs, range_neighborhood_y_train_wurs, range_neighborhood_y_test_wurs = split(range_neighborhood_df, 'wurs')


## Euclidean Distance

In [37]:
# AQ
ed_x_train_aq, ed_x_test_aq, ed_y_train_aq, ed_y_test_aq = split(ed_distances_pd, 'aq')

# BDI
ed_x_train_bdi, ed_x_test_bdi, ed_y_train_bdi, ed_y_test_bdi = split(ed_distances_pd, 'bdi')

# WURS
ed_x_train_wurs, ed_x_test_wurs, ed_y_train_wurs, ed_y_test_wurs = split(ed_distances_pd, 'wurs')

## Mean

In [38]:
# AQ
mean_x_train_aq, mean_x_test_aq, mean_y_train_aq, mean_y_test_aq = split(meanvals_df, 'aq')

# BDI
mean_x_train_bdi, mean_x_test_bdi, mean_y_train_bdi, mean_y_test_bdi = split(meanvals_df, 'bdi')

# WURS
mean_x_train_wurs, mean_x_test_wurs, mean_y_train_wurs, mean_y_test_wurs = split(meanvals_df, 'wurs')

## Standard deviation

In [39]:
# AQ
std_x_train_aq, std_x_test_aq, std_y_train_aq, std_y_test_aq = split(stdvals_df, 'aq')

# BDI
std_x_train_bdi, std_x_test_bdi, std_y_train_bdi, std_y_test_bdi = split(stdvals_df, 'bdi')

# WURS
std_x_train_wurs, std_x_test_wurs, std_y_train_wurs, std_y_test_wurs = split(stdvals_df, 'wurs')

## Median

In [40]:
# AQ
med_x_train_aq, med_x_test_aq, med_y_train_aq, med_y_test_aq = split(medvals_df, 'aq')

# BDI
med_x_train_bdi, med_x_test_bdi, med_y_train_bdi, med_y_test_bdi = split(medvals_df, 'bdi')

# WURS
med_x_train_wurs, med_x_test_wurs, med_y_train_wurs, med_y_test_wurs = split(medvals_df, 'wurs')

## Pearsons correlation coefficient

In [41]:
# AQ
xy_pearson_cc_x_train_aq, xy_pearson_cc_x_test_aq, xy_pearson_cc_y_train_aq, xy_pearson_cc_y_test_aq = split(xy_pearson_df, 'aq')

# BDI
xy_pearson_cc_x_train_bdi, xy_pearson_cc_x_test_bdi, xy_pearson_cc_y_train_bdi, xy_pearson_cc_y_test_bdi = split(xy_pearson_df, 'bdi')

# WURS
xy_pearson_cc_x_train_wurs, xy_pearson_cc_x_test_wurs, xy_pearson_cc_y_train_wurs, xy_pearson_cc_y_test_wurs = split(xy_pearson_df, 'wurs')

## Spearmans correlation coefficient

In [42]:
# AQ
xy_spearman_cc_x_train_aq, xy_spearman_cc_x_test_aq, xy_spearman_cc_y_train_aq, xy_spearman_cc_y_test_aq = split(xy_spearman_df, 'aq')

# BDI
xy_spearman_cc_x_train_bdi, xy_spearman_cc_x_test_bdi, xy_spearman_cc_y_train_bdi, xy_spearman_cc_y_test_bdi = split(xy_spearman_df, 'bdi')

# WURS
xy_spearman_cc_x_train_wurs, xy_spearman_cc_x_test_wurs, xy_spearman_cc_y_train_wurs, xy_spearman_cc_y_test_wurs = split(xy_spearman_df, 'wurs')

## Range

In [43]:
# AQ
xy_range_x_train_aq, xy_range_x_test_aq, xy_range_y_train_aq, xy_range_y_test_aq = split(xy_range_df, 'aq')

# BDI
xy_range_x_train_bdi, xy_range_x_test_bdi, xy_range_y_train_bdi, xy_range_y_test_bdi = split(xy_range_df, 'bdi')

# WURS
xy_range_x_train_wurs, xy_range_x_test_wurs, xy_range_y_train_wurs, xy_range_y_test_wurs = split(xy_range_df, 'wurs')

## Unique values mean

In [44]:
# AQ
xy_unique_x_train_aq, xy_unique_x_test_aq, xy_unique_y_train_aq, xy_unique_y_test_aq = split(unique_vals_mean_df, 'aq')

# BDI
xy_unique_x_train_bdi, xy_unique_x_test_bdi, xy_unique_y_train_bdi, xy_unique_y_test_bdi = split(unique_vals_mean_df, 'bdi')

# WURS
xy_unique_x_train_wurs, xy_unique_x_test_wurs, xy_unique_y_train_wurs, xy_unique_y_test_wurs = split(unique_vals_mean_df, 'wurs')

# **Regressors with Train-Test Split**

## **Linear regression**

In [45]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

In [46]:
# Linear Regression with train-test split

def lr_model(x_train, x_test, y_train, y_test):
    
    # Create linear regression object and fit model
    lr_model = LinearRegression().fit(x_train, y_train)
    
    # Make predictions
    y_pred = lr_model.predict(x_test)
    
    # Evaluation metrics
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f'R-squared:', r2)
    print(f'MAE:', mae)
    print()

### Least viewed x, y coordinate pairs

In [47]:
# AQ
lr_model(lv_x_train_aq, lv_x_test_aq, lv_y_train_aq, lv_y_test_aq)

# BDI
lr_model(lv_x_train_bdi, lv_x_test_bdi, lv_y_train_bdi, lv_y_test_bdi)

# WURS
lr_model(lv_x_train_wurs, lv_x_test_wurs, lv_y_train_wurs, lv_y_test_wurs)

R-squared: 0.021613834680677013
MAE: 5.244458520575735

R-squared: -0.042601423252847326
MAE: 8.979706334622973

R-squared: -0.19927118681570022
MAE: 12.772782064864403



### Most viewed x, y coordinate pairs

In [48]:
# AQ
lr_model(mv_x_train_aq, mv_x_test_aq, mv_y_train_aq, mv_y_test_aq)

# BDI
lr_model(mv_x_train_bdi, mv_x_test_bdi, mv_y_train_bdi, mv_y_test_bdi)

# WURS
lr_model(mv_x_train_wurs, mv_x_test_wurs, mv_y_train_wurs, mv_y_test_wurs)

R-squared: -0.040955183484325364
MAE: 5.2773426939140196

R-squared: -0.24607811546847835
MAE: 9.938505948890214

R-squared: -0.07181616569254334
MAE: 11.993947227802



### Euclidean Distance

In [49]:
# AQ
lr_model(ed_x_train_aq, ed_x_test_aq, ed_y_train_aq, ed_y_test_aq)

# BDI
lr_model(ed_x_train_bdi, ed_x_test_bdi, ed_y_train_bdi, ed_y_test_bdi)

# WURS
lr_model(ed_x_train_wurs, ed_x_test_wurs, ed_y_train_wurs, ed_y_test_wurs)

R-squared: -0.055285738500697956
MAE: 5.524887810871791

R-squared: -0.140045186931822
MAE: 8.77194467915799

R-squared: -0.24438333350680574
MAE: 13.057030241922718



### Mean

In [50]:
# AQ
lr_model(mean_x_train_aq, mean_x_test_aq, mean_y_train_aq, mean_y_test_aq)

# BDI
lr_model(mean_x_train_bdi, mean_x_test_bdi, mean_y_train_bdi, mean_y_test_bdi)

# WURS
lr_model(mean_x_train_wurs, mean_x_test_wurs, mean_y_train_wurs, mean_y_test_wurs)

R-squared: 0.1520399905606088
MAE: 4.80365342994894

R-squared: 0.00763952689464098
MAE: 8.56250682103104

R-squared: -0.03662561093350769
MAE: 12.323251871977025



### Standard deviation

In [51]:
# AQ
lr_model(std_x_train_aq, std_x_test_aq, std_y_train_aq, std_y_test_aq)

# BDI
lr_model(std_x_train_bdi, std_x_test_bdi, std_y_train_bdi, std_y_test_bdi)

# WURS
lr_model(std_x_train_wurs, std_x_test_wurs, std_y_train_wurs, std_y_test_wurs)

R-squared: -0.04884495998485194
MAE: 5.41011240293402

R-squared: 0.003156360116893131
MAE: 8.79157839440711

R-squared: -0.09764300278388172
MAE: 12.22025419820296



### Median

In [52]:
# AQ
lr_model(med_x_train_aq, med_x_test_aq, med_y_train_aq, med_y_test_aq)

# BDI
lr_model(med_x_train_bdi, med_x_test_bdi, med_y_train_bdi, med_y_test_bdi)

# WURS
lr_model(med_x_train_wurs, med_x_test_wurs, med_y_train_wurs, med_y_test_wurs)

R-squared: 0.13387036402713004
MAE: 4.806965362679267

R-squared: -0.04993454848213519
MAE: 9.07688683067543

R-squared: -0.06778592679771767
MAE: 12.3648162826767



### Pearsons CC

In [53]:
# AQ
lr_model(xy_pearson_cc_x_train_aq, xy_pearson_cc_x_test_aq, xy_pearson_cc_y_train_aq, xy_pearson_cc_y_test_aq)

# BDI
lr_model(xy_pearson_cc_x_train_bdi, xy_pearson_cc_x_test_bdi, xy_pearson_cc_y_train_bdi, xy_pearson_cc_y_test_bdi)

# WURS
lr_model(xy_pearson_cc_x_train_wurs, xy_pearson_cc_x_test_wurs, xy_pearson_cc_y_train_wurs, xy_pearson_cc_y_test_wurs)

R-squared: -0.057954001665695865
MAE: 5.436702021338882

R-squared: -0.037868739808829766
MAE: 8.945116402978591

R-squared: -0.32235049583905107
MAE: 12.45943945050115



### Spearmans CC

In [54]:
# AQ
lr_model(xy_spearman_cc_x_train_aq, xy_spearman_cc_x_test_aq, xy_spearman_cc_y_train_aq, xy_spearman_cc_y_test_aq)

# BDI
lr_model(xy_spearman_cc_x_train_bdi, xy_spearman_cc_x_test_bdi, xy_spearman_cc_y_train_bdi, xy_spearman_cc_y_test_bdi)

# WURS
lr_model(xy_spearman_cc_x_train_wurs, xy_spearman_cc_x_test_wurs, xy_spearman_cc_y_train_wurs, xy_spearman_cc_y_test_wurs)

R-squared: -0.08701012892910787
MAE: 5.539170909689169

R-squared: -0.008456872421494843
MAE: 8.792540437707627

R-squared: -0.21191268261759078
MAE: 12.283671399267186



### Range

In [55]:
# AQ
lr_model(xy_range_x_train_aq, xy_range_x_test_aq, xy_range_y_train_aq, xy_range_y_test_aq)

# BDI
lr_model(xy_range_x_train_bdi, xy_range_x_test_bdi, xy_range_y_train_bdi, xy_range_y_test_bdi)

# WURS
lr_model(xy_range_x_train_wurs, xy_range_x_test_wurs, xy_range_y_train_wurs, xy_range_y_test_wurs)

R-squared: -0.21848430106057548
MAE: 5.806209328149734

R-squared: -0.2049543340114739
MAE: 9.433138190631832

R-squared: -0.08670180161403684
MAE: 12.03632730407916



### Unique values

In [56]:
# AQ
lr_model(xy_unique_x_train_aq, xy_unique_x_test_aq, xy_unique_y_train_aq, xy_unique_y_test_aq)

# BDI
lr_model(xy_unique_x_train_bdi, xy_unique_x_test_bdi, xy_unique_y_train_bdi, xy_unique_y_test_bdi)

# WURS
lr_model(xy_unique_x_train_wurs, xy_unique_x_test_wurs, xy_unique_y_train_wurs, xy_unique_y_test_wurs)

R-squared: 0.054674749739499484
MAE: 5.063711968641951

R-squared: -0.025983844385265353
MAE: 9.206153596459842

R-squared: -0.04178097113925938
MAE: 12.054695085313345



## **Decision Tree**

In [57]:
from sklearn.tree import DecisionTreeRegressor

In [58]:
# Decision Tree regressor with train-test split

def decision_tree_regressor(x_train, x_test, y_train, y_test):
    # Create decision tree regressor object and fit data
    dt_rgr = DecisionTreeRegressor(random_state=42).fit(x_train, y_train)
    
    # Make predictions
    y_pred = dt_rgr.predict(x_test)
    
    # Evaluation metrics
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    print(f"R-squared: {r2}")
    print(f"MAE: {mae}")
    print()

### Least viewed x, y coordinate pairs

In [59]:
# AQ
decision_tree_regressor(lv_x_train_aq, lv_x_test_aq, lv_y_train_aq, lv_y_test_aq)

# BDI
decision_tree_regressor(lv_x_train_bdi, lv_x_test_bdi, lv_y_train_bdi, lv_y_test_bdi)

# WURS
decision_tree_regressor(lv_x_train_wurs, lv_x_test_wurs, lv_y_train_wurs, lv_y_test_wurs)

R-squared: -2.177832679896254
MAE: 9.669

R-squared: -1.8477544780438269
MAE: 14.495

R-squared: -2.482027336481772
MAE: 23.398



### Most viewed x, y coordinate pairs

In [60]:
# AQ
decision_tree_regressor(mv_x_train_aq, mv_x_test_aq, mv_y_train_aq, mv_y_test_aq)

# BDI
decision_tree_regressor(mv_x_train_bdi, mv_x_test_bdi, mv_y_train_bdi, mv_y_test_bdi)

# WURS
decision_tree_regressor(mv_x_train_wurs, mv_x_test_wurs, mv_y_train_wurs, mv_y_test_wurs)

R-squared: -2.2824657699499364
MAE: 8.85

R-squared: -1.408825300895773
MAE: 13.59

R-squared: -1.3071230585115807
MAE: 16.7725



### Neighborhood surrounding max x, y coordinate pairs

In [61]:
# AQ
decision_tree_regressor(xy_range_x_train_aq, xy_range_x_test_aq, xy_range_y_train_aq, xy_range_y_test_aq)

# BDI
decision_tree_regressor(xy_range_x_train_bdi, xy_range_x_test_bdi, xy_range_y_train_bdi, xy_range_y_test_bdi)

# WURS
decision_tree_regressor(xy_range_x_train_wurs, xy_range_x_test_wurs, xy_range_y_train_wurs, xy_range_y_test_wurs)

R-squared: -1.2220766330900537
MAE: 8.419250000000002

R-squared: -2.977892224376897
MAE: 18.79225

R-squared: -3.96676534742618
MAE: 25.887



### Euclidean Distance

In [62]:
# AQ
decision_tree_regressor(ed_x_train_aq, ed_x_test_aq, ed_y_train_aq, ed_y_test_aq)

# BDI
decision_tree_regressor(ed_x_train_bdi, ed_x_test_bdi, ed_y_train_bdi, ed_y_test_bdi)

# WURS
decision_tree_regressor(ed_x_train_wurs, ed_x_test_wurs, ed_y_train_wurs, ed_y_test_wurs)

R-squared: -1.748227758007118
MAE: 8.861999999999998

R-squared: -1.006468432276976
MAE: 11.7345

R-squared: -2.556695273700571
MAE: 21.723000000000003



### Mean

In [63]:
# AQ
decision_tree_regressor(mean_x_train_aq, mean_x_test_aq, mean_y_train_aq, mean_y_test_aq)

# BDI
decision_tree_regressor(mean_x_train_bdi, mean_x_test_bdi, mean_y_train_bdi, mean_y_test_bdi)

# WURS
decision_tree_regressor(mean_x_train_wurs, mean_x_test_wurs, mean_y_train_wurs, mean_y_test_wurs)

R-squared: -1.3582334278303874
MAE: 8.088

R-squared: -0.8501316927246894
MAE: 9.699000000000002

R-squared: -1.3457421156631901
MAE: 18.307499999999997



### Standard deviation

In [64]:
# AQ
decision_tree_regressor(std_x_train_aq, std_x_test_aq, std_y_train_aq, std_y_test_aq)

# BDI
decision_tree_regressor(std_x_train_bdi, std_x_test_bdi, std_y_train_bdi, std_y_test_bdi)

# WURS
decision_tree_regressor(std_x_train_wurs, std_x_test_wurs, std_y_train_wurs, std_y_test_wurs)

R-squared: -0.936119548826829
MAE: 7.033500000000001

R-squared: -1.9299431816569337
MAE: 14.9275

R-squared: -0.39832620818420916
MAE: 14.560500000000001



### Median

In [65]:
# AQ
decision_tree_regressor(med_x_train_aq, med_x_test_aq, med_y_train_aq, med_y_test_aq)

# BDI
decision_tree_regressor(med_x_train_bdi, med_x_test_bdi, med_y_train_bdi, med_y_test_bdi)

# WURS
decision_tree_regressor(med_x_train_wurs, med_x_test_wurs, med_y_train_wurs, med_y_test_wurs)

R-squared: -2.3771934374811505
MAE: 9.9195

R-squared: -1.8337461269895536
MAE: 14.2815

R-squared: -2.624535348618149
MAE: 25.586999999999996



### Pearsons CC

In [66]:
# AQ
decision_tree_regressor(xy_pearson_cc_x_train_aq, xy_pearson_cc_x_test_aq, xy_pearson_cc_y_train_aq, xy_pearson_cc_y_test_aq)

# BDI
decision_tree_regressor(xy_pearson_cc_x_train_bdi, xy_pearson_cc_x_test_bdi, xy_pearson_cc_y_train_bdi, xy_pearson_cc_y_test_bdi)

# WURS
decision_tree_regressor(xy_pearson_cc_x_train_wurs, xy_pearson_cc_x_test_wurs, xy_pearson_cc_y_train_wurs, xy_pearson_cc_y_test_wurs)

R-squared: -2.2754858556004582
MAE: 8.673

R-squared: -2.808162831368476
MAE: 18.509999999999998

R-squared: -2.0416391675322427
MAE: 19.7025



### Spearmans CC

In [67]:
# AQ
decision_tree_regressor(xy_spearman_cc_x_train_aq, xy_spearman_cc_x_test_aq, xy_spearman_cc_y_train_aq, xy_spearman_cc_y_test_aq)

# BDI
decision_tree_regressor(xy_spearman_cc_x_train_bdi, xy_spearman_cc_x_test_bdi, xy_spearman_cc_y_train_bdi, xy_spearman_cc_y_test_bdi)

# WURS
decision_tree_regressor(xy_spearman_cc_x_train_wurs, xy_spearman_cc_x_test_wurs, xy_spearman_cc_y_train_wurs, xy_spearman_cc_y_test_wurs)

R-squared: -0.47457144580493393
MAE: 6.4719999999999995

R-squared: -3.1947773055832265
MAE: 17.634999999999998

R-squared: -3.225743486515607
MAE: 26.3315



### Range

In [68]:
# AQ
decision_tree_regressor(xy_range_x_train_aq, xy_range_x_test_aq, xy_range_y_train_aq, xy_range_y_test_aq)

# BDI
decision_tree_regressor(xy_range_x_train_bdi, xy_range_x_test_bdi, xy_range_y_train_bdi, xy_range_y_test_bdi)

# WURS
decision_tree_regressor(xy_range_x_train_wurs, xy_range_x_test_wurs, xy_range_y_train_wurs, xy_range_y_test_wurs)

R-squared: -1.2220766330900537
MAE: 8.419250000000002

R-squared: -2.977892224376897
MAE: 18.79225

R-squared: -3.96676534742618
MAE: 25.887



### Unique values

In [69]:
# AQ
decision_tree_regressor(xy_unique_x_train_aq, xy_unique_x_test_aq, xy_unique_y_train_aq, xy_unique_y_test_aq)

# BDI
decision_tree_regressor(xy_unique_x_train_bdi, xy_unique_x_test_bdi, xy_unique_y_train_bdi, xy_unique_y_test_bdi)

# WURS
decision_tree_regressor(xy_unique_x_train_wurs, xy_unique_x_test_wurs, xy_unique_y_train_wurs, xy_unique_y_test_wurs)

R-squared: -1.515079196573979
MAE: 8.163499999999999

R-squared: -2.893852847761028
MAE: 17.297

R-squared: -2.278166086983042
MAE: 20.201999999999998



## **Random Forest Regressor**

In [70]:
from sklearn.ensemble import RandomForestRegressor

In [71]:
# Random forest regressor with train-test split

def random_forest_regressor(x_train, x_test, y_train, y_test):
    # Create random forest regressor object and fit data
    rf_rgr = RandomForestRegressor(n_estimators=100, random_state=42).fit(x_train, y_train)
    
    # Make predictions
    y_pred = rf_rgr.predict(x_test)
    
    # Evaluation metrics
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    print(f"R^2 Score: {r2}")
    print(f"Mean Absolute Error: {mae}")
    print()

### Least viewed x, y coordinate pairs

In [72]:
# AQ
random_forest_regressor(lv_x_train_aq, lv_x_test_aq, lv_y_train_aq, lv_y_test_aq)

# BDI
random_forest_regressor(lv_x_train_bdi, lv_x_test_bdi, lv_y_train_bdi, lv_y_test_bdi)

# WURS
random_forest_regressor(lv_x_train_wurs, lv_x_test_wurs, lv_y_train_wurs, lv_y_test_wurs)

R^2 Score: -0.557186907750769
Mean Absolute Error: 6.94623

R^2 Score: -0.7212256675047406
Mean Absolute Error: 10.657754999999998

R^2 Score: -1.0916885183042573
Mean Absolute Error: 18.210424999999997



### Most viewed x, y coordinate pairs

In [73]:
# AQ
random_forest_regressor(mv_x_train_aq, mv_x_test_aq, mv_y_train_aq, mv_y_test_aq)

# BDI
random_forest_regressor(mv_x_train_bdi, mv_x_test_bdi, mv_y_train_bdi, mv_y_test_bdi)

# WURS
random_forest_regressor(mv_x_train_wurs, mv_x_test_wurs, mv_y_train_wurs, mv_y_test_wurs)

R^2 Score: -0.798609130948791
Mean Absolute Error: 6.619569999999999

R^2 Score: -0.4101059413251875
Mean Absolute Error: 11.136219999999998

R^2 Score: -0.6478091957903651
Mean Absolute Error: 14.226250000000002



### Neighborhood surrounding max x, y coordinate pairs

In [74]:
# AQ
random_forest_regressor(xy_range_x_train_aq, xy_range_x_test_aq, xy_range_y_train_aq, xy_range_y_test_aq)

# BDI
random_forest_regressor(xy_range_x_train_bdi, xy_range_x_test_bdi, xy_range_y_train_bdi, xy_range_y_test_bdi)

# WURS
random_forest_regressor(xy_range_x_train_wurs, xy_range_x_test_wurs, xy_range_y_train_wurs, xy_range_y_test_wurs)

R^2 Score: -0.4583939792726943
Mean Absolute Error: 6.489335500000001

R^2 Score: -0.5775121208827869
Mean Absolute Error: 10.458106190476189

R^2 Score: -0.846147081252405
Mean Absolute Error: 15.827981190476185



### Euclidean Distance

In [75]:
# AQ
random_forest_regressor(ed_x_train_aq, ed_x_test_aq, ed_y_train_aq, ed_y_test_aq)

# BDI
random_forest_regressor(ed_x_train_bdi, ed_x_test_bdi, ed_y_train_bdi, ed_y_test_bdi)

# WURS
random_forest_regressor(ed_x_train_wurs, ed_x_test_wurs, ed_y_train_wurs, ed_y_test_wurs)

R^2 Score: -0.8782581678335541
Mean Absolute Error: 7.4686112499999995

R^2 Score: -0.42409579441685064
Mean Absolute Error: 9.748343749999998

R^2 Score: -1.22061832370991
Mean Absolute Error: 18.385835



### Mean

In [76]:
# AQ
random_forest_regressor(mean_x_train_aq, mean_x_test_aq, mean_y_train_aq, mean_y_test_aq)

# BDI
random_forest_regressor(mean_x_train_bdi, mean_x_test_bdi, mean_y_train_bdi, mean_y_test_bdi)

# WURS
random_forest_regressor(mean_x_train_wurs, mean_x_test_wurs, mean_y_train_wurs, mean_y_test_wurs)

R^2 Score: -0.35509969725556534
Mean Absolute Error: 5.637990000000003

R^2 Score: -0.4773782680087211
Mean Absolute Error: 9.720745000000003

R^2 Score: -0.20508959146181094
Mean Absolute Error: 13.296169999999998



### Standard deviation

In [77]:
# AQ
random_forest_regressor(std_x_train_aq, std_x_test_aq, std_y_train_aq, std_y_test_aq)

# BDI
random_forest_regressor(std_x_train_bdi, std_x_test_bdi, std_y_train_bdi, std_y_test_bdi)

# WURS
random_forest_regressor(std_x_train_wurs, std_x_test_wurs, std_y_train_wurs, std_y_test_wurs)

R^2 Score: -0.03414416725978642
Mean Absolute Error: 5.466665000000001

R^2 Score: -0.6394270371527389
Mean Absolute Error: 10.565845

R^2 Score: 0.021321263196699136
Mean Absolute Error: 11.72871



### Median

In [78]:
# AQ
random_forest_regressor(med_x_train_aq, med_x_test_aq, med_y_train_aq, med_y_test_aq)

# BDI
random_forest_regressor(med_x_train_bdi, med_x_test_bdi, med_y_train_bdi, med_y_test_bdi)

# WURS
random_forest_regressor(med_x_train_wurs, med_x_test_wurs, med_y_train_wurs, med_y_test_wurs)

R^2 Score: -0.91569524012305
Mean Absolute Error: 7.418160000000003

R^2 Score: -0.7980884389643974
Mean Absolute Error: 11.8884

R^2 Score: -1.0879099225448656
Mean Absolute Error: 18.514974999999996



### Pearsons CC

In [79]:
# AQ
random_forest_regressor(xy_pearson_cc_x_train_aq, xy_pearson_cc_x_test_aq, xy_pearson_cc_y_train_aq, xy_pearson_cc_y_test_aq)

# BDI
random_forest_regressor(xy_pearson_cc_x_train_bdi, xy_pearson_cc_x_test_bdi, xy_pearson_cc_y_train_bdi, xy_pearson_cc_y_test_bdi)

# WURS
random_forest_regressor(xy_pearson_cc_x_train_wurs, xy_pearson_cc_x_test_wurs, xy_pearson_cc_y_train_wurs, xy_pearson_cc_y_test_wurs)

R^2 Score: -1.4051965402135238
Mean Absolute Error: 7.847745000000003

R^2 Score: -1.4265345351044885
Mean Absolute Error: 14.63858

R^2 Score: -1.2442899404669427
Mean Absolute Error: 17.56534



### Spearmans CC

In [80]:
# AQ
random_forest_regressor(xy_spearman_cc_x_train_aq, xy_spearman_cc_x_test_aq, xy_spearman_cc_y_train_aq, xy_spearman_cc_y_test_aq)

# BDI
random_forest_regressor(xy_spearman_cc_x_train_bdi, xy_spearman_cc_x_test_bdi, xy_spearman_cc_y_train_bdi, xy_spearman_cc_y_test_bdi)

# WURS
random_forest_regressor(xy_spearman_cc_x_train_wurs, xy_spearman_cc_x_test_wurs, xy_spearman_cc_y_train_wurs, xy_spearman_cc_y_test_wurs)

R^2 Score: -0.31792302628626645
Mean Absolute Error: 6.319780000000002

R^2 Score: -1.3580197611564424
Mean Absolute Error: 13.065345000000002

R^2 Score: -1.503492040419971
Mean Absolute Error: 18.497000000000003



### Range

In [81]:
# AQ
random_forest_regressor(xy_range_x_train_aq, xy_range_x_test_aq, xy_range_y_train_aq, xy_range_y_test_aq)

# BDI
random_forest_regressor(xy_range_x_train_bdi, xy_range_x_test_bdi, xy_range_y_train_bdi, xy_range_y_test_bdi)

# WURS
random_forest_regressor(xy_range_x_train_wurs, xy_range_x_test_wurs, xy_range_y_train_wurs, xy_range_y_test_wurs)

R^2 Score: -0.4583939792726943
Mean Absolute Error: 6.489335500000001

R^2 Score: -0.5775121208827869
Mean Absolute Error: 10.458106190476189

R^2 Score: -0.846147081252405
Mean Absolute Error: 15.827981190476185



### Unique values

In [82]:
# AQ
random_forest_regressor(xy_unique_x_train_aq, xy_unique_x_test_aq, xy_unique_y_train_aq, xy_unique_y_test_aq)

# BDI
random_forest_regressor(xy_unique_x_train_bdi, xy_unique_x_test_bdi, xy_unique_y_train_bdi, xy_unique_y_test_bdi)

# WURS
random_forest_regressor(xy_unique_x_train_wurs, xy_unique_x_test_wurs, xy_unique_y_train_wurs, xy_unique_y_test_wurs)

R^2 Score: -0.490767304614272
Mean Absolute Error: 6.668580000000003

R^2 Score: -0.5986637285643213
Mean Absolute Error: 11.3314

R^2 Score: -0.9718145069662425
Mean Absolute Error: 15.711174999999997



# **Train-Test Split for Classifiers**

In [83]:
# Split function for classification models as we need to convert y input into binary values

def classification_split(input_feature, clinical_var, clinical_data=clinical_cleaned_df):

    # Creating x input feature
    x = input_feature

    # Creating y target variable
    y = clinical_data[clinical_var]

    # If the length of input variable x is not equal to label y, find only common indices
    if len(x) != len(y):
        x = x.loc[indices]

    # Encode y variable based on clinical score cut offs
    if clinical_var == 'aq':
        y = y.apply(lambda val: 1 if val >= 22 else 0)
    elif clinical_var == 'bdi':
        y = y.apply(lambda val: 1 if val >= 11 else 0)
    elif clinical_var == 'wurs':
        y = y.apply(lambda val: 1 if val >= 30 else 0)

    # Convert to arrays
    x_arr = np.array(x)
    y_arr = np.array(y)

    # Perform train-test split
    x_train, x_test, y_train, y_test = train_test_split(x_arr, y_arr, test_size=0.2, random_state=42)

    return x_train, x_test, y_train, y_test

## Least viewed x, y coordinate pairs

In [84]:
# least viwed = lv

# AQ
lv_x_train_aq_encoded, lv_x_test_aq_encoded, lv_y_train_aq_encoded, lv_y_test_aq_encoded = classification_split(filtered_min_coordinates_list, 'aq')

# BDI
lv_x_train_bdi_encoded, lv_x_test_bdi_encoded, lv_y_train_bdi_encoded, lv_y_test_bdi_encoded = classification_split(filtered_min_coordinates_list, 'bdi')

# WURS
lv_x_train_wurs_encoded, lv_x_test_wurs_encoded, lv_y_train_wurs_encoded, lv_y_test_wurs_encoded = classification_split(filtered_min_coordinates_list, 'wurs')

## Most viewed x, y coordinate pairs

In [85]:
# most viewed = mv

# AQ
mv_x_train_aq_encoded, mv_x_test_aq_encoded, mv_y_train_aq_encoded, mv_y_test_aq_encoded = classification_split(filtered_max_coordinates_list, 'aq')

# BDI
mv_x_train_bdi_encoded, mv_x_test_bdi_encoded, mv_y_train_bdi_encoded, mv_y_test_bdi_encoded = classification_split(filtered_max_coordinates_list, 'bdi')

# WURS
mv_x_train_wurs_encoded, mv_x_test_wurs_encoded, mv_y_train_wurs_encoded, mv_y_test_wurs_encoded = classification_split(filtered_max_coordinates_list, 'wurs')

## Neighborhood surrounding max x, y coordinate pairs

In [86]:
# AQ
range_neighborhood_x_train_aq_encoded, range_neighborhood_x_test_aq_encoded, range_neighborhood_y_train_aq_encoded, range_neighborhood_y_test_aq_encoded = classification_split(range_neighborhood_df, 'aq')

# BDI
range_neighborhood_x_train_bdi_encoded, range_neighborhood_x_test_bdi_encoded, range_neighborhood_y_train_bdi_encoded, range_neighborhood_y_test_bdi_encoded = classification_split(range_neighborhood_df, 'bdi')

# WURS
range_neighborhood_x_train_wurs_encoded, range_neighborhood_x_test_wurs_encoded, range_neighborhood_y_train_wurs_encoded, range_neighborhood_y_test_wurs_encoded = classification_split(range_neighborhood_df, 'wurs')


## Euclidean Distance

In [87]:
# AQ
ed_x_train_aq_encoded, ed_x_test_aq_encoded, ed_y_train_aq_encoded, ed_y_test_aq_encoded = classification_split(ed_distances_pd, 'aq')

# BDI
ed_x_train_bdi_encoded, ed_x_test_bdi_encoded, ed_y_train_bdi_encoded, ed_y_test_bdi_encoded = classification_split(ed_distances_pd, 'bdi')

# WURS
ed_x_train_wurs_encoded, ed_x_test_wurs_encoded, ed_y_train_wurs_encoded, ed_y_test_wurs_encoded = classification_split(ed_distances_pd, 'wurs')

## Mean

In [88]:
# AQ
mean_x_train_aq_encoded, mean_x_test_aq_encoded, mean_y_train_aq_encoded, mean_y_test_aq_encoded = classification_split(meanvals_df, 'aq')

# BDI
mean_x_train_bdi_encoded, mean_x_test_bdi_encoded, mean_y_train_bdi_encoded, mean_y_test_bdi_encoded = classification_split(meanvals_df, 'bdi')

# WURS
mean_x_train_wurs_encoded, mean_x_test_wurs_encoded, mean_y_train_wurs_encoded, mean_y_test_wurs_encoded = classification_split(meanvals_df, 'wurs')

## Standard deviation

In [89]:
# AQ
std_x_train_aq_encoded, std_x_test_aq_encoded, std_y_train_aq_encoded, std_y_test_aq_encoded = classification_split(stdvals_df, 'aq')

# BDI
std_x_train_bdi_encoded, std_x_test_bdi_encoded, std_y_train_bdi_encoded, std_y_test_bdi_encoded= classification_split(stdvals_df, 'bdi')

# WURS
std_x_train_wurs_encoded, std_x_test_wurs_encoded, std_y_train_wurs_encoded, std_y_test_wurs_encoded = classification_split(stdvals_df, 'wurs')

## Median

In [90]:
# AQ
med_x_train_aq_encoded, med_x_test_aq_encoded, med_y_train_aq_encoded, med_y_test_aq_encoded = classification_split(medvals_df, 'aq')

# BDI
med_x_train_bdi_encoded, med_x_test_bdi_encoded, med_y_train_bdi_encoded, med_y_test_bdi_encoded = classification_split(medvals_df, 'bdi')

# WURS
med_x_train_wurs_encoded, med_x_test_wurs_encoded, med_y_train_wurs_encoded, med_y_test_wurs_encoded = classification_split(medvals_df, 'wurs')

## Pearsons correlation coefficient

In [91]:
# AQ
xy_pearson_cc_x_train_aq_encoded, xy_pearson_cc_x_test_aq_encoded, xy_pearson_cc_y_train_aq_encoded, xy_pearson_cc_y_test_aq_encoded = classification_split(xy_pearson_df, 'aq')

# BDI
xy_pearson_cc_x_train_bdi_encoded, xy_pearson_cc_x_test_bdi_encoded, xy_pearson_cc_y_train_bdi_encoded, xy_pearson_cc_y_test_bdi_encoded = classification_split(xy_pearson_df, 'bdi')

# WURS
xy_pearson_cc_x_train_wurs_encoded, xy_pearson_cc_x_test_wurs_encoded, xy_pearson_cc_y_train_wurs_encoded, xy_pearson_cc_y_test_wurs_encoded = classification_split(xy_pearson_df, 'wurs')

## Spearmans correlation coefficient

In [92]:
# AQ
xy_spearman_cc_x_train_aq_encoded, xy_spearman_cc_x_test_aq_encoded, xy_spearman_cc_y_train_aq_encoded, xy_spearman_cc_y_test_aq_encoded = classification_split(xy_spearman_df, 'aq')

# BDI
xy_spearman_cc_x_train_bdi_encoded, xy_spearman_cc_x_test_bdi_encoded, xy_spearman_cc_y_train_bdi_encoded, xy_spearman_cc_y_test_bdi_encoded = classification_split(xy_spearman_df, 'bdi')

# WURS
xy_spearman_cc_x_train_wurs_encoded, xy_spearman_cc_x_test_wurs_encoded, xy_spearman_cc_y_train_wurs_encoded, xy_spearman_cc_y_test_wurs_encoded = classification_split(xy_spearman_df, 'wurs')

## Range

In [93]:
# AQ
xy_range_x_train_aq_encoded, xy_range_x_test_aq_encoded, xy_range_y_train_aq_encoded, xy_range_y_test_aq_encoded = classification_split(xy_range_df, 'aq')

# BDI
xy_range_x_train_bdi_encoded, xy_range_x_test_bdi_encoded, xy_range_y_train_bdi_encoded, xy_range_y_test_bdi_encoded = classification_split(xy_range_df, 'bdi')

# WURS
xy_range_x_train_wurs_encoded, xy_range_x_test_wurs_encoded, xy_range_y_train_wurs_encoded, xy_range_y_test_wurs_encoded = classification_split(xy_range_df, 'wurs')

## Unique values

In [94]:
# AQ
xy_unique_x_train_aq_encoded, xy_unique_x_test_aq_encoded, xy_unique_y_train_aq_encoded, xy_unique_y_test_aq_encoded = classification_split(unique_vals_mean_df, 'aq')

# BDI
xy_unique_x_train_bdi_encoded, xy_unique_x_test_bdi_encoded, xy_unique_y_train_bdi_encoded, xy_unique_y_test_bdi_encoded = classification_split(unique_vals_mean_df, 'bdi')

# WURS
xy_unique_x_train_wurs_encoded, xy_unique_x_test_wurs_encoded, xy_unique_y_train_wurs_encoded, xy_unique_y_test_wurs_encoded = classification_split(unique_vals_mean_df, 'wurs')

# **Classifiers with Train-Test Split**

## **Logistic Regression**

In [95]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [96]:
# Logistic regression classifer with train-test split

def logistic_regression(x_train, x_test, y_train, y_test):
    
    # Create logistic regression object and fit model
    lr_model = LogisticRegression(random_state=42, max_iter=10000).fit(x_train, y_train)

    # Make predictions
    y_pred = lr_model.predict(x_test)

    # Evaluate model
    accuracy = round(accuracy_score(y_test, y_pred), 3)
    f1 = round(f1_score(y_test, y_pred), 3)
    precision = round(precision_score(y_test, y_pred), 3)
    recall = round(recall_score(y_test, y_pred), 3)

    print(f'Accuracy: {accuracy * 100.0:.2f}%')
    print(f'F1: {f1 * 100.0:.2f}%')
    print(f'Precision: {precision * 100.0:.2f}%')
    print(f'Recall: {recall * 100.0:.2f}%')
    print()

### Least viewed x, y coordinate pairs

In [97]:
# AQ
logistic_regression(lv_x_train_aq_encoded, lv_x_test_aq_encoded, lv_y_train_aq_encoded, lv_y_test_aq_encoded)

# BDI
logistic_regression(lv_x_train_bdi_encoded, lv_x_test_bdi_encoded, lv_y_train_bdi_encoded, lv_y_test_bdi_encoded)

# WURS
logistic_regression(lv_x_train_wurs_encoded, lv_x_test_wurs_encoded, lv_y_train_wurs_encoded, lv_y_test_wurs_encoded)

Accuracy: 55.00%
F1: 0.00%
Precision: 0.00%
Recall: 0.00%

Accuracy: 95.00%
F1: 97.40%
Precision: 95.00%
Recall: 100.00%

Accuracy: 80.00%
F1: 88.90%
Precision: 80.00%
Recall: 100.00%



### Most viewed x, y coordinate pairs

In [98]:
# AQ
logistic_regression(mv_x_train_aq_encoded, mv_x_test_aq_encoded, mv_y_train_aq_encoded, mv_y_test_aq_encoded)

# BDI
logistic_regression(mv_x_train_bdi_encoded, mv_x_test_bdi_encoded, mv_y_train_bdi_encoded, mv_y_test_bdi_encoded)

# WURS
logistic_regression(mv_x_train_wurs_encoded, mv_x_test_wurs_encoded, mv_y_train_wurs_encoded, mv_y_test_wurs_encoded)

Accuracy: 60.00%
F1: 50.00%
Precision: 57.10%
Recall: 44.40%

Accuracy: 95.00%
F1: 97.40%
Precision: 95.00%
Recall: 100.00%

Accuracy: 80.00%
F1: 88.90%
Precision: 80.00%
Recall: 100.00%



### Neighborhood surrounding most viewed x, y coordinate pairs

In [99]:
# AQ
# logistic_regression(x_train_reshaped, x_test_reshaped, range_neighborhood_y_train_aq_encoded, range_neighborhood_y_test_aq_encoded)

# BDI
# logistic_regression(range_neighborhood_x_train_bdi_encoded, range_neighborhood_x_test_bdi_encoded, range_neighborhood_y_train_bdi_encoded, range_neighborhood_y_test_bdi_encoded)

# WURS
# logistic_regression(range_neighborhood_x_train_wurs_encoded, range_neighborhood_x_test_wurs_encoded, range_neighborhood_y_train_wurs_encoded, range_neighborhood_y_test_wurs_encoded)

### Euclidean Distance

In [100]:
# AQ
logistic_regression(ed_x_train_aq_encoded, ed_x_test_aq_encoded, ed_y_train_aq_encoded, ed_y_test_aq_encoded)

# BDI
logistic_regression(ed_x_train_bdi_encoded, ed_x_test_bdi_encoded, ed_y_train_bdi_encoded, ed_y_test_bdi_encoded)

# WURS
logistic_regression(ed_x_train_wurs_encoded, ed_x_test_wurs_encoded, ed_y_train_wurs_encoded, ed_y_test_wurs_encoded)

Accuracy: 30.00%
F1: 22.20%
Precision: 22.20%
Recall: 22.20%

Accuracy: 95.00%
F1: 97.40%
Precision: 95.00%
Recall: 100.00%

Accuracy: 80.00%
F1: 88.90%
Precision: 80.00%
Recall: 100.00%



### Mean

In [101]:
# AQ
logistic_regression(mean_x_train_aq_encoded, mean_x_test_aq_encoded, mean_y_train_aq_encoded, mean_y_test_aq_encoded)

# BDI
logistic_regression(mean_x_train_bdi_encoded, mean_x_test_bdi_encoded, mean_y_train_bdi_encoded, mean_y_test_bdi_encoded)

# WURS
logistic_regression(mean_x_train_wurs_encoded, mean_x_test_wurs_encoded, mean_y_train_wurs_encoded, mean_y_test_wurs_encoded)

Accuracy: 55.00%
F1: 0.00%
Precision: 0.00%
Recall: 0.00%

Accuracy: 95.00%
F1: 97.40%
Precision: 95.00%
Recall: 100.00%

Accuracy: 80.00%
F1: 88.90%
Precision: 80.00%
Recall: 100.00%



### Standard deviation

In [102]:
# AQ
logistic_regression(std_x_train_aq_encoded, std_x_test_aq_encoded, std_y_train_aq_encoded, std_y_test_aq_encoded)

# BDI
logistic_regression(std_x_train_bdi_encoded, std_x_test_bdi_encoded, std_y_train_bdi_encoded, std_y_test_bdi_encoded)

# WURS
logistic_regression(std_x_train_wurs_encoded, std_x_test_wurs_encoded, std_y_train_wurs_encoded, std_y_test_wurs_encoded)

Accuracy: 65.00%
F1: 46.20%
Precision: 75.00%
Recall: 33.30%

Accuracy: 95.00%
F1: 97.40%
Precision: 95.00%
Recall: 100.00%

Accuracy: 80.00%
F1: 88.90%
Precision: 80.00%
Recall: 100.00%



### Median

In [103]:
# AQ
logistic_regression(med_x_train_aq_encoded, med_x_test_aq_encoded, med_y_train_aq_encoded, med_y_test_aq_encoded)

# BDI
logistic_regression(med_x_train_bdi_encoded, med_x_test_bdi_encoded, med_y_train_bdi_encoded, med_y_test_bdi_encoded)

# WURS
logistic_regression(med_x_train_wurs_encoded, med_x_test_wurs_encoded, med_y_train_wurs_encoded, med_y_test_wurs_encoded)

Accuracy: 55.00%
F1: 0.00%
Precision: 0.00%
Recall: 0.00%

Accuracy: 100.00%
F1: 100.00%
Precision: 100.00%
Recall: 100.00%

Accuracy: 80.00%
F1: 88.90%
Precision: 80.00%
Recall: 100.00%



### Pearsons CC

In [104]:
# AQ
logistic_regression(xy_pearson_cc_x_train_aq_encoded, xy_pearson_cc_x_test_aq_encoded, xy_pearson_cc_y_train_aq_encoded, xy_pearson_cc_y_test_aq_encoded)

# BDI
logistic_regression(xy_pearson_cc_x_train_bdi_encoded, xy_pearson_cc_x_test_bdi_encoded, xy_pearson_cc_y_train_bdi_encoded, xy_pearson_cc_y_test_bdi_encoded)

# WURS
logistic_regression(xy_pearson_cc_x_train_wurs_encoded, xy_pearson_cc_x_test_wurs_encoded, xy_pearson_cc_y_train_wurs_encoded, xy_pearson_cc_y_test_wurs_encoded)

Accuracy: 55.00%
F1: 0.00%
Precision: 0.00%
Recall: 0.00%

Accuracy: 95.00%
F1: 97.40%
Precision: 95.00%
Recall: 100.00%

Accuracy: 80.00%
F1: 88.90%
Precision: 80.00%
Recall: 100.00%



### Spearmans CC

In [105]:
# AQ
logistic_regression(xy_spearman_cc_x_train_aq_encoded, xy_spearman_cc_x_test_aq_encoded, xy_spearman_cc_y_train_aq_encoded, xy_spearman_cc_y_test_aq_encoded)

# BDI
logistic_regression(xy_spearman_cc_x_train_bdi_encoded, xy_spearman_cc_x_test_bdi_encoded, xy_spearman_cc_y_train_bdi_encoded, xy_spearman_cc_y_test_bdi_encoded)

# WURS
logistic_regression(xy_spearman_cc_x_train_wurs_encoded, xy_spearman_cc_x_test_wurs_encoded, xy_spearman_cc_y_train_wurs_encoded, xy_spearman_cc_y_test_wurs_encoded)

Accuracy: 55.00%
F1: 0.00%
Precision: 0.00%
Recall: 0.00%

Accuracy: 95.00%
F1: 97.40%
Precision: 95.00%
Recall: 100.00%

Accuracy: 80.00%
F1: 88.90%
Precision: 80.00%
Recall: 100.00%



### Range

In [106]:
# AQ
logistic_regression(xy_range_x_train_aq_encoded, xy_range_x_test_aq_encoded, xy_range_y_train_aq_encoded, xy_range_y_test_aq_encoded)

# BDI
logistic_regression(xy_range_x_train_bdi_encoded, xy_range_x_test_bdi_encoded, xy_range_y_train_bdi_encoded, xy_range_y_test_bdi_encoded)

# WURS
logistic_regression(xy_range_x_train_wurs_encoded, xy_range_x_test_wurs_encoded, xy_range_y_train_wurs_encoded, xy_range_y_test_wurs_encoded)

Accuracy: 35.00%
F1: 13.30%
Precision: 16.70%
Recall: 11.10%

Accuracy: 95.00%
F1: 97.40%
Precision: 95.00%
Recall: 100.00%

Accuracy: 80.00%
F1: 88.90%
Precision: 80.00%
Recall: 100.00%



### Unique values

In [107]:
# AQ
logistic_regression(xy_unique_x_train_aq_encoded, xy_unique_x_test_aq_encoded, xy_unique_y_train_aq_encoded, xy_unique_y_test_aq_encoded)

# BDI
logistic_regression(xy_unique_x_train_bdi_encoded, xy_unique_x_test_bdi_encoded, xy_unique_y_train_bdi_encoded, xy_unique_y_test_bdi_encoded)

# WURS
logistic_regression(xy_unique_x_train_wurs_encoded, xy_unique_x_test_wurs_encoded, xy_unique_y_train_wurs_encoded, xy_unique_y_test_wurs_encoded)

Accuracy: 55.00%
F1: 0.00%
Precision: 0.00%
Recall: 0.00%

Accuracy: 95.00%
F1: 97.40%
Precision: 95.00%
Recall: 100.00%

Accuracy: 80.00%
F1: 88.90%
Precision: 80.00%
Recall: 100.00%



## **Decision Tree**

In [108]:
from sklearn.tree import DecisionTreeClassifier

In [109]:
# Decision Tree classifier with train-test split

def decision_tree_classifier(x_train, x_test, y_train, y_test):
    
    # Create decision tree object and fit classifier
    dt_clf = DecisionTreeClassifier(random_state=42).fit(x_train, y_train)

    # Make predictions
    y_pred = dt_clf.predict(x_test)

    # Evaluate model
    accuracy = round(accuracy_score(y_test, y_pred), 3)
    f1 = round(f1_score(y_test, y_pred), 3)
    precision = round(precision_score(y_test, y_pred), 3)
    recall = round(recall_score(y_test, y_pred), 3)

    print(f'Accuracy: {accuracy * 100.0:.2f}%')
    print(f'F1: {f1 * 100.0:.2f}%')
    print(f'Precision: {precision * 100.0:.2f}%')
    print(f'Recall: {recall * 100.0:.2f}%')
    print()

### Least viewed x, y coordinate pairs

In [110]:
# AQ
decision_tree_classifier(lv_x_train_aq_encoded, lv_x_test_aq_encoded, lv_y_train_aq_encoded, lv_y_test_aq_encoded)

# BDI
decision_tree_classifier(lv_x_train_bdi_encoded, lv_x_test_bdi_encoded, lv_y_train_bdi_encoded, lv_y_test_bdi_encoded)

# WURS
decision_tree_classifier(lv_x_train_wurs_encoded, lv_x_test_wurs_encoded, lv_y_train_wurs_encoded, lv_y_test_wurs_encoded)

Accuracy: 40.00%
F1: 33.30%
Precision: 33.30%
Recall: 33.30%

Accuracy: 95.00%
F1: 97.40%
Precision: 95.00%
Recall: 100.00%

Accuracy: 65.00%
F1: 77.40%
Precision: 80.00%
Recall: 75.00%



### Most viewed x, y coordinate pairs

In [111]:
# AQ
decision_tree_classifier(mv_x_train_aq_encoded, mv_x_test_aq_encoded, mv_y_train_aq_encoded, mv_y_test_aq_encoded)

# BDI
decision_tree_classifier(mv_x_train_bdi_encoded, mv_x_test_bdi_encoded, mv_y_train_bdi_encoded, mv_y_test_bdi_encoded)

# WURS
decision_tree_classifier(mv_x_train_wurs_encoded, mv_x_test_wurs_encoded, mv_y_train_wurs_encoded, mv_y_test_wurs_encoded)

Accuracy: 50.00%
F1: 50.00%
Precision: 45.50%
Recall: 55.60%

Accuracy: 70.00%
F1: 81.30%
Precision: 100.00%
Recall: 68.40%

Accuracy: 65.00%
F1: 77.40%
Precision: 80.00%
Recall: 75.00%



### Neighborhood surrounding most viewed x, y coordinate pairs

In [112]:
# AQ
# decision_tree_classifier(range_x_train_aq_encoded, range_x_test_aq_encoded, range_y_train_aq_encoded, range_y_test_aq_encoded)

# BDI
# decision_tree_classifier(range_x_train_bdi_encoded, range_x_test_bdi_encoded, range_y_train_bdi_encoded, range_y_test_bdi_encoded)

# WURS
# decision_tree_classifier(range_x_train_wurs_encoded, range_x_test_wurs_encoded, range_y_train_wurs_encoded, range_y_test_wurs_encoded)

### Euclidean Distance

In [113]:
# AQ
decision_tree_classifier(ed_x_train_aq_encoded, ed_x_test_aq_encoded, ed_y_train_aq_encoded, ed_y_test_aq_encoded)

# BDI
decision_tree_classifier(ed_x_train_bdi_encoded, ed_x_test_bdi_encoded, ed_y_train_bdi_encoded, ed_y_test_bdi_encoded)

# WURS
decision_tree_classifier(ed_x_train_wurs_encoded, ed_x_test_wurs_encoded, ed_y_train_wurs_encoded, ed_y_test_wurs_encoded)

Accuracy: 60.00%
F1: 60.00%
Precision: 54.50%
Recall: 66.70%

Accuracy: 80.00%
F1: 88.90%
Precision: 94.10%
Recall: 84.20%

Accuracy: 55.00%
F1: 69.00%
Precision: 76.90%
Recall: 62.50%



### Mean

In [114]:
# AQ
decision_tree_classifier(mean_x_train_aq_encoded, mean_x_test_aq_encoded, mean_y_train_aq_encoded, mean_y_test_aq_encoded)

# BDI
decision_tree_classifier(mean_x_train_bdi_encoded, mean_x_test_bdi_encoded, mean_y_train_bdi_encoded, mean_y_test_bdi_encoded)

# WURS
decision_tree_classifier(mean_x_train_wurs_encoded, mean_x_test_wurs_encoded, mean_y_train_wurs_encoded, mean_y_test_wurs_encoded)

Accuracy: 45.00%
F1: 42.10%
Precision: 40.00%
Recall: 44.40%

Accuracy: 85.00%
F1: 91.40%
Precision: 100.00%
Recall: 84.20%

Accuracy: 85.00%
F1: 90.30%
Precision: 93.30%
Recall: 87.50%



### Standard deviation

In [115]:
# AQ
decision_tree_classifier(std_x_train_aq_encoded, std_x_test_aq_encoded, std_y_train_aq_encoded, std_y_test_aq_encoded)

# BDI
decision_tree_classifier(std_x_train_bdi_encoded, std_x_test_bdi_encoded, std_y_train_bdi_encoded, std_y_test_bdi_encoded)

# WURS
decision_tree_classifier(std_x_train_wurs_encoded, std_x_test_wurs_encoded, std_y_train_wurs_encoded, std_y_test_wurs_encoded)

Accuracy: 60.00%
F1: 50.00%
Precision: 57.10%
Recall: 44.40%

Accuracy: 65.00%
F1: 77.40%
Precision: 100.00%
Recall: 63.20%

Accuracy: 65.00%
F1: 75.90%
Precision: 84.60%
Recall: 68.80%



### Median

In [116]:
# AQ
decision_tree_classifier(med_x_train_aq_encoded, med_x_test_aq_encoded, med_y_train_aq_encoded, med_y_test_aq_encoded)

# BDI
decision_tree_classifier(med_x_train_bdi_encoded, med_x_test_bdi_encoded, med_y_train_bdi_encoded, med_y_test_bdi_encoded)

# WURS
decision_tree_classifier(med_x_train_wurs_encoded, med_x_test_wurs_encoded, med_y_train_wurs_encoded, med_y_test_wurs_encoded)

Accuracy: 45.00%
F1: 52.20%
Precision: 42.90%
Recall: 66.70%

Accuracy: 85.00%
F1: 91.40%
Precision: 100.00%
Recall: 84.20%

Accuracy: 60.00%
F1: 71.40%
Precision: 83.30%
Recall: 62.50%



### Pearsons CC

In [117]:
# AQ
decision_tree_classifier(xy_pearson_cc_x_train_aq_encoded, xy_pearson_cc_x_test_aq_encoded, xy_pearson_cc_y_train_aq_encoded, xy_pearson_cc_y_test_aq_encoded)

# BDI
decision_tree_classifier(xy_pearson_cc_x_train_bdi_encoded, xy_pearson_cc_x_test_bdi_encoded, xy_pearson_cc_y_train_bdi_encoded, xy_pearson_cc_y_test_bdi_encoded)

# WURS
decision_tree_classifier(xy_pearson_cc_x_train_wurs_encoded, xy_pearson_cc_x_test_wurs_encoded, xy_pearson_cc_y_train_wurs_encoded, xy_pearson_cc_y_test_wurs_encoded)

Accuracy: 40.00%
F1: 25.00%
Precision: 28.60%
Recall: 22.20%

Accuracy: 55.00%
F1: 71.00%
Precision: 91.70%
Recall: 57.90%

Accuracy: 60.00%
F1: 71.40%
Precision: 83.30%
Recall: 62.50%



### Spearmans CC

In [118]:
# AQ
decision_tree_classifier(xy_spearman_cc_x_train_aq_encoded, xy_spearman_cc_x_test_aq_encoded, xy_spearman_cc_y_train_aq_encoded, xy_spearman_cc_y_test_aq_encoded)

# BDI
decision_tree_classifier(xy_spearman_cc_x_train_bdi_encoded, xy_spearman_cc_x_test_bdi_encoded, xy_spearman_cc_y_train_bdi_encoded, xy_spearman_cc_y_test_bdi_encoded)

# WURS
decision_tree_classifier(xy_spearman_cc_x_train_wurs_encoded, xy_spearman_cc_x_test_wurs_encoded, xy_spearman_cc_y_train_wurs_encoded, xy_spearman_cc_y_test_wurs_encoded)

Accuracy: 65.00%
F1: 63.20%
Precision: 60.00%
Recall: 66.70%

Accuracy: 60.00%
F1: 75.00%
Precision: 92.30%
Recall: 63.20%

Accuracy: 40.00%
F1: 53.80%
Precision: 70.00%
Recall: 43.80%



### Range

In [119]:
# AQ
decision_tree_classifier(xy_range_x_train_aq_encoded, xy_range_x_test_aq_encoded, xy_range_y_train_aq_encoded, xy_range_y_test_aq_encoded)

# BDI
decision_tree_classifier(xy_range_x_train_bdi_encoded, xy_range_x_test_bdi_encoded, xy_range_y_train_bdi_encoded, xy_range_y_test_bdi_encoded)

# WURS
decision_tree_classifier(xy_range_x_train_wurs_encoded, xy_range_x_test_wurs_encoded, xy_range_y_train_wurs_encoded, xy_range_y_test_wurs_encoded)

Accuracy: 40.00%
F1: 25.00%
Precision: 28.60%
Recall: 22.20%

Accuracy: 70.00%
F1: 82.40%
Precision: 93.30%
Recall: 73.70%

Accuracy: 65.00%
F1: 77.40%
Precision: 80.00%
Recall: 75.00%



### Unique values

In [120]:
# AQ
decision_tree_classifier(xy_unique_x_train_aq_encoded, xy_unique_x_test_aq_encoded, xy_unique_y_train_aq_encoded, xy_unique_y_test_aq_encoded)

# BDI
decision_tree_classifier(xy_unique_x_train_bdi_encoded, xy_unique_x_test_bdi_encoded, xy_unique_y_train_bdi_encoded, xy_unique_y_test_bdi_encoded)

# WURS
decision_tree_classifier(xy_unique_x_train_wurs_encoded, xy_unique_x_test_wurs_encoded, xy_unique_y_train_wurs_encoded, xy_unique_y_test_wurs_encoded)

Accuracy: 40.00%
F1: 33.30%
Precision: 33.30%
Recall: 33.30%

Accuracy: 95.00%
F1: 97.30%
Precision: 100.00%
Recall: 94.70%

Accuracy: 60.00%
F1: 73.30%
Precision: 78.60%
Recall: 68.80%



## **Random Forest**

In [121]:
from sklearn.ensemble import RandomForestClassifier

In [122]:
# Random forest classifier with train-test split

def random_forest_classifier(x_train, x_test, y_train, y_test):
    
    # Create random forest object and fit classifier
    rf_clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(x_train, y_train)

    # Make predictions
    y_pred = rf_clf.predict(x_test)

    # Evaluate model
    accuracy = round(accuracy_score(y_test, y_pred), 3)
    f1 = round(f1_score(y_test, y_pred, average='macro'), 3)
    precision = round(precision_score(y_test, y_pred, average='macro'), 3)
    recall = round(recall_score(y_test, y_pred, average='macro'), 3)

    print(f'Accuracy: {accuracy * 100.0:.2f}%')
    print(f'F1: {f1 * 100.0:.2f}%')
    print(f'Precision: {precision * 100.0:.2f}%')
    print(f'Recall: {recall * 100.0:.2f}%')
    print()

### Least viewed x, y coordinate pairs

In [123]:
# AQ
random_forest_classifier(lv_x_train_aq_encoded, lv_x_test_aq_encoded, lv_y_train_aq_encoded, lv_y_test_aq_encoded)

# BDI
random_forest_classifier(lv_x_train_bdi_encoded, lv_x_test_bdi_encoded, lv_y_train_bdi_encoded, lv_y_test_bdi_encoded)

# WURS
random_forest_classifier(lv_x_train_wurs_encoded, lv_x_test_wurs_encoded, lv_y_train_wurs_encoded, lv_y_test_wurs_encoded)

Accuracy: 35.00%
F1: 33.50%
Precision: 33.30%
Recall: 33.80%

Accuracy: 95.00%
F1: 48.70%
Precision: 47.50%
Recall: 50.00%

Accuracy: 65.00%
F1: 56.10%
Precision: 56.60%
Recall: 59.40%



### Most viewed x, y coordinate pairs

In [124]:
# AQ
random_forest_classifier(mv_x_train_aq_encoded, mv_x_test_aq_encoded, mv_y_train_aq_encoded, mv_y_test_aq_encoded)

# BDI
random_forest_classifier(mv_x_train_bdi_encoded, mv_x_test_bdi_encoded, mv_y_train_bdi_encoded, mv_y_test_bdi_encoded)

# WURS
random_forest_classifier(mv_x_train_wurs_encoded, mv_x_test_wurs_encoded, mv_y_train_wurs_encoded, mv_y_test_wurs_encoded)

Accuracy: 40.00%
F1: 37.50%
Precision: 37.40%
Recall: 38.40%

Accuracy: 90.00%
F1: 47.40%
Precision: 47.40%
Recall: 47.40%

Accuracy: 50.00%
F1: 40.50%
Precision: 43.80%
Recall: 40.60%



### Neighborhood surrounding most viewed x, y coordinate pairs

In [125]:
# AQ
# random_forest_classifier(range_x_train_aq_encoded, range_x_test_aq_encoded, range_y_train_aq_encoded, range_y_test_aq_encoded)

# BDI
# random_forest_classifier(range_x_train_bdi_encoded, range_x_test_bdi_encoded, range_y_train_bdi_encoded, range_y_test_bdi_encoded)

# WURS
# random_forest_classifier(range_x_train_wurs_encoded, range_x_test_wurs_encoded, range_y_train_wurs_encoded, range_y_test_wurs_encoded)

### Euclidean Distance

In [126]:
# AQ
random_forest_classifier(ed_x_train_aq_encoded, ed_x_test_aq_encoded, ed_y_train_aq_encoded, ed_y_test_aq_encoded)

# BDI
random_forest_classifier(ed_x_train_bdi_encoded, ed_x_test_bdi_encoded, ed_y_train_bdi_encoded, ed_y_test_bdi_encoded)

# WURS
random_forest_classifier(ed_x_train_wurs_encoded, ed_x_test_wurs_encoded, ed_y_train_wurs_encoded, ed_y_test_wurs_encoded)

Accuracy: 60.00%
F1: 60.00%
Precision: 60.60%
Recall: 60.60%

Accuracy: 80.00%
F1: 44.40%
Precision: 47.10%
Recall: 42.10%

Accuracy: 55.00%
F1: 43.60%
Precision: 45.60%
Recall: 43.80%



### Mean

In [127]:
# AQ
random_forest_classifier(mean_x_train_aq_encoded, mean_x_test_aq_encoded, mean_y_train_aq_encoded, mean_y_test_aq_encoded)

# BDI
random_forest_classifier(mean_x_train_bdi_encoded, mean_x_test_bdi_encoded, mean_y_train_bdi_encoded, mean_y_test_bdi_encoded)

# WURS
random_forest_classifier(mean_x_train_wurs_encoded, mean_x_test_wurs_encoded, mean_y_train_wurs_encoded, mean_y_test_wurs_encoded)

Accuracy: 35.00%
F1: 34.80%
Precision: 35.00%
Recall: 34.80%

Accuracy: 90.00%
F1: 47.40%
Precision: 47.40%
Recall: 47.40%

Accuracy: 75.00%
F1: 56.70%
Precision: 57.80%
Recall: 56.20%



### Standard deviation

In [128]:
# AQ
random_forest_classifier(std_x_train_aq_encoded, std_x_test_aq_encoded, std_y_train_aq_encoded, std_y_test_aq_encoded)

# BDI
random_forest_classifier(std_x_train_bdi_encoded, std_x_test_bdi_encoded, std_y_train_bdi_encoded, std_y_test_bdi_encoded)

# WURS
random_forest_classifier(std_x_train_wurs_encoded, std_x_test_wurs_encoded, std_y_train_wurs_encoded, std_y_test_wurs_encoded)

Accuracy: 60.00%
F1: 59.60%
Precision: 59.60%
Recall: 59.60%

Accuracy: 65.00%
F1: 49.80%
Precision: 56.20%
Recall: 81.60%

Accuracy: 60.00%
F1: 37.50%
Precision: 37.50%
Recall: 37.50%



### Median

In [129]:
# AQ
random_forest_classifier(med_x_train_aq_encoded, med_x_test_aq_encoded, med_y_train_aq_encoded, med_y_test_aq_encoded)

# BDI
random_forest_classifier(med_x_train_bdi_encoded, med_x_test_bdi_encoded, med_y_train_bdi_encoded, med_y_test_bdi_encoded)

# WURS
random_forest_classifier(med_x_train_wurs_encoded, med_x_test_wurs_encoded, med_y_train_wurs_encoded, med_y_test_wurs_encoded)

Accuracy: 55.00%
F1: 54.90%
Precision: 56.20%
Recall: 56.10%

Accuracy: 85.00%
F1: 45.90%
Precision: 47.20%
Recall: 44.70%

Accuracy: 55.00%
F1: 52.00%
Precision: 58.10%
Recall: 62.50%



### Pearsons CC

In [130]:
# AQ
random_forest_classifier(xy_pearson_cc_x_train_aq_encoded, xy_pearson_cc_x_test_aq_encoded, xy_pearson_cc_y_train_aq_encoded, xy_pearson_cc_y_test_aq_encoded)

# BDI
random_forest_classifier(xy_pearson_cc_x_train_bdi_encoded, xy_pearson_cc_x_test_bdi_encoded, xy_pearson_cc_y_train_bdi_encoded, xy_pearson_cc_y_test_bdi_encoded)

# WURS
random_forest_classifier(xy_pearson_cc_x_train_wurs_encoded, xy_pearson_cc_x_test_wurs_encoded, xy_pearson_cc_y_train_wurs_encoded, xy_pearson_cc_y_test_wurs_encoded)

Accuracy: 40.00%
F1: 37.50%
Precision: 37.40%
Recall: 38.40%

Accuracy: 55.00%
F1: 35.50%
Precision: 45.80%
Recall: 28.90%

Accuracy: 60.00%
F1: 52.40%
Precision: 54.20%
Recall: 56.20%



### Spearmans CC

In [131]:
# AQ
random_forest_classifier(xy_spearman_cc_x_train_aq_encoded, xy_spearman_cc_x_test_aq_encoded, xy_spearman_cc_y_train_aq_encoded, xy_spearman_cc_y_test_aq_encoded)

# BDI
random_forest_classifier(xy_spearman_cc_x_train_bdi_encoded, xy_spearman_cc_x_test_bdi_encoded, xy_spearman_cc_y_train_bdi_encoded, xy_spearman_cc_y_test_bdi_encoded)

# WURS
random_forest_classifier(xy_spearman_cc_x_train_wurs_encoded, xy_spearman_cc_x_test_wurs_encoded, xy_spearman_cc_y_train_wurs_encoded, xy_spearman_cc_y_test_wurs_encoded)

Accuracy: 65.00%
F1: 64.90%
Precision: 65.00%
Recall: 65.20%

Accuracy: 60.00%
F1: 37.50%
Precision: 46.20%
Recall: 31.60%

Accuracy: 40.00%
F1: 34.10%
Precision: 40.00%
Recall: 34.40%



### Range

In [132]:
# AQ
random_forest_classifier(xy_range_x_train_aq_encoded, xy_range_x_test_aq_encoded, xy_range_y_train_aq_encoded, xy_range_y_test_aq_encoded)

# BDI
random_forest_classifier(xy_range_x_train_bdi_encoded, xy_range_x_test_bdi_encoded, xy_range_y_train_bdi_encoded, xy_range_y_test_bdi_encoded)

# WURS
random_forest_classifier(xy_range_x_train_wurs_encoded, xy_range_x_test_wurs_encoded, xy_range_y_train_wurs_encoded, xy_range_y_test_wurs_encoded)

Accuracy: 55.00%
F1: 54.90%
Precision: 55.00%
Recall: 55.10%

Accuracy: 90.00%
F1: 47.40%
Precision: 47.40%
Recall: 47.40%

Accuracy: 60.00%
F1: 46.70%
Precision: 47.60%
Recall: 46.90%



### Unique values

In [133]:
# AQ
random_forest_classifier(xy_unique_x_train_aq_encoded, xy_unique_x_test_aq_encoded, xy_unique_y_train_aq_encoded, xy_unique_y_test_aq_encoded)

# BDI
random_forest_classifier(xy_unique_x_train_bdi_encoded, xy_unique_x_test_bdi_encoded, xy_unique_y_train_bdi_encoded, xy_unique_y_test_bdi_encoded)

# WURS
random_forest_classifier(xy_unique_x_train_wurs_encoded, xy_unique_x_test_wurs_encoded, xy_unique_y_train_wurs_encoded, xy_unique_y_test_wurs_encoded)

Accuracy: 55.00%
F1: 52.00%
Precision: 53.60%
Recall: 53.00%

Accuracy: 95.00%
F1: 48.70%
Precision: 47.50%
Recall: 50.00%

Accuracy: 70.00%
F1: 53.10%
Precision: 53.10%
Recall: 53.10%



## **SVM**

In [134]:
from sklearn import svm

In [135]:
# SVM classifier with train-test split

def svm_classifier(x_train, x_test, y_train, y_test):
    
    # Create support vector machine object and fit classifier
    svm_clf = svm.SVC(random_state=42).fit(x_train, y_train)

    # Make predictions
    y_pred = svm_clf.predict(x_test)

    # Evaluate model
    accuracy = round(accuracy_score(y_test, y_pred), 3)
    f1 = round(f1_score(y_test, y_pred, average='macro'), 3)
    precision = round(precision_score(y_test, y_pred, average='macro'), 3)
    recall = round(recall_score(y_test, y_pred, average='macro'), 3)

    print(f'Accuracy: {accuracy * 100.0:.2f}%')
    print(f'F1: {f1 * 100.0:.2f}%')
    print(f'Precision: {precision * 100.0:.2f}%')
    print(f'Recall: {recall * 100.0:.2f}%')
    print()

### Least viewed x, y coordinate pairs

In [136]:
# AQ
svm_classifier(lv_x_train_aq_encoded, lv_x_test_aq_encoded, lv_y_train_aq_encoded, lv_y_test_aq_encoded)

# BDI
svm_classifier(lv_x_train_bdi_encoded, lv_x_test_bdi_encoded, lv_y_train_bdi_encoded, lv_y_test_bdi_encoded)

# WURS
svm_classifier(lv_x_train_wurs_encoded, lv_x_test_wurs_encoded, lv_y_train_wurs_encoded, lv_y_test_wurs_encoded)

Accuracy: 55.00%
F1: 35.50%
Precision: 27.50%
Recall: 50.00%

Accuracy: 95.00%
F1: 48.70%
Precision: 47.50%
Recall: 50.00%

Accuracy: 80.00%
F1: 44.40%
Precision: 40.00%
Recall: 50.00%



### Most viewed x, y coordinate pairs

In [137]:
# AQ
svm_classifier(mv_x_train_aq_encoded, mv_x_test_aq_encoded, mv_y_train_aq_encoded, mv_y_test_aq_encoded)

# BDI
svm_classifier(mv_x_train_bdi_encoded, mv_x_test_bdi_encoded, mv_y_train_bdi_encoded, mv_y_test_bdi_encoded)

# WURS
svm_classifier(mv_x_train_wurs_encoded, mv_x_test_wurs_encoded, mv_y_train_wurs_encoded, mv_y_test_wurs_encoded)

Accuracy: 60.00%
F1: 52.40%
Precision: 62.70%
Recall: 56.60%

Accuracy: 95.00%
F1: 48.70%
Precision: 47.50%
Recall: 50.00%

Accuracy: 80.00%
F1: 44.40%
Precision: 40.00%
Recall: 50.00%



### Neighborhood surrounding most viewed x, y coordinate pairs

In [138]:
# AQ
# svm_classifier(range_x_train_aq_encoded, range_x_test_aq_encoded, range_y_train_aq_encoded, range_y_test_aq_encoded)

# BDI
# svm_classifier(range_x_train_bdi_encoded, range_x_test_bdi_encoded, range_y_train_bdi_encoded, range_y_test_bdi_encoded)

# WURS
# svm_classifier(range_x_train_wurs_encoded, range_x_test_wurs_encoded, range_y_train_wurs_encoded, range_y_test_wurs_encoded)

### Euclidean Distance

In [139]:
# AQ
svm_classifier(ed_x_train_aq_encoded, ed_x_test_aq_encoded, ed_y_train_aq_encoded, ed_y_test_aq_encoded)

# BDI
svm_classifier(ed_x_train_bdi_encoded, ed_x_test_bdi_encoded, ed_y_train_bdi_encoded, ed_y_test_bdi_encoded)

# WURS
svm_classifier(ed_x_train_wurs_encoded, ed_x_test_wurs_encoded, ed_y_train_wurs_encoded, ed_y_test_wurs_encoded)

Accuracy: 55.00%
F1: 35.50%
Precision: 27.50%
Recall: 50.00%

Accuracy: 95.00%
F1: 48.70%
Precision: 47.50%
Recall: 50.00%

Accuracy: 80.00%
F1: 44.40%
Precision: 40.00%
Recall: 50.00%



### Mean

In [140]:
# AQ
svm_classifier(mean_x_train_aq_encoded, mean_x_test_aq_encoded, mean_y_train_aq_encoded, mean_y_test_aq_encoded)

# BDI
svm_classifier(mean_x_train_bdi_encoded, mean_x_test_bdi_encoded, mean_y_train_bdi_encoded, mean_y_test_bdi_encoded)

# WURS
svm_classifier(mean_x_train_wurs_encoded, mean_x_test_wurs_encoded, mean_y_train_wurs_encoded, mean_y_test_wurs_encoded)

Accuracy: 55.00%
F1: 35.50%
Precision: 27.50%
Recall: 50.00%

Accuracy: 95.00%
F1: 48.70%
Precision: 47.50%
Recall: 50.00%

Accuracy: 80.00%
F1: 44.40%
Precision: 40.00%
Recall: 50.00%



### Standard deviation

In [141]:
# AQ
svm_classifier(std_x_train_aq_encoded, std_x_test_aq_encoded, std_y_train_aq_encoded, std_y_test_aq_encoded)

# BDI
svm_classifier(std_x_train_bdi_encoded, std_x_test_bdi_encoded, std_y_train_bdi_encoded, std_y_test_bdi_encoded)

# WURS
svm_classifier(std_x_train_wurs_encoded, std_x_test_wurs_encoded, std_y_train_wurs_encoded, std_y_test_wurs_encoded)

Accuracy: 55.00%
F1: 35.50%
Precision: 27.50%
Recall: 50.00%

Accuracy: 95.00%
F1: 48.70%
Precision: 47.50%
Recall: 50.00%

Accuracy: 80.00%
F1: 44.40%
Precision: 40.00%
Recall: 50.00%



### Median

In [142]:
# AQ
svm_classifier(med_x_train_aq_encoded, med_x_test_aq_encoded, med_y_train_aq_encoded, med_y_test_aq_encoded)

# BDI
svm_classifier(med_x_train_bdi_encoded, med_x_test_bdi_encoded, med_y_train_bdi_encoded, med_y_test_bdi_encoded)

# WURS
svm_classifier(med_x_train_wurs_encoded, med_x_test_wurs_encoded, med_y_train_wurs_encoded, med_y_test_wurs_encoded)

Accuracy: 55.00%
F1: 35.50%
Precision: 27.50%
Recall: 50.00%

Accuracy: 95.00%
F1: 48.70%
Precision: 47.50%
Recall: 50.00%

Accuracy: 80.00%
F1: 44.40%
Precision: 40.00%
Recall: 50.00%



### Pearsons CC

In [143]:
# AQ
svm_classifier(xy_pearson_cc_x_train_aq_encoded, xy_pearson_cc_x_test_aq_encoded, xy_pearson_cc_y_train_aq_encoded, xy_pearson_cc_y_test_aq_encoded)

# BDI
svm_classifier(xy_pearson_cc_x_train_bdi_encoded, xy_pearson_cc_x_test_bdi_encoded, xy_pearson_cc_y_train_bdi_encoded, xy_pearson_cc_y_test_bdi_encoded)

# WURS
svm_classifier(xy_pearson_cc_x_train_wurs_encoded, xy_pearson_cc_x_test_wurs_encoded, xy_pearson_cc_y_train_wurs_encoded, xy_pearson_cc_y_test_wurs_encoded)

Accuracy: 35.00%
F1: 33.50%
Precision: 33.30%
Recall: 33.80%

Accuracy: 95.00%
F1: 48.70%
Precision: 47.50%
Recall: 50.00%

Accuracy: 75.00%
F1: 42.90%
Precision: 39.50%
Recall: 46.90%



### Spearmans CC

In [144]:
# AQ
svm_classifier(xy_spearman_cc_x_train_aq_encoded, xy_spearman_cc_x_test_aq_encoded, xy_spearman_cc_y_train_aq_encoded, xy_spearman_cc_y_test_aq_encoded)

# BDI
svm_classifier(xy_spearman_cc_x_train_bdi_encoded, xy_spearman_cc_x_test_bdi_encoded, xy_spearman_cc_y_train_bdi_encoded, xy_spearman_cc_y_test_bdi_encoded)

# WURS
svm_classifier(xy_spearman_cc_x_train_wurs_encoded, xy_spearman_cc_x_test_wurs_encoded, xy_spearman_cc_y_train_wurs_encoded, xy_spearman_cc_y_test_wurs_encoded)

Accuracy: 55.00%
F1: 52.00%
Precision: 53.60%
Recall: 53.00%

Accuracy: 95.00%
F1: 48.70%
Precision: 47.50%
Recall: 50.00%

Accuracy: 80.00%
F1: 44.40%
Precision: 40.00%
Recall: 50.00%



### Range

In [145]:
# AQ
svm_classifier(xy_range_x_train_aq_encoded, xy_range_x_test_aq_encoded, xy_range_y_train_aq_encoded, xy_range_y_test_aq_encoded)

# BDI
svm_classifier(xy_range_x_train_bdi_encoded, xy_range_x_test_bdi_encoded, xy_range_y_train_bdi_encoded, xy_range_y_test_bdi_encoded)

# WURS
svm_classifier(xy_range_x_train_wurs_encoded, xy_range_x_test_wurs_encoded, xy_range_y_train_wurs_encoded, xy_range_y_test_wurs_encoded)

Accuracy: 25.00%
F1: 20.00%
Precision: 17.90%
Recall: 22.70%

Accuracy: 95.00%
F1: 48.70%
Precision: 47.50%
Recall: 50.00%

Accuracy: 80.00%
F1: 44.40%
Precision: 40.00%
Recall: 50.00%



### Unique values

In [146]:
# AQ
svm_classifier(xy_unique_x_train_aq_encoded, xy_unique_x_test_aq_encoded, xy_unique_y_train_aq_encoded, xy_unique_y_test_aq_encoded)

# BDI
svm_classifier(xy_unique_x_train_bdi_encoded, xy_unique_x_test_bdi_encoded, xy_unique_y_train_bdi_encoded, xy_unique_y_test_bdi_encoded)

# WURS
svm_classifier(xy_unique_x_train_wurs_encoded, xy_unique_x_test_wurs_encoded, xy_unique_y_train_wurs_encoded, xy_unique_y_test_wurs_encoded)

Accuracy: 55.00%
F1: 35.50%
Precision: 27.50%
Recall: 50.00%

Accuracy: 95.00%
F1: 48.70%
Precision: 47.50%
Recall: 50.00%

Accuracy: 80.00%
F1: 44.40%
Precision: 40.00%
Recall: 50.00%



# **Classifiers with Cross Validation**

## **Logistic Regression**

In [147]:
from sklearn.model_selection import cross_val_score, cross_val_predict

In [148]:
# Logistic regression classifier with cross validation

def logistic_regression_cv(x_train, y_train, k=10):
    # Create logistic regression object and fit classifier
    lr_model_cv = LogisticRegression(random_state=42, max_iter=10000).fit(x_train, y_train)
    
    # Perform k-fold cross-validation and calculate accuracy, precision, recall, and F1 scores for each fold
    scores = cross_val_score(lr_model_cv, x_train, y_train, cv=k, scoring='accuracy')
    precision_scores = cross_val_score(lr_model_cv, x_train, y_train, cv=k, scoring='precision')
    recall_scores = cross_val_score(lr_model_cv, x_train, y_train, cv=k, scoring='recall')
    f1_scores = cross_val_score(lr_model_cv, x_train, y_train, cv=k, scoring='f1')

    # Perform k-fold cross-validation and find predicted values
    y_pred = cross_val_predict(lr_model_cv, x_train, y_train, cv=k)
    
    # # Print the predicted values for each fold
    # for i in range(k):
    #     print(f'Fold {i+1}:')
    #     print(y_pred)
    #     print()
    
    # # Print the evaluation metrics for each fold
    # for i in range(k):
    #     print(f'Fold {i+1}:')
    #     print(f'Accuracy: {scores[i]:.2f}')
    #     print(f'Precision: {precision_scores[i]:.2f}')
    #     print(f'Recall: {recall_scores[i]:.2f}')
    #     print(f'F1: {f1_scores[i]:.2f}')
    #     print()
    
    # Calculate the mean and standard deviation of the evaluation metrics
    mean_accuracy = scores.mean()
    mean_precision = precision_scores.mean()
    mean_recall = recall_scores.mean()
    mean_f1 = f1_scores.mean()

    print(f'Accuracy: {mean_accuracy * 100.0:.2f}%')
    print(f'F1: {mean_f1 * 100.0:.2f}%')
    print(f'precision: {mean_precision * 100.0:.2f}%')
    print(f'Recall: {mean_recall * 100.0:.2f}%')
    print()

### Least viewed x, y coordinate pairs

In [149]:
# AQ
logistic_regression_cv(lv_x_train_aq_encoded, lv_y_train_aq_encoded)

# BDI
logistic_regression_cv(lv_x_train_bdi_encoded, lv_y_train_bdi_encoded)

# WURS
logistic_regression_cv(lv_x_train_wurs_encoded, lv_y_train_wurs_encoded)

Accuracy: 51.25%
F1: 3.33%
precision: 3.33%
Recall: 3.33%

Accuracy: 78.75%
F1: 88.00%
precision: 79.82%
Recall: 98.57%

Accuracy: 63.75%
F1: 77.66%
precision: 65.42%
Recall: 96.67%



### Most viewed x, y coordinate pairs

In [150]:
# AQ
logistic_regression_cv(mv_x_train_aq_encoded, mv_y_train_aq_encoded)

# BDI
logistic_regression_cv(mv_x_train_bdi_encoded, mv_y_train_bdi_encoded)

# WURS
logistic_regression_cv(mv_x_train_wurs_encoded, mv_y_train_wurs_encoded)

Accuracy: 56.25%
F1: 48.99%
precision: 57.50%
Recall: 46.67%

Accuracy: 80.00%
F1: 88.76%
precision: 80.00%
Recall: 100.00%

Accuracy: 66.25%
F1: 79.56%
precision: 66.25%
Recall: 100.00%



### Neighborhood surrounding most viewed x, y coordinate pairs

In [151]:
# AQ
# logistic_regression_cv(range_x_train_aq_ecoded, range_y_train_aq_encoded)

# BDI
# logistic_regression_cv(range_x_train_bdi_ecoded, range_y_train_bdi_encoded)

# WURS
# logistic_regression_cv(range_x_train_wurs_ecoded, range_y_train_wurs_encoded)

### Euclidean Distance

In [152]:
# AQ
logistic_regression_cv(ed_x_train_aq_encoded, ed_y_train_aq_encoded)

# BDI
logistic_regression_cv(ed_x_train_bdi_encoded, ed_y_train_bdi_encoded)

# WURS
logistic_regression_cv(ed_x_train_wurs_encoded, ed_y_train_wurs_encoded)

Accuracy: 55.00%
F1: 29.71%
precision: 45.83%
Recall: 23.33%

Accuracy: 80.00%
F1: 88.76%
precision: 80.00%
Recall: 100.00%

Accuracy: 66.25%
F1: 79.56%
precision: 66.25%
Recall: 100.00%



### Mean

In [153]:
# AQ
logistic_regression_cv(mean_x_train_aq_encoded, mean_y_train_aq_encoded)

# BDI
logistic_regression_cv(mean_x_train_bdi_encoded, mean_y_train_bdi_encoded)

# WURS
logistic_regression_cv(mean_x_train_wurs_encoded, mean_y_train_wurs_encoded)

Accuracy: 55.00%
F1: 19.00%
precision: 30.00%
Recall: 15.00%

Accuracy: 76.25%
F1: 85.98%
precision: 78.81%
Recall: 95.00%

Accuracy: 66.25%
F1: 79.56%
precision: 66.25%
Recall: 100.00%



### Standard deviation

In [154]:
# AQ
logistic_regression_cv(std_x_train_aq_encoded, std_y_train_aq_encoded)

# BDI
logistic_regression_cv(std_x_train_bdi_encoded, std_y_train_bdi_encoded)

# WURS
logistic_regression_cv(std_x_train_wurs_encoded, std_y_train_wurs_encoded)

Accuracy: 45.00%
F1: 25.43%
precision: 32.50%
Recall: 21.67%

Accuracy: 78.75%
F1: 88.00%
precision: 79.82%
Recall: 98.57%

Accuracy: 65.00%
F1: 78.53%
precision: 65.71%
Recall: 98.00%



### Median

In [155]:
# AQ
logistic_regression_cv(med_x_train_aq_encoded, med_y_train_aq_encoded)

# BDI
logistic_regression_cv(med_x_train_bdi_encoded, med_y_train_bdi_encoded)

# WURS
logistic_regression_cv(med_x_train_wurs_encoded, med_y_train_wurs_encoded)

Accuracy: 55.00%
F1: 27.10%
precision: 36.67%
Recall: 23.33%

Accuracy: 77.50%
F1: 86.86%
precision: 79.17%
Recall: 96.67%

Accuracy: 66.25%
F1: 79.56%
precision: 66.25%
Recall: 100.00%



### Pearsons CC

In [156]:
# AQ
logistic_regression_cv(xy_pearson_cc_x_train_aq_encoded, xy_pearson_cc_y_train_aq_encoded)

# BDI
logistic_regression_cv(xy_pearson_cc_x_train_bdi_encoded, xy_pearson_cc_y_train_bdi_encoded)

# WURS
logistic_regression_cv(xy_pearson_cc_x_train_wurs_encoded, xy_pearson_cc_y_train_wurs_encoded)

Accuracy: 52.50%
F1: 0.00%
precision: 0.00%
Recall: 0.00%

Accuracy: 80.00%
F1: 88.76%
precision: 80.00%
Recall: 100.00%

Accuracy: 66.25%
F1: 79.56%
precision: 66.25%
Recall: 100.00%



### Spearmans CC

In [157]:
# AQ
logistic_regression_cv(xy_spearman_cc_x_train_aq_encoded, xy_spearman_cc_y_train_aq_encoded)

# BDI
logistic_regression_cv(xy_spearman_cc_x_train_bdi_encoded, xy_spearman_cc_y_train_bdi_encoded)

# WURS
logistic_regression_cv(xy_spearman_cc_x_train_wurs_encoded, xy_spearman_cc_y_train_wurs_encoded)

Accuracy: 51.25%
F1: 0.00%
precision: 0.00%
Recall: 0.00%

Accuracy: 80.00%
F1: 88.76%
precision: 80.00%
Recall: 100.00%

Accuracy: 66.25%
F1: 79.56%
precision: 66.25%
Recall: 100.00%



### Range

In [158]:
# AQ
logistic_regression_cv(xy_range_x_train_aq_encoded, xy_range_y_train_aq_encoded)

# BDI
logistic_regression_cv(xy_range_x_train_bdi_encoded, xy_range_y_train_bdi_encoded)

# WURS
logistic_regression_cv(xy_range_x_train_wurs_encoded, xy_range_y_train_wurs_encoded)

Accuracy: 47.50%
F1: 22.69%
precision: 35.33%
Recall: 18.33%

Accuracy: 77.50%
F1: 87.12%
precision: 79.46%
Recall: 96.90%

Accuracy: 66.25%
F1: 79.56%
precision: 66.25%
Recall: 100.00%



### Unique values

In [159]:
# AQ
logistic_regression_cv(xy_unique_x_train_aq_encoded, xy_unique_y_train_aq_encoded)

# BDI
logistic_regression_cv(xy_unique_x_train_bdi_encoded, xy_unique_y_train_bdi_encoded)

# WURS
logistic_regression_cv(xy_unique_x_train_wurs_encoded, xy_unique_y_train_wurs_encoded)

Accuracy: 57.50%
F1: 36.94%
precision: 41.50%
Recall: 35.00%

Accuracy: 80.00%
F1: 88.76%
precision: 80.00%
Recall: 100.00%

Accuracy: 66.25%
F1: 79.56%
precision: 66.25%
Recall: 100.00%



## **Decision Tree**

In [160]:
# Decision Tree classifier with cross validation

def decision_tree_cv(x_train, y_train, k=10):
    
    # Create decision tree object and fit classifier
    dt_clf_cv = DecisionTreeClassifier(random_state=42).fit(x_train, y_train)
    
    # Perform k-fold cross-validation and  calculate accuracy, precision, recall, and F1 scores
    scores = cross_val_score(dt_clf_cv, x_train, y_train, cv=k, scoring='accuracy')
    precision_scores = cross_val_score(dt_clf_cv, x_train, y_train, cv=k, scoring='precision')
    recall_scores = cross_val_score(dt_clf_cv, x_train, y_train, cv=k, scoring='recall')
    f1_scores = cross_val_score(dt_clf_cv, x_train, y_train, cv=k, scoring='f1')

    # Perform k-fold cross-validation and find predicted values
    # y_pred = cross_val_predict(dt_clf_cv, x_train, y_train, cv=k)
    
    # # Print the predicted values for each fold
    # for i in range(k):
    #     print(f'Fold {i+1}:')
    #     print(y_pred)
    #     print()
    
    # Print the evaluation metrics for each fold
    # for i in range(k):
    #     print(f'Fold {i+1}:')
    #     print(f'Accuracy: {scores[i]:.2f}')
    #     print(f'Precision: {precision_scores[i]:.2f}')
    #     print(f'Recall: {recall_scores[i]:.2f}')
    #     print(f'F1: {f1_scores[i]:.2f}')
    #     print()
    
    # Calculate the mean and standard deviation of the evaluation metrics
    mean_accuracy = scores.mean()
    mean_precision = precision_scores.mean()
    mean_recall = recall_scores.mean()
    mean_f1 = f1_scores.mean()

    print(f'Accuracy: {mean_accuracy * 100.0:.2f}%')
    print(f'F1: {mean_f1 * 100.0:.2f}%')
    print(f'Precision: {mean_precision * 100.0:.2f}%')
    print(f'Recall: {mean_recall * 100.0:.2f}%')
    print()

### Least viewed x, y coordinate pairs

In [161]:
# AQ
decision_tree_cv(lv_x_train_aq_encoded, lv_y_train_aq_encoded)

# BDI
decision_tree_cv(lv_x_train_bdi_encoded, lv_y_train_bdi_encoded)

# WURS
decision_tree_cv(lv_x_train_wurs_encoded, lv_y_train_wurs_encoded)

Accuracy: 46.25%
F1: 41.07%
Precision: 37.67%
Recall: 45.83%

Accuracy: 76.25%
F1: 85.20%
Precision: 84.05%
Recall: 87.86%

Accuracy: 55.00%
F1: 65.27%
Precision: 66.67%
Recall: 64.33%



### Most viewed x, y coordinate pairs

In [162]:
# AQ
decision_tree_cv(mv_x_train_aq_encoded, mv_y_train_aq_encoded)

# BDI
decision_tree_cv(mv_x_train_bdi_encoded, mv_y_train_bdi_encoded)

# WURS
decision_tree_cv(mv_x_train_wurs_encoded, mv_y_train_wurs_encoded)

Accuracy: 51.25%
F1: 39.93%
Precision: 40.17%
Recall: 41.67%

Accuracy: 61.25%
F1: 74.07%
Precision: 76.23%
Recall: 75.48%

Accuracy: 52.50%
F1: 64.24%
Precision: 65.38%
Recall: 64.67%



### Neighborhood surrounding most viewed x, y coordinate pairs

In [163]:
# AQ
# decision_tree_cv(range_x_train_aq_encoded, range_y_train_aq_encoded)

# BDI
# decision_tree_cv(range_x_train_bdi_encoded, range_y_train_bdi_encoded)

# WURS
# decision_tree_cv(range_x_train_wurs_encoded, range_y_train_wurs_encoded)

### Euclidean Distance

In [164]:
# AQ
decision_tree_cv(ed_x_train_aq_encoded, ed_y_train_aq_encoded)

# BDI
decision_tree_cv(ed_x_train_bdi_encoded, ed_y_train_bdi_encoded)

# WURS
decision_tree_cv(ed_x_train_wurs_encoded, ed_y_train_wurs_encoded)

Accuracy: 52.50%
F1: 48.88%
Precision: 48.83%
Recall: 50.83%

Accuracy: 75.00%
F1: 84.12%
Precision: 83.89%
Recall: 85.95%

Accuracy: 58.75%
F1: 67.64%
Precision: 68.83%
Recall: 68.00%



### Mean

In [165]:
# AQ
decision_tree_cv(mean_x_train_aq_encoded, mean_y_train_aq_encoded)

# BDI
decision_tree_cv(mean_x_train_bdi_encoded, mean_y_train_bdi_encoded)

# WURS
decision_tree_cv(mean_x_train_wurs_encoded, mean_y_train_wurs_encoded)

Accuracy: 56.25%
F1: 47.40%
Precision: 48.00%
Recall: 50.83%

Accuracy: 70.00%
F1: 80.79%
Precision: 80.71%
Recall: 82.86%

Accuracy: 53.75%
F1: 63.44%
Precision: 66.21%
Recall: 62.00%



### Standard deviation

In [166]:
# AQ
decision_tree_cv(std_x_train_aq_encoded, std_y_train_aq_encoded)

# BDI
decision_tree_cv(std_x_train_bdi_encoded, std_y_train_bdi_encoded)

# WURS
decision_tree_cv(std_x_train_wurs_encoded, std_y_train_wurs_encoded)

Accuracy: 56.25%
F1: 52.60%
Precision: 55.67%
Recall: 54.17%

Accuracy: 68.75%
F1: 80.17%
Precision: 80.60%
Recall: 81.67%

Accuracy: 48.75%
F1: 57.13%
Precision: 61.98%
Recall: 54.33%



### Median

In [167]:
# AQ
decision_tree_cv(med_x_train_aq_encoded, med_y_train_aq_encoded)

# BDI
decision_tree_cv(med_x_train_bdi_encoded, med_y_train_bdi_encoded)

# WURS
decision_tree_cv(med_x_train_wurs_encoded, med_y_train_wurs_encoded)

Accuracy: 65.00%
F1: 56.71%
Precision: 58.67%
Recall: 62.50%

Accuracy: 70.00%
F1: 79.50%
Precision: 83.38%
Recall: 78.10%

Accuracy: 53.75%
F1: 64.50%
Precision: 64.30%
Recall: 67.67%



### Pearsons CC

In [168]:
# AQ
decision_tree_cv(xy_pearson_cc_x_train_aq_encoded, xy_pearson_cc_y_train_aq_encoded)

# BDI
decision_tree_cv(xy_pearson_cc_x_train_bdi_encoded, xy_pearson_cc_y_train_bdi_encoded)

# WURS
decision_tree_cv(xy_pearson_cc_x_train_wurs_encoded, xy_pearson_cc_y_train_wurs_encoded)

Accuracy: 52.50%
F1: 48.81%
Precision: 49.17%
Recall: 52.50%

Accuracy: 60.00%
F1: 73.00%
Precision: 74.70%
Recall: 73.33%

Accuracy: 55.00%
F1: 61.44%
Precision: 71.48%
Recall: 63.67%



### Spearmans CC

In [169]:
# AQ
decision_tree_cv(xy_spearman_cc_x_train_aq_encoded, xy_spearman_cc_y_train_aq_encoded)

# BDI
decision_tree_cv(xy_spearman_cc_x_train_bdi_encoded, xy_spearman_cc_y_train_bdi_encoded)

# WURS
decision_tree_cv(xy_spearman_cc_x_train_wurs_encoded, xy_spearman_cc_y_train_wurs_encoded)

Accuracy: 57.50%
F1: 55.63%
Precision: 57.50%
Recall: 58.33%

Accuracy: 60.00%
F1: 73.65%
Precision: 73.73%
Recall: 74.29%

Accuracy: 52.50%
F1: 63.78%
Precision: 64.38%
Recall: 64.00%



### Range

In [170]:
# AQ
decision_tree_cv(xy_range_x_train_aq_encoded, xy_range_y_train_aq_encoded)

# BDI
decision_tree_cv(xy_range_x_train_bdi_encoded, xy_range_y_train_bdi_encoded)

# WURS
decision_tree_cv(xy_range_x_train_wurs_encoded, xy_range_y_train_wurs_encoded)

Accuracy: 55.00%
F1: 45.05%
Precision: 51.67%
Recall: 45.83%

Accuracy: 60.00%
F1: 72.89%
Precision: 76.08%
Recall: 71.90%

Accuracy: 56.25%
F1: 64.98%
Precision: 72.30%
Recall: 63.00%



### Unique values

In [171]:
# AQ
decision_tree_cv(xy_unique_x_train_aq_encoded, xy_unique_y_train_aq_encoded)

# BDI
decision_tree_cv(xy_unique_x_train_bdi_encoded, xy_unique_y_train_bdi_encoded)

# WURS
decision_tree_cv(xy_unique_x_train_wurs_encoded, xy_unique_y_train_wurs_encoded)

Accuracy: 43.75%
F1: 41.17%
Precision: 41.69%
Recall: 43.33%

Accuracy: 66.25%
F1: 78.61%
Precision: 80.02%
Recall: 80.48%

Accuracy: 43.75%
F1: 55.91%
Precision: 57.43%
Recall: 57.00%



## **Random Forest**

In [172]:
# Random Forest classifier with cross validation

def random_forest_cv(x_train, y_train, k=10):
    
    # Create random forest object and fit data
    rf_clf_cv = RandomForestClassifier(n_estimators=100, random_state=42).fit(x_train, y_train)
    
    # Perform k-fold cross-validation and  calculate accuracy, precision, recall, and F1 scores
    scores = cross_val_score(rf_clf_cv, x_train, y_train, cv=k, scoring='accuracy')
    precision_scores = cross_val_score(rf_clf_cv, x_train, y_train, cv=k, scoring='precision')
    recall_scores = cross_val_score(rf_clf_cv, x_train, y_train, cv=k, scoring='recall')
    f1_scores = cross_val_score(rf_clf_cv, x_train, y_train, cv=k, scoring='f1')

    # Perform k-fold cross-validation and find predicted values
    # y_pred = cross_val_predict(rf_clf_cv, x_train, y_train, cv=k)
    
    # # Print the predicted values for each fold
    # for i in range(k):
    #     print(f'Fold {i+1}:')
    #     print(y_pred)
    #     print()
    
    # Print the evaluation metrics for each fold
    # for i in range(k):
    #     print(f'Fold {i+1}:')
    #     print(f'Accuracy: {scores[i]:.2f}')
    #     print(f'Precision: {precision_scores[i]:.2f}')
    #     print(f'Recall: {recall_scores[i]:.2f}')
    #     print(f'F1: {f1_scores[i]:.2f}')
    #     print()
    
    # Calculate the mean and standard deviation of the evaluation metrics
    mean_accuracy = scores.mean()
    mean_precision = precision_scores.mean()
    mean_recall = recall_scores.mean()
    mean_f1 = f1_scores.mean()
    
    print(f'Accuracy: {mean_accuracy * 100.0:.2f}%')
    print(f'F1: {mean_f1 * 100.0:.2f}%')
    print(f'Precision: {mean_precision * 100.0:.2f}%')
    print(f'Recall: {mean_recall * 100.0:.2f}%')
    print()

### Least viewed x, y coordinate pairs

In [173]:
# AQ
random_forest_cv(lv_x_train_aq_encoded, lv_y_train_aq_encoded)

# BDI
random_forest_cv(lv_x_train_bdi_encoded, lv_y_train_bdi_encoded)

# WURS
random_forest_cv(lv_x_train_wurs_encoded, lv_y_train_wurs_encoded)

Accuracy: 46.25%
F1: 37.27%
Precision: 37.33%
Recall: 40.83%

Accuracy: 78.75%
F1: 86.86%
Precision: 84.23%
Recall: 90.71%

Accuracy: 53.75%
F1: 65.35%
Precision: 63.48%
Recall: 68.00%



### Most viewed x, y coordinate pairs

In [174]:
# AQ
random_forest_cv(mv_x_train_aq_encoded, mv_y_train_aq_encoded)

# BDI
random_forest_cv(mv_x_train_bdi_encoded, mv_y_train_bdi_encoded)

# WURS
random_forest_cv(mv_x_train_wurs_encoded, mv_y_train_wurs_encoded)

Accuracy: 57.50%
F1: 49.81%
Precision: 55.17%
Recall: 50.00%

Accuracy: 68.75%
F1: 80.94%
Precision: 77.26%
Recall: 85.95%

Accuracy: 57.50%
F1: 69.96%
Precision: 67.76%
Recall: 75.67%



### Neighborhood surrounding most viewed x, y coordinate pairs

In [175]:
# AQ
# random_forest_cv(range_x_train_aq_encoded, range_y_train_aq_encoded)

# BDI
# random_forest_cv(range_x_train_bdi_encoded, range_y_train_bdi_encoded)

# WURS
# random_forest_cv(range_x_train_wurs_encoded, range_y_train_wurs_encoded)

### Euclidean Distance

In [176]:
# AQ
random_forest_cv(ed_x_train_aq_encoded, ed_y_train_aq_encoded)

# BDI
random_forest_cv(ed_x_train_bdi_encoded, ed_y_train_bdi_encoded)

# WURS
random_forest_cv(ed_x_train_wurs_encoded, ed_y_train_wurs_encoded)

Accuracy: 52.50%
F1: 48.88%
Precision: 48.83%
Recall: 50.83%

Accuracy: 75.00%
F1: 84.12%
Precision: 83.89%
Recall: 85.95%

Accuracy: 58.75%
F1: 67.64%
Precision: 68.83%
Recall: 68.00%



### Mean

In [177]:
# AQ
random_forest_cv(mean_x_train_aq_encoded, mean_y_train_aq_encoded)

# BDI
random_forest_cv(mean_x_train_bdi_encoded, mean_y_train_bdi_encoded)

# WURS
random_forest_cv(mean_x_train_wurs_encoded, mean_y_train_wurs_encoded)

Accuracy: 61.25%
F1: 56.41%
Precision: 62.17%
Recall: 61.67%

Accuracy: 77.50%
F1: 86.08%
Precision: 86.01%
Recall: 87.62%

Accuracy: 62.50%
F1: 74.21%
Precision: 69.93%
Recall: 81.00%



### Standard deviation

In [178]:
# AQ
random_forest_cv(std_x_train_aq_encoded, std_y_train_aq_encoded)

# BDI
random_forest_cv(std_x_train_bdi_encoded, std_y_train_bdi_encoded)

# WURS
random_forest_cv(std_x_train_wurs_encoded, std_y_train_wurs_encoded)

Accuracy: 48.75%
F1: 36.35%
Precision: 41.67%
Recall: 35.00%

Accuracy: 81.25%
F1: 89.32%
Precision: 82.32%
Recall: 98.57%

Accuracy: 57.50%
F1: 69.69%
Precision: 65.44%
Recall: 76.00%



### Median

In [179]:
# AQ
random_forest_cv(med_x_train_aq_encoded, med_y_train_aq_encoded)

# BDI
random_forest_cv(med_x_train_bdi_encoded, med_y_train_bdi_encoded)

# WURS
random_forest_cv(med_x_train_wurs_encoded, med_y_train_wurs_encoded)

Accuracy: 57.50%
F1: 46.82%
Precision: 61.55%
Recall: 47.50%

Accuracy: 76.25%
F1: 85.18%
Precision: 84.29%
Recall: 87.62%

Accuracy: 52.50%
F1: 62.57%
Precision: 66.81%
Recall: 66.33%



### Pearsons CC

In [180]:
# AQ
random_forest_cv(xy_pearson_cc_x_train_aq_encoded, xy_pearson_cc_y_train_aq_encoded)

# BDI
random_forest_cv(xy_pearson_cc_x_train_bdi_encoded, xy_pearson_cc_y_train_bdi_encoded)

# WURS
random_forest_cv(xy_pearson_cc_x_train_wurs_encoded, xy_pearson_cc_y_train_wurs_encoded)

Accuracy: 52.50%
F1: 48.81%
Precision: 49.17%
Recall: 52.50%

Accuracy: 60.00%
F1: 73.00%
Precision: 74.70%
Recall: 73.33%

Accuracy: 55.00%
F1: 61.44%
Precision: 71.48%
Recall: 63.67%



### Spearmans CC

In [181]:
# AQ
random_forest_cv(xy_spearman_cc_x_train_aq_encoded, xy_spearman_cc_y_train_aq_encoded)

# BDI
random_forest_cv(xy_spearman_cc_x_train_bdi_encoded, xy_spearman_cc_y_train_bdi_encoded)

# WURS
random_forest_cv(xy_spearman_cc_x_train_wurs_encoded, xy_spearman_cc_y_train_wurs_encoded)

Accuracy: 57.50%
F1: 55.63%
Precision: 57.50%
Recall: 58.33%

Accuracy: 60.00%
F1: 73.65%
Precision: 73.73%
Recall: 74.29%

Accuracy: 52.50%
F1: 63.78%
Precision: 64.38%
Recall: 64.00%



### Range

In [182]:
# AQ
random_forest_cv(xy_range_x_train_aq_encoded, xy_range_y_train_aq_encoded)

# BDI
random_forest_cv(xy_range_x_train_bdi_encoded, xy_range_y_train_bdi_encoded)

# WURS
random_forest_cv(xy_range_x_train_wurs_encoded, xy_range_y_train_wurs_encoded)

Accuracy: 51.25%
F1: 41.26%
Precision: 51.67%
Recall: 37.50%

Accuracy: 73.75%
F1: 84.18%
Precision: 79.55%
Recall: 90.71%

Accuracy: 53.75%
F1: 66.65%
Precision: 62.83%
Recall: 74.00%



### Unique values

In [183]:
# AQ
random_forest_cv(xy_unique_x_train_aq_encoded, xy_unique_y_train_aq_encoded)

# BDI
random_forest_cv(xy_unique_x_train_bdi_encoded, xy_unique_y_train_bdi_encoded)

# WURS
random_forest_cv(xy_unique_x_train_wurs_encoded, xy_unique_y_train_wurs_encoded)

Accuracy: 42.50%
F1: 34.29%
Precision: 36.50%
Recall: 34.17%

Accuracy: 73.75%
F1: 84.16%
Precision: 82.02%
Recall: 87.86%

Accuracy: 53.75%
F1: 65.86%
Precision: 62.07%
Recall: 71.67%



## **SVM**

In [184]:
# SVM classifier with cross validation

def svm_classifier_cv(x_train, y_train, k=10):
    
    # Create support vector machine object and fit classifier
    svm_clf_cv = svm.SVC(random_state=42).fit(x_train, y_train)
    
    # Perform k-fold cross-validation and  calculate accuracy, precision, recall, and F1 scores
    scores = cross_val_score(svm_clf_cv, x_train, y_train, cv=k, scoring='accuracy')
    precision_scores = cross_val_score(svm_clf_cv, x_train, y_train, cv=k, scoring='precision')
    recall_scores = cross_val_score(svm_clf_cv, x_train, y_train, cv=k, scoring='recall')
    f1_scores = cross_val_score(svm_clf_cv, x_train, y_train, cv=k, scoring='f1')

    # Perform k-fold cross-validation and find predicted values
    # y_pred = cross_val_predict(svm_clf_cv, x_train, y_train, cv=k)
    
    # # Print the predicted values for each fold
    # for i in range(k):
    #     print(f'Fold {i+1}:')
    #     print(y_pred)
    #     print()
    
    # Print the evaluation metrics for each fold
    # for i in range(k):
    #     print(f'Fold {i+1}:')
    #     print(f'Accuracy: {scores[i]:.2f}')
    #     print(f'Precision: {precision_scores[i]:.2f}')
    #     print(f'Recall: {recall_scores[i]:.2f}')
    #     print(f'F1: {f1_scores[i]:.2f}')
    #     print()
    
    # Calculate the mean and standard deviation of the evaluation metrics
    mean_accuracy = scores.mean()
    mean_precision = precision_scores.mean()
    mean_recall = recall_scores.mean()
    mean_f1 = f1_scores.mean()
    
    print(f'Accuracy: {mean_accuracy * 100.0:.2f}%')
    print(f'F1: {mean_f1 * 100.0:.2f}%')
    print(f'Precision: {mean_precision * 100.0:.2f}%')
    print(f'Recall: {mean_recall * 100.0:.2f}%')
    print()

### Least viewed x, y coordinate pairs

In [185]:
# AQ
svm_classifier_cv(lv_x_train_aq_encoded, lv_y_train_aq_encoded)

# BDI
svm_classifier_cv(lv_x_train_bdi_encoded, lv_y_train_bdi_encoded)

# WURS
svm_classifier_cv(lv_x_train_wurs_encoded, lv_y_train_wurs_encoded)

Accuracy: 53.75%
F1: 0.00%
Precision: 0.00%
Recall: 0.00%

Accuracy: 80.00%
F1: 88.76%
Precision: 80.00%
Recall: 100.00%

Accuracy: 66.25%
F1: 79.56%
Precision: 66.25%
Recall: 100.00%



### Most viewed x, y coordinate pairs

In [186]:
# AQ
svm_classifier_cv(mv_x_train_aq_encoded, mv_y_train_aq_encoded)

# BDI
svm_classifier_cv(mv_x_train_bdi_encoded, mv_y_train_bdi_encoded)

# WURS
svm_classifier_cv(mv_x_train_wurs_encoded, mv_y_train_wurs_encoded)

Accuracy: 60.00%
F1: 55.42%
Precision: 62.50%
Recall: 52.50%

Accuracy: 80.00%
F1: 88.76%
Precision: 80.00%
Recall: 100.00%

Accuracy: 66.25%
F1: 79.56%
Precision: 66.25%
Recall: 100.00%



### Neighborhood surrounding most viewed x, y coordinate pairs

In [187]:
# AQ
# svm_classifier_cv(range_x_train_aq_encoded, range_y_train_aq_encoded)

# BDI
# svm_classifier_cv(range_x_train_bdi_encoded, range_y_train_bdi_encoded)

# WURS
# svm_classifier_cv(range_x_train_wurs_encoded, range_y_train_wurs_encoded)

### Euclidean Distance

In [188]:
# AQ
svm_classifier_cv(ed_x_train_aq_encoded, ed_y_train_aq_encoded)

# BDI
svm_classifier_cv(ed_x_train_bdi_encoded, ed_y_train_bdi_encoded)

# WURS
svm_classifier_cv(ed_x_train_wurs_encoded, ed_y_train_wurs_encoded)

Accuracy: 50.00%
F1: 0.00%
Precision: 0.00%
Recall: 0.00%

Accuracy: 80.00%
F1: 88.76%
Precision: 80.00%
Recall: 100.00%

Accuracy: 66.25%
F1: 79.56%
Precision: 66.25%
Recall: 100.00%



### Mean

In [189]:
# AQ
svm_classifier_cv(mean_x_train_aq_encoded, mean_y_train_aq_encoded)

# BDI
svm_classifier_cv(mean_x_train_bdi_encoded, mean_y_train_bdi_encoded)

# WURS
svm_classifier_cv(mean_x_train_wurs_encoded, mean_y_train_wurs_encoded)

Accuracy: 53.75%
F1: 0.00%
Precision: 0.00%
Recall: 0.00%

Accuracy: 80.00%
F1: 88.76%
Precision: 80.00%
Recall: 100.00%

Accuracy: 66.25%
F1: 79.56%
Precision: 66.25%
Recall: 100.00%



### Standard deviation

In [190]:
# AQ
svm_classifier_cv(std_x_train_aq_encoded, std_y_train_aq_encoded)

# BDI
svm_classifier_cv(std_x_train_bdi_encoded, std_y_train_bdi_encoded)

# WURS
svm_classifier_cv(std_x_train_wurs_encoded, std_y_train_wurs_encoded)

Accuracy: 51.25%
F1: 14.36%
Precision: 20.83%
Recall: 11.67%

Accuracy: 80.00%
F1: 88.76%
Precision: 80.00%
Recall: 100.00%

Accuracy: 66.25%
F1: 79.56%
Precision: 66.25%
Recall: 100.00%



### Median

In [191]:
# AQ
svm_classifier_cv(med_x_train_aq_encoded, med_y_train_aq_encoded)

# BDI
svm_classifier_cv(med_x_train_bdi_encoded, med_y_train_bdi_encoded)

# WURS
svm_classifier_cv(med_x_train_wurs_encoded, med_y_train_wurs_encoded)

Accuracy: 53.75%
F1: 0.00%
Precision: 0.00%
Recall: 0.00%

Accuracy: 80.00%
F1: 88.76%
Precision: 80.00%
Recall: 100.00%

Accuracy: 66.25%
F1: 79.56%
Precision: 66.25%
Recall: 100.00%



### Pearsons CC

In [192]:
# AQ
svm_classifier_cv(xy_pearson_cc_x_train_aq_encoded, xy_pearson_cc_y_train_aq_encoded)

# BDI
svm_classifier_cv(xy_pearson_cc_x_train_bdi_encoded, xy_pearson_cc_y_train_bdi_encoded)

# WURS
svm_classifier_cv(xy_pearson_cc_x_train_wurs_encoded, xy_pearson_cc_y_train_wurs_encoded)

Accuracy: 42.50%
F1: 26.95%
Precision: 26.98%
Recall: 30.83%

Accuracy: 80.00%
F1: 88.76%
Precision: 80.00%
Recall: 100.00%

Accuracy: 65.00%
F1: 78.68%
Precision: 65.89%
Recall: 98.33%



### Spearmans CC

In [193]:
# AQ
svm_classifier_cv(xy_spearman_cc_x_train_aq_encoded, xy_spearman_cc_y_train_aq_encoded)

# BDI
svm_classifier_cv(xy_spearman_cc_x_train_bdi_encoded, xy_spearman_cc_y_train_bdi_encoded)

# WURS
svm_classifier_cv(xy_spearman_cc_x_train_wurs_encoded, xy_spearman_cc_y_train_wurs_encoded)

Accuracy: 55.00%
F1: 36.93%
Precision: 54.17%
Recall: 30.83%

Accuracy: 80.00%
F1: 88.76%
Precision: 80.00%
Recall: 100.00%

Accuracy: 63.75%
F1: 77.66%
Precision: 65.36%
Recall: 96.33%



### Range

In [194]:
# AQ
svm_classifier_cv(xy_range_x_train_aq_encoded, xy_range_y_train_aq_encoded)

# BDI
svm_classifier_cv(xy_range_x_train_bdi_encoded, xy_range_y_train_bdi_encoded)

# WURS
svm_classifier_cv(xy_range_x_train_wurs_encoded, xy_range_y_train_wurs_encoded)

Accuracy: 38.75%
F1: 12.69%
Precision: 19.50%
Recall: 11.67%

Accuracy: 80.00%
F1: 88.76%
Precision: 80.00%
Recall: 100.00%

Accuracy: 66.25%
F1: 79.56%
Precision: 66.25%
Recall: 100.00%



### Unique values

In [195]:
# AQ
svm_classifier_cv(xy_unique_x_train_aq_encoded, xy_unique_y_train_aq_encoded)

# BDI
svm_classifier_cv(xy_unique_x_train_bdi_encoded, xy_unique_y_train_bdi_encoded)

# WURS
svm_classifier_cv(xy_unique_x_train_wurs_encoded, xy_unique_y_train_wurs_encoded)

Accuracy: 51.25%
F1: 0.00%
Precision: 0.00%
Recall: 0.00%

Accuracy: 80.00%
F1: 88.76%
Precision: 80.00%
Recall: 100.00%

Accuracy: 66.25%
F1: 79.56%
Precision: 66.25%
Recall: 100.00%



# **Train-Test Split Stratified**

In [196]:
# Split function for classification models as we need to convert y input into binary values

def stratified_split(input_feature, clinical_var, clinical_data=clinical_cleaned_df):
    
    # Creating x input feature
    x = input_feature

    # Creating y target variable
    y = clinical_data[clinical_var]

    # Find common indices of these two columns
    if len(x) != len(y):
        common_indices = list(x.index.intersection(y.index))

        # Update x,y variables with only common indices
        x = x.loc[common_indices]
        y = y.loc[common_indices]

    # Encode y variable based on clinical score cut offs
    if clinical_var == 'aq':
        y = y.apply(lambda val: 1 if val >= 22 else 0)
    elif clinical_var == 'bdi':
        y = y.apply(lambda val: 1 if val >= 11 else 0)
    elif clinical_var == 'wurs':
        y = y.apply(lambda val: 1 if val >= 30 else 0)

    # Convert to arrays
    x_arr = np.array(x)
    y_arr = np.array(y)

    # Perform train-test split
    x_train, x_test, y_train, y_test = train_test_split(x_arr, y_arr, test_size=0.2, stratify=y_arr, random_state=42)

    return x_train, x_test, y_train, y_test

## Least viewed x, y coordinate pairs

In [197]:
# least viwed = lv

# AQ
lv_x_train_aq_encoded_strat, lv_x_test_aq_encoded_strat, lv_y_train_aq_encoded_strat, lv_y_test_aq_encoded_strat = stratified_split(filtered_min_coordinates_list, 'aq')

# BDI
lv_x_train_bdi_encoded_strat, lv_x_test_bdi_encoded_strat, lv_y_train_bdi_encoded_strat, lv_y_test_bdi_encoded_strat = stratified_split(filtered_min_coordinates_list, 'bdi')

# WURS
lv_x_train_wurs_encoded_strat, lv_x_test_wurs_encoded_strat, lv_y_train_wurs_encoded_strat, lv_y_test_wurs_encoded_strat = stratified_split(filtered_min_coordinates_list, 'wurs')

## Most viewed x, y coordinate pairs

In [198]:
# most viewed = mv

# AQ
mv_x_train_aq_encoded_strat, mv_x_test_aq_encoded_strat, mv_y_train_aq_encoded_strat, mv_y_test_aq_encoded_strat = stratified_split(filtered_max_coordinates_list, 'aq')

# BDI
mv_x_train_bdi_encoded_strat, mv_x_test_bdi_encoded_strat, mv_y_train_bdi_encoded_strat, mv_y_test_bdi_encoded_strat = stratified_split(filtered_max_coordinates_list, 'bdi')

# WURS
mv_x_train_wurs_encoded_strat, mv_x_test_wurs_encoded_strat, mv_y_train_wurs_encoded_strat, mv_y_test_wurs_encoded_strat = stratified_split(filtered_max_coordinates_list, 'wurs')

## Neighborhood surrounding max x, y coordinate pairs

In [199]:
# AQ
# range_neighborhood_x_train_aq_encoded_strat, range_neighborhood_x_test_aq_encoded_strat, range_neighborhood_y_train_aq_encoded_strat, range_neighborhood_y_test_aq_encoded_strat = split(range_neighborhood_df, 'aq')

# BDI
# range_neighborhood_x_train_bdi_encoded_strat, range_neighborhood_x_test_bdi_encoded_strat, range_neighborhood_y_train_bdi_encoded_strat, range_neighborhood_y_test_bdi_encoded_strat = split(range_neighborhood_df, 'bdi')

# WURS
# range_neighborhood_x_train_wurs_encoded_strat, range_neighborhood_x_test_wurs_encoded_strat, range_neighborhood_y_train_wurs_encoded_strat, range_neighborhood_y_test_wurs_encoded_strat = split(range_neighborhood_df, 'wurs')


## Euclidean Distance

In [200]:
# AQ
ed_x_train_aq_encoded_strat, ed_x_test_aq_encoded_strat, ed_y_train_aq_encoded_strat, ed_y_test_aq_encoded_strat = stratified_split(ed_distances_pd, 'aq')

# BDI
ed_x_train_bdi_encoded_strat, ed_x_test_bdi_encoded_strat, ed_y_train_bdi_encoded_strat, ed_y_test_bdi_encoded_strat = stratified_split(ed_distances_pd, 'bdi')

# WURS
ed_x_train_wurs_encoded_strat, ed_x_test_wurs_encoded_strat, ed_y_train_wurs_encoded_strat, ed_y_test_wurs_encoded_strat = stratified_split(ed_distances_pd, 'wurs')

## Mean

In [201]:
# AQ
mean_x_train_aq_encoded_strat, mean_x_test_aq_encoded_strat, mean_y_train_aq_encoded_strat, mean_y_test_aq_encoded_strat = stratified_split(meanvals_df, 'aq')

# BDI
mean_x_train_bdi_encoded_strat, mean_x_test_bdi_encoded_strat, mean_y_train_bdi_encoded_strat, mean_y_test_bdi_encoded_strat = stratified_split(meanvals_df, 'bdi')

# WURS
mean_x_train_wurs_encoded_strat, mean_x_test_wurs_encoded_strat, mean_y_train_wurs_encoded_strat, mean_y_test_wurs_encoded_strat = stratified_split(meanvals_df, 'wurs')

## Standard deviation

In [202]:
# AQ
std_x_train_aq_encoded_strat, std_x_test_aq_encoded_strat, std_y_train_aq_encoded_strat, std_y_test_aq_encoded_strat = stratified_split(stdvals_df, 'aq')

# BDI
std_x_train_bdi_encoded_strat, std_x_test_bdi_encoded_strat, std_y_train_bdi_encoded_strat, std_y_test_bdi_encoded_strat = stratified_split(stdvals_df, 'bdi')

# WURS
std_x_train_wurs_encoded_strat, std_x_test_wurs_encoded_strat, std_y_train_wurs_encoded_strat, std_y_test_wurs_encoded_strat = stratified_split(stdvals_df, 'wurs')

## Median

In [203]:
# AQ
med_x_train_aq_encoded_strat, med_x_test_aq_encoded_strat, med_y_train_aq_encoded_strat, med_y_test_aq_encoded_strat = stratified_split(medvals_df, 'aq')

# BDI
med_x_train_bdi_encoded_strat, med_x_test_bdi_encoded_strat, med_y_train_bdi_encoded_strat, med_y_test_bdi_encoded_strat = stratified_split(medvals_df, 'bdi')

# WURS
med_x_train_wurs_encoded_strat, med_x_test_wurs_encoded_strat, med_y_train_wurs_encoded_strat, med_y_test_wurs_encoded_strat = stratified_split(medvals_df, 'wurs')

## Pearsons correlation coefficient

In [204]:
# AQ
xy_pearson_cc_x_train_aq_encoded_strat, xy_pearson_cc_x_test_aq_encoded_strat, xy_pearson_cc_y_train_aq_encoded_strat, xy_pearson_cc_y_test_aq_encoded_strat = stratified_split(xy_pearson_df, 'aq')

# BDI
xy_pearson_cc_x_train_bdi_encoded_strat, xy_pearson_cc_x_test_bdi_encoded_strat, xy_pearson_cc_y_train_bdi_encoded_strat, xy_pearson_cc_y_test_bdi_encoded_strat = stratified_split(xy_pearson_df, 'bdi')

# WURS
xy_pearson_cc_x_train_wurs_encoded_strat, xy_pearson_cc_x_test_wurs_encoded_strat, xy_pearson_cc_y_train_wurs_encoded_strat, xy_pearson_cc_y_test_wurs_encoded_strat = stratified_split(xy_pearson_df, 'wurs')

## Spearmans correlation coefficient

In [205]:
# AQ
xy_spearman_cc_x_train_aq_encoded_strat, xy_spearman_cc_x_test_aq_encoded_strat, xy_spearman_cc_y_train_aq_encoded_strat, xy_spearman_cc_y_test_aq_encoded_strat = stratified_split(xy_spearman_df, 'aq')

# BDI
xy_spearman_cc_x_train_bdi_encoded_strat, xy_spearman_cc_x_test_bdi_encoded_strat, xy_spearman_cc_y_train_bdi_encoded_strat, xy_spearman_cc_y_test_bdi_encoded_strat = stratified_split(xy_spearman_df, 'bdi')

# WURS
xy_spearman_cc_x_train_wurs_encoded_strat, xy_spearman_cc_x_test_wurs_encoded_strat, xy_spearman_cc_y_train_wurs_encoded_strat, xy_spearman_cc_y_test_wurs_encoded_strat = stratified_split(xy_spearman_df, 'wurs')

## Range

In [206]:
# AQ
xy_range_x_train_aq_encoded_strat, xy_range_x_test_aq_encoded_strat, xy_range_y_train_aq_encoded_strat, xy_range_y_test_aq_encoded_strat = stratified_split(xy_range_df, 'aq')

# BDI
xy_range_x_train_bdi_encoded_strat, xy_range_x_test_bdi_encoded_strat, xy_range_y_train_bdi_encoded_strat, xy_range_y_test_bdi_encoded_strat = stratified_split(xy_range_df, 'bdi')

# WURS
xy_range_x_train_wurs_encoded_strat, xy_range_x_test_wurs_encoded_strat, xy_range_y_train_wurs_encoded_strat, xy_range_y_test_wurs_encoded_strat = stratified_split(xy_range_df, 'wurs')

## Unique values mean

In [207]:
# AQ
xy_unique_x_train_aq_encoded_strat, xy_unique_x_test_aq_encoded_strat, xy_unique_y_train_aq_encoded_strat, xy_unique_y_test_aq_encoded_strat = stratified_split(unique_vals_mean_df, 'aq')

# BDI
xy_unique_x_train_bdi_encoded_strat, xy_unique_x_test_bdi_encoded_strat, xy_unique_y_train_bdi_encoded_strat, xy_unique_y_test_bdi_encoded_strat = stratified_split(unique_vals_mean_df, 'bdi')

# WURS
xy_unique_x_train_wurs_encoded_strat, xy_unique_x_test_wurs_encoded_strat, xy_unique_y_train_wurs_encoded_strat, xy_unique_y_test_wurs_encoded_strat = stratified_split(unique_vals_mean_df, 'wurs')

# **Classifiers with Stratified Train-Test Split**

## Logistic Regression

### Least viewed x, y coordinate pairs

In [208]:
# AQ
logistic_regression(lv_x_train_aq_encoded_strat, lv_x_test_aq_encoded_strat, lv_y_train_aq_encoded_strat, lv_y_test_aq_encoded_strat)

# BDI
logistic_regression(lv_x_train_bdi_encoded_strat, lv_x_test_bdi_encoded_strat, lv_y_train_bdi_encoded_strat, lv_y_test_bdi_encoded_strat)

# WURS
logistic_regression(lv_x_train_wurs_encoded_strat, lv_x_test_wurs_encoded_strat, lv_y_train_wurs_encoded_strat, lv_y_test_wurs_encoded_strat)

Accuracy: 55.00%
F1: 18.20%
Precision: 50.00%
Recall: 11.10%

Accuracy: 85.00%
F1: 91.90%
Precision: 85.00%
Recall: 100.00%

Accuracy: 70.00%
F1: 82.40%
Precision: 70.00%
Recall: 100.00%



### Most viewed x, y coordinate pairs

In [209]:
# AQ
logistic_regression(mv_x_train_aq_encoded_strat, mv_x_test_aq_encoded_strat, mv_y_train_aq_encoded_strat, mv_y_test_aq_encoded_strat)

# BDI
logistic_regression(mv_x_train_bdi_encoded_strat, mv_x_test_bdi_encoded_strat, mv_y_train_bdi_encoded_strat, mv_y_test_bdi_encoded_strat)

# WURS
logistic_regression(mv_x_train_wurs_encoded_strat, mv_x_test_wurs_encoded_strat, mv_y_train_wurs_encoded_strat, mv_y_test_wurs_encoded_strat)

Accuracy: 70.00%
F1: 57.10%
Precision: 80.00%
Recall: 44.40%

Accuracy: 85.00%
F1: 91.90%
Precision: 85.00%
Recall: 100.00%

Accuracy: 70.00%
F1: 82.40%
Precision: 70.00%
Recall: 100.00%



### Euclidean Distance

In [210]:
# AQ
logistic_regression(ed_x_train_aq_encoded_strat, ed_x_test_aq_encoded_strat, ed_y_train_aq_encoded_strat, ed_y_test_aq_encoded_strat)

# BDI
logistic_regression(ed_x_train_bdi_encoded_strat, ed_x_test_bdi_encoded_strat, ed_y_train_bdi_encoded_strat, ed_y_test_bdi_encoded_strat)

# WURS
logistic_regression(ed_x_train_wurs_encoded_strat, ed_x_test_wurs_encoded_strat, ed_y_train_wurs_encoded_strat, ed_y_test_wurs_encoded_strat)

Accuracy: 55.00%
F1: 0.00%
Precision: 0.00%
Recall: 0.00%

Accuracy: 85.00%
F1: 91.90%
Precision: 85.00%
Recall: 100.00%

Accuracy: 70.00%
F1: 82.40%
Precision: 70.00%
Recall: 100.00%



### Mean

In [211]:
# AQ
logistic_regression(mean_x_train_aq_encoded_strat, mean_x_test_aq_encoded_strat, mean_y_train_aq_encoded_strat, mean_y_test_aq_encoded_strat)

# BDI
logistic_regression(mean_x_train_bdi_encoded_strat, mean_x_test_bdi_encoded_strat, mean_y_train_bdi_encoded_strat, mean_y_test_bdi_encoded_strat)

# WURS
logistic_regression(mean_x_train_wurs_encoded_strat, mean_x_test_wurs_encoded_strat, mean_y_train_wurs_encoded_strat, mean_y_test_wurs_encoded_strat)

Accuracy: 55.00%
F1: 47.10%
Precision: 50.00%
Recall: 44.40%

Accuracy: 85.00%
F1: 91.40%
Precision: 88.90%
Recall: 94.10%

Accuracy: 70.00%
F1: 82.40%
Precision: 70.00%
Recall: 100.00%



### Standard deviation

In [212]:
# AQ
logistic_regression(std_x_train_aq_encoded_strat, std_x_test_aq_encoded_strat, std_y_train_aq_encoded_strat, std_y_test_aq_encoded_strat)

# BDI
logistic_regression(std_x_train_bdi_encoded_strat, std_x_test_bdi_encoded_strat, std_y_train_bdi_encoded_strat, std_y_test_bdi_encoded_strat)

# WURS
logistic_regression(std_x_train_wurs_encoded_strat, std_x_test_wurs_encoded_strat, std_y_train_wurs_encoded_strat, std_y_test_wurs_encoded_strat)

Accuracy: 50.00%
F1: 16.70%
Precision: 33.30%
Recall: 11.10%

Accuracy: 80.00%
F1: 88.90%
Precision: 84.20%
Recall: 94.10%

Accuracy: 70.00%
F1: 82.40%
Precision: 70.00%
Recall: 100.00%



### Median

In [213]:
# AQ
logistic_regression(med_x_train_aq_encoded_strat, med_x_test_aq_encoded_strat, med_y_train_aq_encoded_strat, med_y_test_aq_encoded_strat)

# BDI
logistic_regression(med_x_train_bdi_encoded_strat, med_x_test_bdi_encoded_strat, med_y_train_bdi_encoded_strat, med_y_test_bdi_encoded_strat)

# WURS
logistic_regression(med_x_train_wurs_encoded_strat, med_x_test_wurs_encoded_strat, med_y_train_wurs_encoded_strat, med_y_test_wurs_encoded_strat)

Accuracy: 55.00%
F1: 47.10%
Precision: 50.00%
Recall: 44.40%

Accuracy: 85.00%
F1: 91.90%
Precision: 85.00%
Recall: 100.00%

Accuracy: 70.00%
F1: 82.40%
Precision: 70.00%
Recall: 100.00%



### Pearsons CC

In [214]:
# AQ
logistic_regression(xy_pearson_cc_x_train_aq_encoded_strat, xy_pearson_cc_x_test_aq_encoded_strat, xy_pearson_cc_y_train_aq_encoded_strat, xy_pearson_cc_y_test_aq_encoded_strat)

# BDI
logistic_regression(xy_pearson_cc_x_train_bdi_encoded_strat, xy_pearson_cc_x_test_bdi_encoded_strat, xy_pearson_cc_y_train_bdi_encoded_strat, xy_pearson_cc_y_test_bdi_encoded_strat)

# WURS
logistic_regression(xy_pearson_cc_x_train_wurs_encoded_strat, xy_pearson_cc_x_test_wurs_encoded_strat, xy_pearson_cc_y_train_wurs_encoded_strat, xy_pearson_cc_y_test_wurs_encoded_strat)

Accuracy: 55.00%
F1: 0.00%
Precision: 0.00%
Recall: 0.00%

Accuracy: 85.00%
F1: 91.90%
Precision: 85.00%
Recall: 100.00%

Accuracy: 70.00%
F1: 82.40%
Precision: 70.00%
Recall: 100.00%



### Spearmans CC

In [215]:
# AQ
logistic_regression(xy_spearman_cc_x_train_aq_encoded_strat, xy_spearman_cc_x_test_aq_encoded_strat, xy_spearman_cc_y_train_aq_encoded_strat, xy_spearman_cc_y_test_aq_encoded_strat)

# BDI
logistic_regression(xy_spearman_cc_x_train_bdi_encoded_strat, xy_spearman_cc_x_test_bdi_encoded_strat, xy_spearman_cc_y_train_bdi_encoded_strat, xy_spearman_cc_y_test_bdi_encoded_strat)

# WURS
logistic_regression(xy_spearman_cc_x_train_wurs_encoded_strat, xy_spearman_cc_x_test_wurs_encoded_strat, xy_spearman_cc_y_train_wurs_encoded_strat, xy_spearman_cc_y_test_wurs_encoded_strat)

Accuracy: 55.00%
F1: 0.00%
Precision: 0.00%
Recall: 0.00%

Accuracy: 85.00%
F1: 91.90%
Precision: 85.00%
Recall: 100.00%

Accuracy: 70.00%
F1: 82.40%
Precision: 70.00%
Recall: 100.00%



### Range

In [216]:
# AQ
logistic_regression(xy_range_x_train_aq_encoded_strat, xy_range_x_test_aq_encoded_strat, xy_range_y_train_aq_encoded_strat, xy_range_y_test_aq_encoded_strat)

# BDI
logistic_regression(xy_range_x_train_bdi_encoded_strat, xy_range_x_test_bdi_encoded_strat, xy_range_y_train_bdi_encoded_strat, xy_range_y_test_bdi_encoded_strat)

# WURS
logistic_regression(xy_range_x_train_wurs_encoded_strat, xy_range_x_test_wurs_encoded_strat, xy_range_y_train_wurs_encoded_strat, xy_range_y_test_wurs_encoded_strat)

Accuracy: 50.00%
F1: 37.50%
Precision: 42.90%
Recall: 33.30%

Accuracy: 85.00%
F1: 91.90%
Precision: 85.00%
Recall: 100.00%

Accuracy: 70.00%
F1: 82.40%
Precision: 70.00%
Recall: 100.00%



### Unique values

In [217]:
# AQ
logistic_regression(xy_unique_x_train_aq_encoded_strat, xy_unique_x_test_aq_encoded_strat, xy_unique_y_train_aq_encoded_strat, xy_unique_y_test_aq_encoded_strat)

# BDI
logistic_regression(xy_unique_x_train_bdi_encoded_strat, xy_unique_x_test_bdi_encoded_strat, xy_unique_y_train_bdi_encoded_strat, xy_unique_y_test_bdi_encoded_strat)

# WURS
logistic_regression(xy_unique_x_train_wurs_encoded_strat, xy_unique_x_test_wurs_encoded_strat, xy_unique_y_train_wurs_encoded_strat, xy_unique_y_test_wurs_encoded_strat)

Accuracy: 65.00%
F1: 46.20%
Precision: 75.00%
Recall: 33.30%

Accuracy: 85.00%
F1: 91.90%
Precision: 85.00%
Recall: 100.00%

Accuracy: 70.00%
F1: 82.40%
Precision: 70.00%
Recall: 100.00%



## Decision Tree

### Least viewed x, y coordinate pairs

In [218]:
# AQ
decision_tree_classifier(lv_x_train_aq_encoded_strat, lv_x_test_aq_encoded_strat, lv_y_train_aq_encoded_strat, lv_y_test_aq_encoded_strat)

# BDI
decision_tree_classifier(lv_x_train_bdi_encoded_strat, lv_x_test_bdi_encoded_strat, lv_y_train_bdi_encoded_strat, lv_y_test_bdi_encoded_strat)

# WURS
decision_tree_classifier(lv_x_train_wurs_encoded_strat, lv_x_test_wurs_encoded_strat, lv_y_train_wurs_encoded_strat, lv_y_test_wurs_encoded_strat)

Accuracy: 50.00%
F1: 50.00%
Precision: 45.50%
Recall: 55.60%

Accuracy: 80.00%
F1: 88.90%
Precision: 84.20%
Recall: 94.10%

Accuracy: 70.00%
F1: 76.90%
Precision: 83.30%
Recall: 71.40%



### Most viewed x, y coordinate pairs

In [219]:
# AQ
decision_tree_classifier(mv_x_train_aq_encoded_strat, mv_x_test_aq_encoded_strat, mv_y_train_aq_encoded_strat, mv_y_test_aq_encoded_strat)

# BDI
decision_tree_classifier(mv_x_train_bdi_encoded_strat, mv_x_test_bdi_encoded_strat, mv_y_train_bdi_encoded_strat, mv_y_test_bdi_encoded_strat)

# WURS
decision_tree_classifier(mv_x_train_wurs_encoded_strat, mv_x_test_wurs_encoded_strat, mv_y_train_wurs_encoded_strat, mv_y_test_wurs_encoded_strat)

Accuracy: 65.00%
F1: 63.20%
Precision: 60.00%
Recall: 66.70%

Accuracy: 85.00%
F1: 91.40%
Precision: 88.90%
Recall: 94.10%

Accuracy: 60.00%
F1: 71.40%
Precision: 71.40%
Recall: 71.40%



### Euclidean Distance

In [220]:
# AQ
decision_tree_classifier(ed_x_train_aq_encoded_strat, ed_x_test_aq_encoded_strat, ed_y_train_aq_encoded_strat, ed_y_test_aq_encoded_strat)

# BDI
decision_tree_classifier(ed_x_train_bdi_encoded_strat, ed_x_test_bdi_encoded_strat, ed_y_train_bdi_encoded_strat, ed_y_test_bdi_encoded_strat)

# WURS
decision_tree_classifier(ed_x_train_wurs_encoded_strat, ed_x_test_wurs_encoded_strat, ed_y_train_wurs_encoded_strat, ed_y_test_wurs_encoded_strat)

Accuracy: 50.00%
F1: 50.00%
Precision: 45.50%
Recall: 55.60%

Accuracy: 80.00%
F1: 88.20%
Precision: 88.20%
Recall: 88.20%

Accuracy: 60.00%
F1: 71.40%
Precision: 71.40%
Recall: 71.40%



### Mean

In [221]:
# AQ
decision_tree_classifier(mean_x_train_aq_encoded_strat, mean_x_test_aq_encoded_strat, mean_y_train_aq_encoded_strat, mean_y_test_aq_encoded_strat)

# BDI
decision_tree_classifier(mean_x_train_bdi_encoded_strat, mean_x_test_bdi_encoded_strat, mean_y_train_bdi_encoded_strat, mean_y_test_bdi_encoded_strat)

# WURS
decision_tree_classifier(mean_x_train_wurs_encoded_strat, mean_x_test_wurs_encoded_strat, mean_y_train_wurs_encoded_strat, mean_y_test_wurs_encoded_strat)

Accuracy: 75.00%
F1: 66.70%
Precision: 83.30%
Recall: 55.60%

Accuracy: 85.00%
F1: 91.40%
Precision: 88.90%
Recall: 94.10%

Accuracy: 50.00%
F1: 54.50%
Precision: 75.00%
Recall: 42.90%



### Standard deviation

In [222]:
# AQ
decision_tree_classifier(std_x_train_aq_encoded_strat, std_x_test_aq_encoded_strat, std_y_train_aq_encoded_strat, std_y_test_aq_encoded_strat)

# BDI
decision_tree_classifier(std_x_train_bdi_encoded_strat, std_x_test_bdi_encoded_strat, std_y_train_bdi_encoded_strat, std_y_test_bdi_encoded_strat)

# WURS
decision_tree_classifier(std_x_train_wurs_encoded_strat, std_x_test_wurs_encoded_strat, std_y_train_wurs_encoded_strat, std_y_test_wurs_encoded_strat)

Accuracy: 50.00%
F1: 50.00%
Precision: 45.50%
Recall: 55.60%

Accuracy: 65.00%
F1: 77.40%
Precision: 85.70%
Recall: 70.60%

Accuracy: 50.00%
F1: 61.50%
Precision: 66.70%
Recall: 57.10%



### Median

In [223]:
# AQ
decision_tree_classifier(med_x_train_aq_encoded_strat, med_x_test_aq_encoded_strat, med_y_train_aq_encoded_strat, med_y_test_aq_encoded_strat)

# BDI
decision_tree_classifier(med_x_train_bdi_encoded_strat, med_x_test_bdi_encoded_strat, med_y_train_bdi_encoded_strat, med_y_test_bdi_encoded_strat)

# WURS
decision_tree_classifier(med_x_train_wurs_encoded_strat, med_x_test_wurs_encoded_strat, med_y_train_wurs_encoded_strat, med_y_test_wurs_encoded_strat)

Accuracy: 60.00%
F1: 50.00%
Precision: 57.10%
Recall: 44.40%

Accuracy: 70.00%
F1: 82.40%
Precision: 82.40%
Recall: 82.40%

Accuracy: 60.00%
F1: 71.40%
Precision: 71.40%
Recall: 71.40%



### Pearsons CC

In [224]:
# AQ
decision_tree_classifier(xy_pearson_cc_x_train_aq_encoded_strat, xy_pearson_cc_x_test_aq_encoded_strat, xy_pearson_cc_y_train_aq_encoded_strat, xy_pearson_cc_y_test_aq_encoded_strat)

# BDI
decision_tree_classifier(xy_pearson_cc_x_train_bdi_encoded_strat, xy_pearson_cc_x_test_bdi_encoded_strat, xy_pearson_cc_y_train_bdi_encoded_strat, xy_pearson_cc_y_test_bdi_encoded_strat)

# WURS
decision_tree_classifier(xy_pearson_cc_x_train_wurs_encoded_strat, xy_pearson_cc_x_test_wurs_encoded_strat, xy_pearson_cc_y_train_wurs_encoded_strat, xy_pearson_cc_y_test_wurs_encoded_strat)

Accuracy: 45.00%
F1: 35.30%
Precision: 37.50%
Recall: 33.30%

Accuracy: 70.00%
F1: 82.40%
Precision: 82.40%
Recall: 82.40%

Accuracy: 75.00%
F1: 82.80%
Precision: 80.00%
Recall: 85.70%



### Spearmans CC

In [225]:
# AQ
decision_tree_classifier(xy_spearman_cc_x_train_aq_encoded_strat, xy_spearman_cc_x_test_aq_encoded_strat, xy_spearman_cc_y_train_aq_encoded_strat, xy_spearman_cc_y_test_aq_encoded_strat)

# BDI
decision_tree_classifier(xy_spearman_cc_x_train_bdi_encoded_strat, xy_spearman_cc_x_test_bdi_encoded_strat, xy_spearman_cc_y_train_bdi_encoded_strat, xy_spearman_cc_y_test_bdi_encoded_strat)

# WURS
decision_tree_classifier(xy_spearman_cc_x_train_wurs_encoded_strat, xy_spearman_cc_x_test_wurs_encoded_strat, xy_spearman_cc_y_train_wurs_encoded_strat, xy_spearman_cc_y_test_wurs_encoded_strat)

Accuracy: 60.00%
F1: 55.60%
Precision: 55.60%
Recall: 55.60%

Accuracy: 70.00%
F1: 82.40%
Precision: 82.40%
Recall: 82.40%

Accuracy: 60.00%
F1: 71.40%
Precision: 71.40%
Recall: 71.40%



### Range

In [226]:
# AQ
decision_tree_classifier(xy_range_x_train_aq_encoded_strat, xy_range_x_test_aq_encoded_strat, xy_range_y_train_aq_encoded_strat, xy_range_y_test_aq_encoded_strat)

# BDI
decision_tree_classifier(xy_range_x_train_bdi_encoded_strat, xy_range_x_test_bdi_encoded_strat, xy_range_y_train_bdi_encoded_strat, xy_range_y_test_bdi_encoded_strat)

# WURS
decision_tree_classifier(xy_range_x_train_wurs_encoded_strat, xy_range_x_test_wurs_encoded_strat, xy_range_y_train_wurs_encoded_strat, xy_range_y_test_wurs_encoded_strat)

Accuracy: 60.00%
F1: 42.90%
Precision: 60.00%
Recall: 33.30%

Accuracy: 60.00%
F1: 75.00%
Precision: 80.00%
Recall: 70.60%

Accuracy: 55.00%
F1: 69.00%
Precision: 66.70%
Recall: 71.40%



### Unique values

In [227]:
# AQ
decision_tree_classifier(xy_unique_x_train_aq_encoded_strat, xy_unique_x_test_aq_encoded_strat, xy_unique_y_train_aq_encoded_strat, xy_unique_y_test_aq_encoded_strat)

# BDI
decision_tree_classifier(xy_unique_x_train_bdi_encoded_strat, xy_unique_x_test_bdi_encoded_strat, xy_unique_y_train_bdi_encoded_strat, xy_unique_y_test_bdi_encoded_strat)

# WURS
decision_tree_classifier(xy_unique_x_train_wurs_encoded_strat, xy_unique_x_test_wurs_encoded_strat, xy_unique_y_train_wurs_encoded_strat, xy_unique_y_test_wurs_encoded_strat)

Accuracy: 45.00%
F1: 42.10%
Precision: 40.00%
Recall: 44.40%

Accuracy: 80.00%
F1: 88.90%
Precision: 84.20%
Recall: 94.10%

Accuracy: 55.00%
F1: 64.00%
Precision: 72.70%
Recall: 57.10%



## Random Forest

### Least viewed x, y coordinate pairs

In [228]:
# AQ
random_forest_classifier(lv_x_train_aq_encoded_strat, lv_x_test_aq_encoded_strat, lv_y_train_aq_encoded_strat, lv_y_test_aq_encoded_strat)

# BDI
random_forest_classifier(lv_x_train_bdi_encoded_strat, lv_x_test_bdi_encoded_strat, lv_y_train_bdi_encoded_strat, lv_y_test_bdi_encoded_strat)

# WURS
random_forest_classifier(lv_x_train_wurs_encoded_strat, lv_x_test_wurs_encoded_strat, lv_y_train_wurs_encoded_strat, lv_y_test_wurs_encoded_strat)

Accuracy: 55.00%
F1: 54.90%
Precision: 56.20%
Recall: 56.10%

Accuracy: 80.00%
F1: 44.40%
Precision: 42.10%
Recall: 47.10%

Accuracy: 75.00%
F1: 71.50%
Precision: 70.90%
Recall: 72.60%



### Most viewed x, y coordinate pairs

In [229]:
# AQ
random_forest_classifier(mv_x_train_aq_encoded_strat, mv_x_test_aq_encoded_strat, mv_y_train_aq_encoded_strat, mv_y_test_aq_encoded_strat)

# BDI
random_forest_classifier(mv_x_train_bdi_encoded_strat, mv_x_test_bdi_encoded_strat, mv_y_train_bdi_encoded_strat, mv_y_test_bdi_encoded_strat)

# WURS
random_forest_classifier(mv_x_train_wurs_encoded_strat, mv_x_test_wurs_encoded_strat, mv_y_train_wurs_encoded_strat, mv_y_test_wurs_encoded_strat)

Accuracy: 70.00%
F1: 68.80%
Precision: 70.30%
Recall: 68.70%

Accuracy: 85.00%
F1: 45.90%
Precision: 42.50%
Recall: 50.00%

Accuracy: 65.00%
F1: 56.10%
Precision: 56.70%
Recall: 56.00%



### Euclidean Distance

In [230]:
# AQ
random_forest_classifier(ed_x_train_aq_encoded_strat, ed_x_test_aq_encoded_strat, ed_y_train_aq_encoded_strat, ed_y_test_aq_encoded_strat)

# BDI
random_forest_classifier(ed_x_train_bdi_encoded_strat, ed_x_test_bdi_encoded_strat, ed_y_train_bdi_encoded_strat, ed_y_test_bdi_encoded_strat)

# WURS
random_forest_classifier(ed_x_train_wurs_encoded_strat, ed_x_test_wurs_encoded_strat, ed_y_train_wurs_encoded_strat, ed_y_test_wurs_encoded_strat)

Accuracy: 50.00%
F1: 50.00%
Precision: 50.50%
Recall: 50.50%

Accuracy: 80.00%
F1: 60.80%
Precision: 60.80%
Recall: 60.80%

Accuracy: 60.00%
F1: 52.40%
Precision: 52.40%
Recall: 52.40%



### Mean

In [231]:
# AQ
random_forest_classifier(mean_x_train_aq_encoded_strat, mean_x_test_aq_encoded_strat, mean_y_train_aq_encoded_strat, mean_y_test_aq_encoded_strat)

# BDI
random_forest_classifier(mean_x_train_bdi_encoded_strat, mean_x_test_bdi_encoded_strat, mean_y_train_bdi_encoded_strat, mean_y_test_bdi_encoded_strat)

# WURS
random_forest_classifier(mean_x_train_wurs_encoded_strat, mean_x_test_wurs_encoded_strat, mean_y_train_wurs_encoded_strat, mean_y_test_wurs_encoded_strat)

Accuracy: 55.00%
F1: 52.00%
Precision: 53.60%
Recall: 53.00%

Accuracy: 80.00%
F1: 44.40%
Precision: 42.10%
Recall: 47.10%

Accuracy: 60.00%
F1: 56.00%
Precision: 56.20%
Recall: 57.10%



### Standard deviation

In [232]:
# AQ
random_forest_classifier(std_x_train_aq_encoded_strat, std_x_test_aq_encoded_strat, std_y_train_aq_encoded_strat, std_y_test_aq_encoded_strat)

# BDI
random_forest_classifier(std_x_train_bdi_encoded_strat, std_x_test_bdi_encoded_strat, std_y_train_bdi_encoded_strat, std_y_test_bdi_encoded_strat)

# WURS
random_forest_classifier(std_x_train_wurs_encoded_strat, std_x_test_wurs_encoded_strat, std_y_train_wurs_encoded_strat, std_y_test_wurs_encoded_strat)

Accuracy: 50.00%
F1: 50.00%
Precision: 50.50%
Recall: 50.50%

Accuracy: 85.00%
F1: 45.90%
Precision: 42.50%
Recall: 50.00%

Accuracy: 65.00%
F1: 49.80%
Precision: 52.00%
Recall: 51.20%



### Median

In [233]:
# AQ
random_forest_classifier(med_x_train_aq_encoded_strat, med_x_test_aq_encoded_strat, med_y_train_aq_encoded_strat, med_y_test_aq_encoded_strat)

# BDI
random_forest_classifier(med_x_train_bdi_encoded_strat, med_x_test_bdi_encoded_strat, med_y_train_bdi_encoded_strat, med_y_test_bdi_encoded_strat)

# WURS
random_forest_classifier(med_x_train_wurs_encoded_strat, med_x_test_wurs_encoded_strat, med_y_train_wurs_encoded_strat, med_y_test_wurs_encoded_strat)

Accuracy: 55.00%
F1: 48.70%
Precision: 53.10%
Recall: 52.00%

Accuracy: 80.00%
F1: 44.40%
Precision: 42.10%
Recall: 47.10%

Accuracy: 55.00%
F1: 48.70%
Precision: 48.90%
Recall: 48.80%



### Pearsons CC

In [234]:
# AQ
random_forest_classifier(xy_pearson_cc_x_train_aq_encoded_strat, xy_pearson_cc_x_test_aq_encoded_strat, xy_pearson_cc_y_train_aq_encoded_strat, xy_pearson_cc_y_test_aq_encoded_strat)

# BDI
random_forest_classifier(xy_pearson_cc_x_train_bdi_encoded_strat, xy_pearson_cc_x_test_bdi_encoded_strat, xy_pearson_cc_y_train_bdi_encoded_strat, xy_pearson_cc_y_test_bdi_encoded_strat)

# WURS
random_forest_classifier(xy_pearson_cc_x_train_wurs_encoded_strat, xy_pearson_cc_x_test_wurs_encoded_strat, xy_pearson_cc_y_train_wurs_encoded_strat, xy_pearson_cc_y_test_wurs_encoded_strat)

Accuracy: 45.00%
F1: 43.70%
Precision: 43.80%
Recall: 43.90%

Accuracy: 70.00%
F1: 41.20%
Precision: 41.20%
Recall: 41.20%

Accuracy: 75.00%
F1: 68.70%
Precision: 70.00%
Recall: 67.90%



### Spearmans CC

In [235]:
# AQ
random_forest_classifier(xy_spearman_cc_x_train_aq_encoded_strat, xy_spearman_cc_x_test_aq_encoded_strat, xy_spearman_cc_y_train_aq_encoded_strat, xy_spearman_cc_y_test_aq_encoded_strat)

# BDI
random_forest_classifier(xy_spearman_cc_x_train_bdi_encoded_strat, xy_spearman_cc_x_test_bdi_encoded_strat, xy_spearman_cc_y_train_bdi_encoded_strat, xy_spearman_cc_y_test_bdi_encoded_strat)

# WURS
random_forest_classifier(xy_spearman_cc_x_train_wurs_encoded_strat, xy_spearman_cc_x_test_wurs_encoded_strat, xy_spearman_cc_y_train_wurs_encoded_strat, xy_spearman_cc_y_test_wurs_encoded_strat)

Accuracy: 60.00%
F1: 59.60%
Precision: 59.60%
Recall: 59.60%

Accuracy: 70.00%
F1: 41.20%
Precision: 41.20%
Recall: 41.20%

Accuracy: 60.00%
F1: 52.40%
Precision: 52.40%
Recall: 52.40%



### Range

In [236]:
# AQ
random_forest_classifier(xy_range_x_train_aq_encoded_strat, xy_range_x_test_aq_encoded_strat, xy_range_y_train_aq_encoded_strat, xy_range_y_test_aq_encoded_strat)

# BDI
random_forest_classifier(xy_range_x_train_bdi_encoded_strat, xy_range_x_test_bdi_encoded_strat, xy_range_y_train_bdi_encoded_strat, xy_range_y_test_bdi_encoded_strat)

# WURS
random_forest_classifier(xy_range_x_train_wurs_encoded_strat, xy_range_x_test_wurs_encoded_strat, xy_range_y_train_wurs_encoded_strat, xy_range_y_test_wurs_encoded_strat)

Accuracy: 55.00%
F1: 52.00%
Precision: 53.60%
Recall: 53.00%

Accuracy: 85.00%
F1: 45.90%
Precision: 42.50%
Recall: 50.00%

Accuracy: 55.00%
F1: 35.50%
Precision: 32.40%
Recall: 39.30%



### Unique values

In [237]:
# AQ
random_forest_classifier(xy_unique_x_train_aq_encoded_strat, xy_unique_x_test_aq_encoded_strat, xy_unique_y_train_aq_encoded_strat, xy_unique_y_test_aq_encoded_strat)

# BDI
random_forest_classifier(xy_unique_x_train_bdi_encoded_strat, xy_unique_x_test_bdi_encoded_strat, xy_unique_y_train_bdi_encoded_strat, xy_unique_y_test_bdi_encoded_strat)

# WURS
random_forest_classifier(xy_unique_x_train_wurs_encoded_strat, xy_unique_x_test_wurs_encoded_strat, xy_unique_y_train_wurs_encoded_strat, xy_unique_y_test_wurs_encoded_strat)

Accuracy: 55.00%
F1: 52.00%
Precision: 53.60%
Recall: 53.00%

Accuracy: 85.00%
F1: 45.90%
Precision: 42.50%
Recall: 50.00%

Accuracy: 60.00%
F1: 46.70%
Precision: 46.90%
Recall: 47.60%



## SVM

### Least viewed x, y coordinate pairs

In [238]:
# AQ
svm_classifier(lv_x_train_aq_encoded_strat, lv_x_test_aq_encoded_strat, lv_y_train_aq_encoded_strat, lv_y_test_aq_encoded_strat)

# BDI
svm_classifier(lv_x_train_bdi_encoded_strat, lv_x_test_bdi_encoded_strat, lv_y_train_bdi_encoded_strat, lv_y_test_bdi_encoded_strat)

# WURS
svm_classifier(lv_x_train_wurs_encoded_strat, lv_x_test_wurs_encoded_strat, lv_y_train_wurs_encoded_strat, lv_y_test_wurs_encoded_strat)

Accuracy: 55.00%
F1: 35.50%
Precision: 27.50%
Recall: 50.00%

Accuracy: 85.00%
F1: 45.90%
Precision: 42.50%
Recall: 50.00%

Accuracy: 70.00%
F1: 41.20%
Precision: 35.00%
Recall: 50.00%



### Most viewed x, y coordinate pairs

In [239]:
# AQ
svm_classifier(mv_x_train_aq_encoded_strat, mv_x_test_aq_encoded_strat, mv_y_train_aq_encoded_strat, mv_y_test_aq_encoded_strat)

# BDI
svm_classifier(mv_x_train_bdi_encoded_strat, mv_x_test_bdi_encoded_strat, mv_y_train_bdi_encoded_strat, mv_y_test_bdi_encoded_strat)

# WURS
svm_classifier(mv_x_train_wurs_encoded_strat, mv_x_test_wurs_encoded_strat, mv_y_train_wurs_encoded_strat, mv_y_test_wurs_encoded_strat)

Accuracy: 70.00%
F1: 67.00%
Precision: 73.30%
Recall: 67.70%

Accuracy: 85.00%
F1: 45.90%
Precision: 42.50%
Recall: 50.00%

Accuracy: 70.00%
F1: 41.20%
Precision: 35.00%
Recall: 50.00%



### Euclidean Distance

In [240]:
# AQ
svm_classifier(ed_x_train_aq_encoded_strat, ed_x_test_aq_encoded_strat, ed_y_train_aq_encoded_strat, ed_y_test_aq_encoded_strat)

# BDI
svm_classifier(ed_x_train_bdi_encoded_strat, ed_x_test_bdi_encoded_strat, ed_y_train_bdi_encoded_strat, ed_y_test_bdi_encoded_strat)

# WURS
svm_classifier(ed_x_train_wurs_encoded_strat, ed_x_test_wurs_encoded_strat, ed_y_train_wurs_encoded_strat, ed_y_test_wurs_encoded_strat)

Accuracy: 40.00%
F1: 28.60%
Precision: 23.50%
Recall: 36.40%

Accuracy: 85.00%
F1: 45.90%
Precision: 42.50%
Recall: 50.00%

Accuracy: 70.00%
F1: 41.20%
Precision: 35.00%
Recall: 50.00%



### Mean

In [241]:
# AQ
svm_classifier(mean_x_train_aq_encoded_strat, mean_x_test_aq_encoded_strat, mean_y_train_aq_encoded_strat, mean_y_test_aq_encoded_strat)

# BDI
svm_classifier(mean_x_train_bdi_encoded_strat, mean_x_test_bdi_encoded_strat, mean_y_train_bdi_encoded_strat, mean_y_test_bdi_encoded_strat)

# WURS
svm_classifier(mean_x_train_wurs_encoded_strat, mean_x_test_wurs_encoded_strat, mean_y_train_wurs_encoded_strat, mean_y_test_wurs_encoded_strat)

Accuracy: 55.00%
F1: 35.50%
Precision: 27.50%
Recall: 50.00%

Accuracy: 85.00%
F1: 45.90%
Precision: 42.50%
Recall: 50.00%

Accuracy: 70.00%
F1: 41.20%
Precision: 35.00%
Recall: 50.00%



### Standard deviation

In [242]:
# AQ
svm_classifier(std_x_train_aq_encoded_strat, std_x_test_aq_encoded_strat, std_y_train_aq_encoded_strat, std_y_test_aq_encoded_strat)

# BDI
svm_classifier(std_x_train_bdi_encoded_strat, std_x_test_bdi_encoded_strat, std_y_train_bdi_encoded_strat, std_y_test_bdi_encoded_strat)

# WURS
svm_classifier(std_x_train_wurs_encoded_strat, std_x_test_wurs_encoded_strat, std_y_train_wurs_encoded_strat, std_y_test_wurs_encoded_strat)

Accuracy: 60.00%
F1: 46.70%
Precision: 78.90%
Recall: 55.60%

Accuracy: 85.00%
F1: 45.90%
Precision: 42.50%
Recall: 50.00%

Accuracy: 70.00%
F1: 41.20%
Precision: 35.00%
Recall: 50.00%



### Median

In [243]:
# AQ
svm_classifier(med_x_train_aq_encoded_strat, med_x_test_aq_encoded_strat, med_y_train_aq_encoded_strat, med_y_test_aq_encoded_strat)

# BDI
svm_classifier(med_x_train_bdi_encoded_strat, med_x_test_bdi_encoded_strat, med_y_train_bdi_encoded_strat, med_y_test_bdi_encoded_strat)

# WURS
svm_classifier(med_x_train_wurs_encoded_strat, med_x_test_wurs_encoded_strat, med_y_train_wurs_encoded_strat, med_y_test_wurs_encoded_strat)

Accuracy: 55.00%
F1: 35.50%
Precision: 27.50%
Recall: 50.00%

Accuracy: 85.00%
F1: 45.90%
Precision: 42.50%
Recall: 50.00%

Accuracy: 70.00%
F1: 41.20%
Precision: 35.00%
Recall: 50.00%



### Pearsons CC

In [244]:
# AQ
svm_classifier(xy_pearson_cc_x_train_aq_encoded_strat, xy_pearson_cc_x_test_aq_encoded_strat, xy_pearson_cc_y_train_aq_encoded_strat, xy_pearson_cc_y_test_aq_encoded_strat)

# BDI
svm_classifier(xy_pearson_cc_x_train_bdi_encoded_strat, xy_pearson_cc_x_test_bdi_encoded_strat, xy_pearson_cc_y_train_bdi_encoded_strat, xy_pearson_cc_y_test_bdi_encoded_strat)

# WURS
svm_classifier(xy_pearson_cc_x_train_wurs_encoded_strat, xy_pearson_cc_x_test_wurs_encoded_strat, xy_pearson_cc_y_train_wurs_encoded_strat, xy_pearson_cc_y_test_wurs_encoded_strat)

Accuracy: 55.00%
F1: 35.50%
Precision: 27.50%
Recall: 50.00%

Accuracy: 85.00%
F1: 45.90%
Precision: 42.50%
Recall: 50.00%

Accuracy: 70.00%
F1: 41.20%
Precision: 35.00%
Recall: 50.00%



### Spearmans CC

In [245]:
# AQ
svm_classifier(xy_spearman_cc_x_train_aq_encoded_strat, xy_spearman_cc_x_test_aq_encoded_strat, xy_spearman_cc_y_train_aq_encoded_strat, xy_spearman_cc_y_test_aq_encoded_strat)

# BDI
svm_classifier(xy_spearman_cc_x_train_bdi_encoded_strat, xy_spearman_cc_x_test_bdi_encoded_strat, xy_spearman_cc_y_train_bdi_encoded_strat, xy_spearman_cc_y_test_bdi_encoded_strat)

# WURS
svm_classifier(xy_spearman_cc_x_train_wurs_encoded_strat, xy_spearman_cc_x_test_wurs_encoded_strat, xy_spearman_cc_y_train_wurs_encoded_strat, xy_spearman_cc_y_test_wurs_encoded_strat)

Accuracy: 55.00%
F1: 43.60%
Precision: 52.80%
Recall: 51.00%

Accuracy: 85.00%
F1: 45.90%
Precision: 42.50%
Recall: 50.00%

Accuracy: 70.00%
F1: 41.20%
Precision: 35.00%
Recall: 50.00%



### Range

In [246]:
# AQ
svm_classifier(xy_range_x_train_aq_encoded_strat, xy_range_x_test_aq_encoded_strat, xy_range_y_train_aq_encoded_strat, xy_range_y_test_aq_encoded_strat)

# BDI
svm_classifier(xy_range_x_train_bdi_encoded_strat, xy_range_x_test_bdi_encoded_strat, xy_range_y_train_bdi_encoded_strat, xy_range_y_test_bdi_encoded_strat)

# WURS
svm_classifier(xy_range_x_train_wurs_encoded_strat, xy_range_x_test_wurs_encoded_strat, xy_range_y_train_wurs_encoded_strat, xy_range_y_test_wurs_encoded_strat)

Accuracy: 45.00%
F1: 31.00%
Precision: 25.00%
Recall: 40.90%

Accuracy: 85.00%
F1: 45.90%
Precision: 42.50%
Recall: 50.00%

Accuracy: 70.00%
F1: 41.20%
Precision: 35.00%
Recall: 50.00%



### Unique values

In [247]:
# AQ
svm_classifier(xy_unique_x_train_aq_encoded_strat, xy_unique_x_test_aq_encoded_strat, xy_unique_y_train_aq_encoded_strat, xy_unique_y_test_aq_encoded_strat)

# BDI
svm_classifier(xy_unique_x_train_bdi_encoded_strat, xy_unique_x_test_bdi_encoded_strat, xy_unique_y_train_bdi_encoded_strat, xy_unique_y_test_bdi_encoded_strat)

# WURS
svm_classifier(xy_unique_x_train_wurs_encoded_strat, xy_unique_x_test_wurs_encoded_strat, xy_unique_y_train_wurs_encoded_strat, xy_unique_y_test_wurs_encoded_strat)

Accuracy: 55.00%
F1: 35.50%
Precision: 27.50%
Recall: 50.00%

Accuracy: 85.00%
F1: 45.90%
Precision: 42.50%
Recall: 50.00%

Accuracy: 70.00%
F1: 41.20%
Precision: 35.00%
Recall: 50.00%

