### GRID SEARCH ALGORITHM

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Read armspan.csv into a dataframe
df = pd.read_csv("armspan.csv")

# MDP spaces: the distinct body measurements are the states,
# the distinct garment measurements are the actions
state_space, action_space = (
    np.array(df[column].unique())
    for column in ('anthropometric_measurement', 'garment_measurement')
)

# Define a function to train and evaluate the Q-learning model with a given set of hyperparameters
def train_eval_model(alpha, gamma, epsilon, num_iterations, df):
    """Train a tabular Q-learning model on ``df`` and score it on a held-out split.

    The Q-table has one row per distinct ``anthropometric_measurement`` (state)
    and one column per distinct ``garment_measurement`` (action) found in ``df``.
    Both are derived from the ``df`` argument rather than from module-level
    variables, so the function works with whatever frame the caller passes in.

    Args:
        alpha: Learning rate applied to every Q-table update.
        gamma: Discount factor applied to the best Q-value of the sampled state.
        epsilon: Probability of choosing a random action instead of the greedy one.
        num_iterations: Number of sample/update steps to run.
        df: DataFrame with ``anthropometric_measurement``,
            ``garment_measurement`` and ``tolerance`` columns.

    Returns:
        dict: the four hyperparameters plus the test-set ``mse`` and ``r2``.
    """
    state_col = 'anthropometric_measurement'
    action_col = 'garment_measurement'

    # Build the spaces from the frame that was passed in, not from globals
    states = np.array(df[state_col].unique())
    actions = np.array(df[action_col].unique())

    # Fixed random_state keeps the 80/20 split identical for every hyperparameter set
    X_train, X_test, y_train, y_test = train_test_split(
        df[[state_col, action_col]], df['tolerance'], test_size=0.2, random_state=42
    )

    # Initialize the Q-Table with zeros
    q_table = pd.DataFrame(0.0, index=states, columns=actions)

    # Map each (state, action) pair to the first tolerance recorded for it in
    # the training split; this replaces a boolean-mask scan on every iteration.
    first_tolerance = {}
    for s, a, t in zip(X_train[state_col], X_train[action_col], y_train):
        first_tolerance.setdefault((s, a), t)

    train_states = X_train[state_col]
    for _ in range(num_iterations):
        state = train_states.sample(1).iloc[0]  # random state from the training data
        if np.random.uniform(0, 1) < epsilon:
            action = np.random.choice(actions)  # explore: random action
        else:
            action = q_table.loc[state, :].idxmax()  # exploit: best known action

        tolerance = first_tolerance.get((state, action))
        if tolerance is None:
            # This (state, action) pair never occurs in the training data
            continue

        current = q_table.loc[state, action]
        # NOTE(review): the reward is the absolute gap between the tolerance and
        # the current Q-value, so it grows as the Q-value moves away from the
        # tolerance. Overflow warnings and inf scores in this notebook's recorded
        # output suggest the updates can diverge - confirm the intended reward.
        reward = np.abs(tolerance - current)
        q_table.loc[state, action] = current + alpha * (reward + gamma * q_table.loc[state, :].max() - current)

    # Predict, for every test row, the largest Q-value stored for its state
    y_pred = [q_table.loc[state, :].max() for state in X_test[state_col]]

    # Calculate the evaluation metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return {'alpha': alpha, 'gamma': gamma, 'epsilon': epsilon, 'num_iterations': num_iterations, 'mse': mse, 'r2': r2}

# Candidate values for every hyperparameter of the search
param_grid = {'alpha': [0.1, 0.5, 0.9], 'gamma': [0.1, 0.5, 0.9], 'epsilon': [0.1, 0.5,0.9], 'num_iterations': [10000, 50000, 100000]}

# Expand the grid into the full list of combinations
param_list = list(ParameterGrid(param_grid))

# Run one train/evaluate cycle per combination and keep the parameter dict next to its scores
results = []
for params in param_list:
    result = train_eval_model(df=df, **params)
    result['params'] = params
    results.append(result)

# Report the scores obtained by every combination
for result in results:
    print("Parameters: ", result['params'])
    print("Mean Squared Error: ", result['mse'])
    print("R-squared: ", result['r2'])
    print()

# The best combination is the one with the smallest MSE
optimal_result = min(results, key=lambda entry: entry['mse'])
optimal_params = optimal_result['params']
print("Optimal combination of hyperparameters:")
for label, name in (("Alpha:", 'alpha'), ("Gamma:", 'gamma'), ("Epsilon:", 'epsilon'), ("Num Iterations:", 'num_iterations')):
    print(label, optimal_params[name])

# Collect all results in a dataframe
hyperparameters_combinations = pd.DataFrame(results)

# save into csv file
# hyperparameters_combinations.to_csv('hyperparameters.csv', index=False)

  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action]) # update the Q-Table with the new value
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action]) # update the Q-Table with the new value
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Read sleeve_length.csv into a dataframe
df = pd.read_csv("sleeve_length.csv")

# MDP spaces: the distinct body measurements are the states,
# the distinct garment measurements are the actions
state_space, action_space = (
    np.array(df[column].unique())
    for column in ('anthropometric_measurement', 'garment_measurement')
)

# Define a function to train and evaluate the Q-learning model with a given set of hyperparameters
def train_eval_model(alpha, gamma, epsilon, num_iterations, df):
    """Train a tabular Q-learning model on ``df`` and score it on a held-out split.

    The Q-table has one row per distinct ``anthropometric_measurement`` (state)
    and one column per distinct ``garment_measurement`` (action) found in ``df``.
    Both are derived from the ``df`` argument rather than from module-level
    variables, so the function works with whatever frame the caller passes in.

    Args:
        alpha: Learning rate applied to every Q-table update.
        gamma: Discount factor applied to the best Q-value of the sampled state.
        epsilon: Probability of choosing a random action instead of the greedy one.
        num_iterations: Number of sample/update steps to run.
        df: DataFrame with ``anthropometric_measurement``,
            ``garment_measurement`` and ``tolerance`` columns.

    Returns:
        dict: the four hyperparameters plus the test-set ``mse`` and ``r2``.
    """
    state_col = 'anthropometric_measurement'
    action_col = 'garment_measurement'

    # Build the spaces from the frame that was passed in, not from globals
    states = np.array(df[state_col].unique())
    actions = np.array(df[action_col].unique())

    # Fixed random_state keeps the 80/20 split identical for every hyperparameter set
    X_train, X_test, y_train, y_test = train_test_split(
        df[[state_col, action_col]], df['tolerance'], test_size=0.2, random_state=42
    )

    # Initialize the Q-Table with zeros
    q_table = pd.DataFrame(0.0, index=states, columns=actions)

    # Map each (state, action) pair to the first tolerance recorded for it in
    # the training split; this replaces a boolean-mask scan on every iteration.
    first_tolerance = {}
    for s, a, t in zip(X_train[state_col], X_train[action_col], y_train):
        first_tolerance.setdefault((s, a), t)

    train_states = X_train[state_col]
    for _ in range(num_iterations):
        state = train_states.sample(1).iloc[0]  # random state from the training data
        if np.random.uniform(0, 1) < epsilon:
            action = np.random.choice(actions)  # explore: random action
        else:
            action = q_table.loc[state, :].idxmax()  # exploit: best known action

        tolerance = first_tolerance.get((state, action))
        if tolerance is None:
            # This (state, action) pair never occurs in the training data
            continue

        current = q_table.loc[state, action]
        # NOTE(review): the reward is the absolute gap between the tolerance and
        # the current Q-value, so it grows as the Q-value moves away from the
        # tolerance. Overflow warnings and inf scores in this notebook's recorded
        # output suggest the updates can diverge - confirm the intended reward.
        reward = np.abs(tolerance - current)
        q_table.loc[state, action] = current + alpha * (reward + gamma * q_table.loc[state, :].max() - current)

    # Predict, for every test row, the largest Q-value stored for its state
    y_pred = [q_table.loc[state, :].max() for state in X_test[state_col]]

    # Calculate the evaluation metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return {'alpha': alpha, 'gamma': gamma, 'epsilon': epsilon, 'num_iterations': num_iterations, 'mse': mse, 'r2': r2}

# Candidate values for every hyperparameter of the search
param_grid = {'alpha': [0.1, 0.5, 0.9], 'gamma': [0.1, 0.5, 0.9], 'epsilon': [0.1, 0.5,0.9], 'num_iterations': [10000, 20000, 40000]}

# Expand the grid into the full list of combinations
param_list = list(ParameterGrid(param_grid))

# Run one train/evaluate cycle per combination and keep the parameter dict next to its scores
results = []
for params in param_list:
    result = train_eval_model(df=df, **params)
    result['params'] = params
    results.append(result)

# Report the scores obtained by every combination
for result in results:
    print("Parameters: ", result['params'])
    print("Mean Squared Error: ", result['mse'])
    print("R-squared: ", result['r2'])
    print()

# The best combination is the one with the smallest MSE
optimal_result = min(results, key=lambda entry: entry['mse'])
optimal_params = optimal_result['params']
print("Optimal combination of hyperparameters:")
for label, name in (("Alpha:", 'alpha'), ("Gamma:", 'gamma'), ("Epsilon:", 'epsilon'), ("Num Iterations:", 'num_iterations')):
    print(label, optimal_params[name])

# (stray "\" and "]" tokens that followed the prints were removed: they are a syntax error)

# Collect all results in a dataframe
hyperparameters_combinations = pd.DataFrame(results)

# save into csv file
# hyperparameters_combinations.to_csv('hyperparameters.csv', index=False)


  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action]) # update the Q-Table with the new value
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action]) # update the Q-Table with the new value
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action]) # update the Q-Table with the new value
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action]) # update the Q-Table with the new value
  output_errors = np.average((y_true - y_pred) ** 2,

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 10000}
Mean Squared Error:  55.125808443281066
R-squared:  -0.4172607221470699

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 20000}
Mean Squared Error:  40.27100974005576
R-squared:  -0.03535026437765665

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 40000}
Mean Squared Error:  38.96408453222203
R-squared:  -0.0017497818422862466

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 10000}
Mean Squared Error:  44.18006297804331
R-squared:  -0.13585033451598094

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 20000}
Mean Squared Error:  50.439390687925176
R-squared:  -0.29677494606865884

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 40000}
Mean Squared Error:  53.97454889003432
R-squared:  -0.38766233634757663

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.9, 'nu

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Read sleeve_width.csv into a dataframe
df = pd.read_csv("sleeve_width.csv")

# MDP spaces: the distinct body measurements are the states,
# the distinct garment measurements are the actions
state_space, action_space = (
    np.array(df[column].unique())
    for column in ('anthropometric_measurement', 'garment_measurement')
)

# Define a function to train and evaluate the Q-learning model with a given set of hyperparameters
def train_eval_model(alpha, gamma, epsilon, num_iterations, df):
    """Train a tabular Q-learning model on ``df`` and score it on a held-out split.

    The Q-table has one row per distinct ``anthropometric_measurement`` (state)
    and one column per distinct ``garment_measurement`` (action) found in ``df``.
    Both are derived from the ``df`` argument rather than from module-level
    variables, so the function works with whatever frame the caller passes in.

    Args:
        alpha: Learning rate applied to every Q-table update.
        gamma: Discount factor applied to the best Q-value of the sampled state.
        epsilon: Probability of choosing a random action instead of the greedy one.
        num_iterations: Number of sample/update steps to run.
        df: DataFrame with ``anthropometric_measurement``,
            ``garment_measurement`` and ``tolerance`` columns.

    Returns:
        dict: the four hyperparameters plus the test-set ``mse`` and ``r2``.
    """
    state_col = 'anthropometric_measurement'
    action_col = 'garment_measurement'

    # Build the spaces from the frame that was passed in, not from globals
    states = np.array(df[state_col].unique())
    actions = np.array(df[action_col].unique())

    # Fixed random_state keeps the 80/20 split identical for every hyperparameter set
    X_train, X_test, y_train, y_test = train_test_split(
        df[[state_col, action_col]], df['tolerance'], test_size=0.2, random_state=42
    )

    # Initialize the Q-Table with zeros
    q_table = pd.DataFrame(0.0, index=states, columns=actions)

    # Map each (state, action) pair to the first tolerance recorded for it in
    # the training split; this replaces a boolean-mask scan on every iteration.
    first_tolerance = {}
    for s, a, t in zip(X_train[state_col], X_train[action_col], y_train):
        first_tolerance.setdefault((s, a), t)

    train_states = X_train[state_col]
    for _ in range(num_iterations):
        state = train_states.sample(1).iloc[0]  # random state from the training data
        if np.random.uniform(0, 1) < epsilon:
            action = np.random.choice(actions)  # explore: random action
        else:
            action = q_table.loc[state, :].idxmax()  # exploit: best known action

        tolerance = first_tolerance.get((state, action))
        if tolerance is None:
            # This (state, action) pair never occurs in the training data
            continue

        current = q_table.loc[state, action]
        # NOTE(review): the reward is the absolute gap between the tolerance and
        # the current Q-value, so it grows as the Q-value moves away from the
        # tolerance. Overflow warnings and inf scores in this notebook's recorded
        # output suggest the updates can diverge - confirm the intended reward.
        reward = np.abs(tolerance - current)
        q_table.loc[state, action] = current + alpha * (reward + gamma * q_table.loc[state, :].max() - current)

    # Predict, for every test row, the largest Q-value stored for its state
    y_pred = [q_table.loc[state, :].max() for state in X_test[state_col]]

    # Calculate the evaluation metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return {'alpha': alpha, 'gamma': gamma, 'epsilon': epsilon, 'num_iterations': num_iterations, 'mse': mse, 'r2': r2}

# Candidate values for every hyperparameter of the search
param_grid = {'alpha': [0.1, 0.5, 0.9], 'gamma': [0.1, 0.5, 0.9], 'epsilon': [0.1, 0.5,0.9], 'num_iterations': [20000, 50000, 100000]}

# Expand the grid into the full list of combinations
param_list = list(ParameterGrid(param_grid))

# Run one train/evaluate cycle per combination and keep the parameter dict next to its scores
results = []
for params in param_list:
    result = train_eval_model(df=df, **params)
    result['params'] = params
    results.append(result)

# Report the scores obtained by every combination
for result in results:
    print("Parameters: ", result['params'])
    print("Mean Squared Error: ", result['mse'])
    print("R-squared: ", result['r2'])
    print()

# The best combination is the one with the smallest MSE
optimal_result = min(results, key=lambda entry: entry['mse'])
optimal_params = optimal_result['params']
print("Optimal combination of hyperparameters:")
for label, name in (("Alpha:", 'alpha'), ("Gamma:", 'gamma'), ("Epsilon:", 'epsilon'), ("Num Iterations:", 'num_iterations')):
    print(label, optimal_params[name])

# Collect all results in a dataframe
hyperparameters_combinations = pd.DataFrame(results)

# save into csv file
# hyperparameters_combinations.to_csv('hyperparameters.csv', index=False)

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Read bust.csv into a dataframe
df = pd.read_csv("bust.csv")

# MDP spaces: the distinct body measurements are the states,
# the distinct garment measurements are the actions
state_space, action_space = (
    np.array(df[column].unique())
    for column in ('anthropometric_measurement', 'garment_measurement')
)

# Define a function to train and evaluate the Q-learning model with a given set of hyperparameters
def train_eval_model(alpha, gamma, epsilon, num_iterations, df):
    """Train a tabular Q-learning model on ``df`` and score it on a held-out split.

    The Q-table has one row per distinct ``anthropometric_measurement`` (state)
    and one column per distinct ``garment_measurement`` (action) found in ``df``.
    Both are derived from the ``df`` argument rather than from module-level
    variables, so the function works with whatever frame the caller passes in.

    Args:
        alpha: Learning rate applied to every Q-table update.
        gamma: Discount factor applied to the best Q-value of the sampled state.
        epsilon: Probability of choosing a random action instead of the greedy one.
        num_iterations: Number of sample/update steps to run.
        df: DataFrame with ``anthropometric_measurement``,
            ``garment_measurement`` and ``tolerance`` columns.

    Returns:
        dict: the four hyperparameters plus the test-set ``mse`` and ``r2``.
    """
    state_col = 'anthropometric_measurement'
    action_col = 'garment_measurement'

    # Build the spaces from the frame that was passed in, not from globals
    states = np.array(df[state_col].unique())
    actions = np.array(df[action_col].unique())

    # Fixed random_state keeps the 80/20 split identical for every hyperparameter set
    X_train, X_test, y_train, y_test = train_test_split(
        df[[state_col, action_col]], df['tolerance'], test_size=0.2, random_state=42
    )

    # Initialize the Q-Table with zeros
    q_table = pd.DataFrame(0.0, index=states, columns=actions)

    # Map each (state, action) pair to the first tolerance recorded for it in
    # the training split; this replaces a boolean-mask scan on every iteration.
    first_tolerance = {}
    for s, a, t in zip(X_train[state_col], X_train[action_col], y_train):
        first_tolerance.setdefault((s, a), t)

    train_states = X_train[state_col]
    for _ in range(num_iterations):
        state = train_states.sample(1).iloc[0]  # random state from the training data
        if np.random.uniform(0, 1) < epsilon:
            action = np.random.choice(actions)  # explore: random action
        else:
            action = q_table.loc[state, :].idxmax()  # exploit: best known action

        tolerance = first_tolerance.get((state, action))
        if tolerance is None:
            # This (state, action) pair never occurs in the training data
            continue

        current = q_table.loc[state, action]
        # NOTE(review): the reward is the absolute gap between the tolerance and
        # the current Q-value, so it grows as the Q-value moves away from the
        # tolerance. Overflow warnings and inf scores in this notebook's recorded
        # output suggest the updates can diverge - confirm the intended reward.
        reward = np.abs(tolerance - current)
        q_table.loc[state, action] = current + alpha * (reward + gamma * q_table.loc[state, :].max() - current)

    # Predict, for every test row, the largest Q-value stored for its state
    y_pred = [q_table.loc[state, :].max() for state in X_test[state_col]]

    # Calculate the evaluation metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return {'alpha': alpha, 'gamma': gamma, 'epsilon': epsilon, 'num_iterations': num_iterations, 'mse': mse, 'r2': r2}

# Candidate values for every hyperparameter of the search
param_grid = {'alpha': [0.1, 0.5, 0.9], 'gamma': [0.1, 0.5, 0.9], 'epsilon': [0.1, 0.5,0.9], 'num_iterations': [10000, 20000, 40000]}

# Expand the grid into the full list of combinations
param_list = list(ParameterGrid(param_grid))

# Run one train/evaluate cycle per combination and keep the parameter dict next to its scores
results = []
for params in param_list:
    result = train_eval_model(df=df, **params)
    result['params'] = params
    results.append(result)

# Report the scores obtained by every combination
for result in results:
    print("Parameters: ", result['params'])
    print("Mean Squared Error: ", result['mse'])
    print("R-squared: ", result['r2'])
    print()

# The best combination is the one with the smallest MSE
optimal_result = min(results, key=lambda entry: entry['mse'])
optimal_params = optimal_result['params']
print("Optimal combination of hyperparameters:")
for label, name in (("Alpha:", 'alpha'), ("Gamma:", 'gamma'), ("Epsilon:", 'epsilon'), ("Num Iterations:", 'num_iterations')):
    print(label, optimal_params[name])

# Collect all results in a dataframe
hyperparameters_combinations = pd.DataFrame(results)

# save into csv file
# hyperparameters_combinations.to_csv('hyperparameters.csv', index=False)

  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action]) # update the Q-Table with the new value
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action]) # update the Q-Table with the new value
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action]) # update the Q-Table with the new value
  q_table.loc[state, action] += alpha * (rewar

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 10000}
Mean Squared Error:  336.0709374709788
R-squared:  -3.688417495306135

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 20000}
Mean Squared Error:  295.78444196229776
R-squared:  -3.1263935613449227

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 40000}
Mean Squared Error:  242.14551782682875
R-squared:  -2.3780941926503356

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 10000}
Mean Squared Error:  373.7630971450067
R-squared:  -4.214248684939559

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 20000}
Mean Squared Error:  332.64958500077546
R-squared:  -3.640687308043824

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 40000}
Mean Squared Error:  171.7850335653979
R-squared:  -1.396517719921298

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.9, 'num_iteratio

Optimal combination of hyperparameters:
Alpha: 0.1
Gamma: 0.9
Epsilon: 0.9
Num Iterations: 40000


In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Read wrapper_length.csv into a dataframe
df = pd.read_csv("wrapper_length.csv")

# MDP spaces: the distinct body measurements are the states,
# the distinct garment measurements are the actions
state_space, action_space = (
    np.array(df[column].unique())
    for column in ('anthropometric_measurement', 'garment_measurement')
)

# Define a function to train and evaluate the Q-learning model with a given set of hyperparameters
def train_eval_model(alpha, gamma, epsilon, num_iterations, df):
    """Train a tabular Q-learning model on ``df`` and score it on a held-out split.

    The Q-table has one row per distinct ``anthropometric_measurement`` (state)
    and one column per distinct ``garment_measurement`` (action) found in ``df``.
    Both are derived from the ``df`` argument rather than from module-level
    variables, so the function works with whatever frame the caller passes in.

    Args:
        alpha: Learning rate applied to every Q-table update.
        gamma: Discount factor applied to the best Q-value of the sampled state.
        epsilon: Probability of choosing a random action instead of the greedy one.
        num_iterations: Number of sample/update steps to run.
        df: DataFrame with ``anthropometric_measurement``,
            ``garment_measurement`` and ``tolerance`` columns.

    Returns:
        dict: the four hyperparameters plus the test-set ``mse`` and ``r2``.
    """
    state_col = 'anthropometric_measurement'
    action_col = 'garment_measurement'

    # Build the spaces from the frame that was passed in, not from globals
    states = np.array(df[state_col].unique())
    actions = np.array(df[action_col].unique())

    # Fixed random_state keeps the 80/20 split identical for every hyperparameter set
    X_train, X_test, y_train, y_test = train_test_split(
        df[[state_col, action_col]], df['tolerance'], test_size=0.2, random_state=42
    )

    # Initialize the Q-Table with zeros
    q_table = pd.DataFrame(0.0, index=states, columns=actions)

    # Map each (state, action) pair to the first tolerance recorded for it in
    # the training split; this replaces a boolean-mask scan on every iteration.
    first_tolerance = {}
    for s, a, t in zip(X_train[state_col], X_train[action_col], y_train):
        first_tolerance.setdefault((s, a), t)

    train_states = X_train[state_col]
    for _ in range(num_iterations):
        state = train_states.sample(1).iloc[0]  # random state from the training data
        if np.random.uniform(0, 1) < epsilon:
            action = np.random.choice(actions)  # explore: random action
        else:
            action = q_table.loc[state, :].idxmax()  # exploit: best known action

        tolerance = first_tolerance.get((state, action))
        if tolerance is None:
            # This (state, action) pair never occurs in the training data
            continue

        current = q_table.loc[state, action]
        # NOTE(review): the reward is the absolute gap between the tolerance and
        # the current Q-value, so it grows as the Q-value moves away from the
        # tolerance. Overflow warnings and inf scores in this notebook's recorded
        # output suggest the updates can diverge - confirm the intended reward.
        reward = np.abs(tolerance - current)
        q_table.loc[state, action] = current + alpha * (reward + gamma * q_table.loc[state, :].max() - current)

    # Predict, for every test row, the largest Q-value stored for its state
    y_pred = [q_table.loc[state, :].max() for state in X_test[state_col]]

    # Calculate the evaluation metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return {'alpha': alpha, 'gamma': gamma, 'epsilon': epsilon, 'num_iterations': num_iterations, 'mse': mse, 'r2': r2}

# Candidate values for every hyperparameter of the search
param_grid = {'alpha': [0.1, 0.5, 0.9], 'gamma': [0.1, 0.5, 0.9], 'epsilon': [0.1, 0.5,0.9], 'num_iterations': [10000, 20000, 40000]}

# Expand the grid into the full list of combinations
param_list = list(ParameterGrid(param_grid))

# Run one train/evaluate cycle per combination and keep the parameter dict next to its scores
results = []
for params in param_list:
    result = train_eval_model(df=df, **params)
    result['params'] = params
    results.append(result)

# Report the scores obtained by every combination
for result in results:
    print("Parameters: ", result['params'])
    print("Mean Squared Error: ", result['mse'])
    print("R-squared: ", result['r2'])
    print()

# The best combination is the one with the smallest MSE
optimal_result = min(results, key=lambda entry: entry['mse'])
optimal_params = optimal_result['params']
print("Optimal combination of hyperparameters:")
for label, name in (("Alpha:", 'alpha'), ("Gamma:", 'gamma'), ("Epsilon:", 'epsilon'), ("Num Iterations:", 'num_iterations')):
    print(label, optimal_params[name])

# Collect all results in a dataframe
hyperparameters_combinations = pd.DataFrame(results)

# save into csv file
# hyperparameters_combinations.to_csv('hyperparameters.csv', index=False)

  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action]) # update the Q-Table with the new value
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action]) # update the Q-Table with the new value
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action]) # update the Q-Table with the new value
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action]) # update the Q-Table with the new value
  output_errors = np.average((y_true - y_pred) ** 2,

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 10000}
Mean Squared Error:  428.07765269028425
R-squared:  -3.574126034118186

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 20000}
Mean Squared Error:  392.23397896834734
R-squared:  -3.1911266411352255

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 40000}
Mean Squared Error:  988.2585386858196
R-squared:  -9.559811010533974

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 10000}
Mean Squared Error:  427.07050186505353
R-squared:  -3.5633643538926023

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 20000}
Mean Squared Error:  428.6673569543489
R-squared:  -3.5804271844112154

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 40000}
Mean Squared Error:  229.12655913978503
R-squared:  -1.4482795415332776

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.9, 'num_itera

Mean Squared Error:  inf
R-squared:  -inf

Parameters:  {'alpha': 0.9, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 40000}
Mean Squared Error:  2.19850737307701e+181
R-squared:  -2.349164864877464e+179

Parameters:  {'alpha': 0.9, 'epsilon': 0.1, 'gamma': 0.9, 'num_iterations': 10000}
Mean Squared Error:  inf
R-squared:  -inf

Parameters:  {'alpha': 0.9, 'epsilon': 0.1, 'gamma': 0.9, 'num_iterations': 20000}
Mean Squared Error:  inf
R-squared:  -inf

Parameters:  {'alpha': 0.9, 'epsilon': 0.1, 'gamma': 0.9, 'num_iterations': 40000}
Mean Squared Error:  inf
R-squared:  -inf

Parameters:  {'alpha': 0.9, 'epsilon': 0.5, 'gamma': 0.1, 'num_iterations': 10000}
Mean Squared Error:  68414009105.63027
R-squared:  -731022275.4066386

Parameters:  {'alpha': 0.9, 'epsilon': 0.5, 'gamma': 0.1, 'num_iterations': 20000}
Mean Squared Error:  2.025822885663857e+20
R-squared:  -2.164646797979835e+18

Parameters:  {'alpha': 0.9, 'epsilon': 0.5, 'gamma': 0.1, 'num_iterations': 40000}
Mean Squared Erro

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Read sleeve_width.csv into a dataframe
df = pd.read_csv("sleeve_width.csv")

# MDP spaces: the distinct body measurements are the states,
# the distinct garment measurements are the actions
state_space, action_space = (
    np.array(df[column].unique())
    for column in ('anthropometric_measurement', 'garment_measurement')
)

# Define a function to train and evaluate the Q-learning model with a given set of hyperparameters
def train_eval_model(alpha, gamma, epsilon, num_iterations, df):
    """Train a tabular Q-learning model on ``df`` and score it on a held-out split.

    The Q-table has one row per distinct ``anthropometric_measurement`` (state)
    and one column per distinct ``garment_measurement`` (action) found in ``df``.
    Both are derived from the ``df`` argument rather than from module-level
    variables, so the function works with whatever frame the caller passes in.

    Args:
        alpha: Learning rate applied to every Q-table update.
        gamma: Discount factor applied to the best Q-value of the sampled state.
        epsilon: Probability of choosing a random action instead of the greedy one.
        num_iterations: Number of sample/update steps to run.
        df: DataFrame with ``anthropometric_measurement``,
            ``garment_measurement`` and ``tolerance`` columns.

    Returns:
        dict: the four hyperparameters plus the test-set ``mse`` and ``r2``.
    """
    state_col = 'anthropometric_measurement'
    action_col = 'garment_measurement'

    # Build the spaces from the frame that was passed in, not from globals
    states = np.array(df[state_col].unique())
    actions = np.array(df[action_col].unique())

    # Fixed random_state keeps the 80/20 split identical for every hyperparameter set
    X_train, X_test, y_train, y_test = train_test_split(
        df[[state_col, action_col]], df['tolerance'], test_size=0.2, random_state=42
    )

    # Initialize the Q-Table with zeros
    q_table = pd.DataFrame(0.0, index=states, columns=actions)

    # Map each (state, action) pair to the first tolerance recorded for it in
    # the training split; this replaces a boolean-mask scan on every iteration.
    first_tolerance = {}
    for s, a, t in zip(X_train[state_col], X_train[action_col], y_train):
        first_tolerance.setdefault((s, a), t)

    train_states = X_train[state_col]
    for _ in range(num_iterations):
        state = train_states.sample(1).iloc[0]  # random state from the training data
        if np.random.uniform(0, 1) < epsilon:
            action = np.random.choice(actions)  # explore: random action
        else:
            action = q_table.loc[state, :].idxmax()  # exploit: best known action

        tolerance = first_tolerance.get((state, action))
        if tolerance is None:
            # This (state, action) pair never occurs in the training data
            continue

        current = q_table.loc[state, action]
        # NOTE(review): the reward is the absolute gap between the tolerance and
        # the current Q-value, so it grows as the Q-value moves away from the
        # tolerance. Overflow warnings and inf scores in this notebook's recorded
        # output suggest the updates can diverge - confirm the intended reward.
        reward = np.abs(tolerance - current)
        q_table.loc[state, action] = current + alpha * (reward + gamma * q_table.loc[state, :].max() - current)

    # Predict, for every test row, the largest Q-value stored for its state
    y_pred = [q_table.loc[state, :].max() for state in X_test[state_col]]

    # Calculate the evaluation metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return {'alpha': alpha, 'gamma': gamma, 'epsilon': epsilon, 'num_iterations': num_iterations, 'mse': mse, 'r2': r2}

# Candidate values for every hyperparameter of the search
param_grid = {'alpha': [0.1, 0.5, 0.9], 'gamma': [0.1, 0.5, 0.9], 'epsilon': [0.1, 0.5,0.9], 'num_iterations': [10000, 20000, 40000]}

# Expand the grid into the full list of combinations
param_list = list(ParameterGrid(param_grid))

# Run one train/evaluate cycle per combination and keep the parameter dict next to its scores
results = []
for params in param_list:
    result = train_eval_model(df=df, **params)
    result['params'] = params
    results.append(result)

# Report the scores obtained by every combination
for result in results:
    print("Parameters: ", result['params'])
    print("Mean Squared Error: ", result['mse'])
    print("R-squared: ", result['r2'])
    print()

# The best combination is the one with the smallest MSE
optimal_result = min(results, key=lambda entry: entry['mse'])
optimal_params = optimal_result['params']
print("Optimal combination of hyperparameters:")
for label, name in (("Alpha:", 'alpha'), ("Gamma:", 'gamma'), ("Epsilon:", 'epsilon'), ("Num Iterations:", 'num_iterations')):
    print(label, optimal_params[name])

# Collect all results in a dataframe
hyperparameters_combinations = pd.DataFrame(results)

# save into csv file
# hyperparameters_combinations.to_csv('hyperparameters.csv', index=False)

  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action]) # update the Q-Table with the new value
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action]) # update the Q-Table with the new value
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  q_table.loc[state, action] += alpha * 

  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)


Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 10000}
Mean Squared Error:  255.0690778866863
R-squared:  -3.252982012880077

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 20000}
Mean Squared Error:  185.8793356116241
R-squared:  -2.0993230440638997

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 40000}
Mean Squared Error:  187.2849271736218
R-squared:  -2.1227596584907094

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 10000}
Mean Squared Error:  213.91488753801022
R-squared:  -2.5667834632255477

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 20000}
Mean Squared Error:  245.12781362032862
R-squared:  -3.0872229233800574

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 40000}
Mean Squared Error:  116.4666917767559
R-squared:  -0.9419474494130569

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.9, 'num_iterat

In [18]:
# Display the dataframe currently bound to df
df

Unnamed: 0,anthropometric_measurement,garment_measurement,tolerance
0,38.7,63.5,24.8
1,26.0,63.5,37.5
2,34.0,63.4,29.4
3,26.2,35.0,8.8
4,29.8,58.0,28.2
...,...,...,...
147,34.0,60.2,26.2
148,35.5,66.1,30.6
149,28.7,60.5,31.8
150,34.5,65.6,31.1


In [20]:
# Print df's index range, per-column non-null counts and dtypes, and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 152 entries, 0 to 151
Data columns (total 3 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   anthropometric_measurement  152 non-null    float64
 1   garment_measurement         152 non-null    float64
 2   tolerance                   152 non-null    float64
dtypes: float64(3)
memory usage: 3.7 KB


In [22]:
import pandas as pd
import numpy as np
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Read the measurement records for this garment dimension
df = pd.read_csv("sleeve_width.csv")

# MDP spaces: states are the distinct body measurements, actions are the
# distinct garment measurements found in the data
state_space, action_space = (
    np.array(df[column].unique())
    for column in ('anthropometric_measurement', 'garment_measurement')
)

# Define a function to train and evaluate the Q-learning model with a given set of hyperparameters
def train_eval_model(alpha, gamma, epsilon, num_iterations, df):
    """Train a tabular Q-learning model on ``df`` and score it on a held-out split.

    The frame is split 80/20 with a fixed ``random_state``; the training loop
    itself draws from NumPy's global random state (``np.random.uniform`` /
    ``np.random.choice`` and an unseeded ``Series.sample``), so repeated calls
    are not expected to give identical metrics.

    Parameters
    ----------
    alpha : float
        Learning rate applied to every Q-table update.
    gamma : float
        Weight of the best Q-value of the sampled state in the update target.
    epsilon : float
        Probability of picking a random action instead of the greedy one.
    num_iterations : int
        Number of sampling steps (steps without a matching training row are
        skipped without updating the table).
    df : pandas.DataFrame
        Must provide the columns 'anthropometric_measurement',
        'garment_measurement' and 'tolerance'.

    Returns
    -------
    dict
        The four hyperparameters plus 'mse' and 'r2' computed on the test split.
    """
    state_col = 'anthropometric_measurement'
    action_col = 'garment_measurement'

    # Derive the spaces from the frame that was actually passed in rather than
    # from module-level globals, so the Q-table always matches ``df``.
    state_space = np.array(df[state_col].unique())
    action_space = np.array(df[action_col].unique())

    # Split the data into training and test data
    X_train, X_test, y_train, y_test = train_test_split(
        df[[state_col, action_col]], df['tolerance'], test_size=0.2, random_state=42)

    # Initialize the Q-Table: one row per state, one column per action
    q_table = pd.DataFrame(0.0, index=state_space, columns=action_space, dtype=np.float64)

    # Loop-invariant work hoisted out of the training loop: the state column
    # and a (state, action) -> tolerance lookup that keeps the FIRST matching
    # training row, which is what indexing the filtered values with [0] did.
    train_states = X_train[state_col]
    first_tolerance = {}
    for seen_state, seen_action, seen_tolerance in zip(train_states, X_train[action_col], y_train):
        first_tolerance.setdefault((seen_state, seen_action), seen_tolerance)

    for _ in range(num_iterations):
        state = train_states.sample(1).iloc[0]  # random state from the training data
        if np.random.uniform(0, 1) < epsilon:
            action = np.random.choice(action_space)  # explore: random action
        else:
            action = q_table.loc[state, :].idxmax()  # exploit: best-known action

        tolerance = first_tolerance.get((state, action))
        if tolerance is None:
            # This (state, action) pair never occurs in the training split
            continue

        current_q = q_table.loc[state, action]
        # NOTE(review): the reward is the absolute gap between the observed
        # tolerance and the current Q-value, so it is recomputed from the value
        # being updated; confirm this is the intended learning signal, since
        # the Q-value is later used directly as the tolerance prediction.
        reward = np.abs(tolerance - current_q)
        best_q = q_table.loc[state, :].max()
        q_table.loc[state, action] = current_q + alpha * (reward + gamma * best_q - current_q)

    # Use the Q-Table to make predictions on the test data: the prediction for
    # a state is the Q-value of its greedy action
    y_pred = []
    for state in X_test[state_col]:
        q_row = q_table.loc[state, :]
        y_pred.append(q_row.loc[q_row.idxmax()])

    # Calculate the evaluation metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return {'alpha': alpha, 'gamma': gamma, 'epsilon': epsilon, 'num_iterations': num_iterations, 'mse': mse, 'r2': r2}

# Candidate values for every tunable hyperparameter
param_grid = {
    'alpha': [0.1, 0.5, 0.9],
    'gamma': [0.1, 0.5, 0.9],
    'epsilon': [0.1, 0.5, 0.9],
    'num_iterations': [10000, 20000, 40000],
}

# Expand the grid into the full list of hyperparameter combinations
param_list = list(ParameterGrid(param_grid))

# Run one training/evaluation pass per combination and tag each result with
# the parameter dict that produced it
results = []
for params in param_list:
    result = train_eval_model(df=df, **params)
    result['params'] = params
    results.append(result)

# Report the metrics of every combination, separated by a blank line
for result in results:
    print("Parameters: ", result['params'])
    print("Mean Squared Error: ", result['mse'])
    print("R-squared: ", result['r2'], end="\n\n")

# The best combination is the one with the smallest test MSE
optimal_result = min(results, key=lambda entry: entry['mse'])
optimal_params = optimal_result['params']
print("Optimal combination of hyperparameters:")
print("Alpha:", optimal_params['alpha'])
print("Gamma:", optimal_params['gamma'])
print("Epsilon:", optimal_params['epsilon'])
print("Num Iterations:", optimal_params['num_iterations'])

# Collect all results in a dataframe for later inspection
hyperparameters_combinations = pd.DataFrame(data=results)

# Optional export of the results table (disabled)
# hyperparameters_combinations.to_csv('hyperparameters.csv', index=False)


  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action].astype(float)) # update the Q-Table with the new value
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action].astype(float)) # update the Q-Table with the new value
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action].astype(float)) # update the Q-Table with the new value
  q_

  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action].astype(float)) # update the Q-Table with the new value
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action].astype(float)) # update the Q-Table with the new value
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)


Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 10000}
Mean Squared Error:  308.6824255188344
R-squared:  -4.146922607400548

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 20000}
Mean Squared Error:  229.0332070413775
R-squared:  -2.8188639641060163

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 40000}
Mean Squared Error:  168.8240875871516
R-squared:  -1.8149464991911661

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 10000}
Mean Squared Error:  207.86220776523254
R-squared:  -2.465862024936653

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 20000}
Mean Squared Error:  238.44977825171614
R-squared:  -2.975874403444202

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 40000}
Mean Squared Error:  156.38272401433701
R-squared:  -1.6075011438808358

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.9, 'num_iterati

Optimal combination of hyperparameters:
Alpha: 0.1
Gamma: 0.9
Epsilon: 0.5
Num Iterations: 10000


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Read the measurement records for this garment dimension
df = pd.read_csv("top_length.csv")

# MDP spaces: states are the distinct body measurements, actions are the
# distinct garment measurements found in the data
state_space, action_space = (
    np.array(df[column].unique())
    for column in ('anthropometric_measurement', 'garment_measurement')
)

# Define a function to train and evaluate the Q-learning model with a given set of hyperparameters
def train_eval_model(alpha, gamma, epsilon, num_iterations, df):
    """Train a tabular Q-learning model on ``df`` and score it on a held-out split.

    The frame is split 80/20 with a fixed ``random_state``; the training loop
    itself draws from NumPy's global random state (``np.random.uniform`` /
    ``np.random.choice`` and an unseeded ``Series.sample``), so repeated calls
    are not expected to give identical metrics.

    Parameters
    ----------
    alpha : float
        Learning rate applied to every Q-table update.
    gamma : float
        Weight of the best Q-value of the sampled state in the update target.
    epsilon : float
        Probability of picking a random action instead of the greedy one.
    num_iterations : int
        Number of sampling steps (steps without a matching training row are
        skipped without updating the table).
    df : pandas.DataFrame
        Must provide the columns 'anthropometric_measurement',
        'garment_measurement' and 'tolerance'.

    Returns
    -------
    dict
        The four hyperparameters plus 'mse' and 'r2' computed on the test split.
    """
    state_col = 'anthropometric_measurement'
    action_col = 'garment_measurement'

    # Derive the spaces from the frame that was actually passed in rather than
    # from module-level globals, so the Q-table always matches ``df``.
    state_space = np.array(df[state_col].unique())
    action_space = np.array(df[action_col].unique())

    # Split the data into training and test data
    X_train, X_test, y_train, y_test = train_test_split(
        df[[state_col, action_col]], df['tolerance'], test_size=0.2, random_state=42)

    # Initialize the Q-Table: one row per state, one column per action
    q_table = pd.DataFrame(0.0, index=state_space, columns=action_space, dtype=np.float64)

    # Loop-invariant work hoisted out of the training loop: the state column
    # and a (state, action) -> tolerance lookup that keeps the FIRST matching
    # training row, which is what indexing the filtered values with [0] did.
    train_states = X_train[state_col]
    first_tolerance = {}
    for seen_state, seen_action, seen_tolerance in zip(train_states, X_train[action_col], y_train):
        first_tolerance.setdefault((seen_state, seen_action), seen_tolerance)

    for _ in range(num_iterations):
        state = train_states.sample(1).iloc[0]  # random state from the training data
        if np.random.uniform(0, 1) < epsilon:
            action = np.random.choice(action_space)  # explore: random action
        else:
            action = q_table.loc[state, :].idxmax()  # exploit: best-known action

        tolerance = first_tolerance.get((state, action))
        if tolerance is None:
            # This (state, action) pair never occurs in the training split
            continue

        current_q = q_table.loc[state, action]
        # NOTE(review): the reward is the absolute gap between the observed
        # tolerance and the current Q-value, so it is recomputed from the value
        # being updated; confirm this is the intended learning signal, since
        # the Q-value is later used directly as the tolerance prediction.
        reward = np.abs(tolerance - current_q)
        best_q = q_table.loc[state, :].max()
        q_table.loc[state, action] = current_q + alpha * (reward + gamma * best_q - current_q)

    # Use the Q-Table to make predictions on the test data: the prediction for
    # a state is the Q-value of its greedy action
    y_pred = []
    for state in X_test[state_col]:
        q_row = q_table.loc[state, :]
        y_pred.append(q_row.loc[q_row.idxmax()])

    # Calculate the evaluation metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return {'alpha': alpha, 'gamma': gamma, 'epsilon': epsilon, 'num_iterations': num_iterations, 'mse': mse, 'r2': r2}

# Candidate values for every tunable hyperparameter
param_grid = {
    'alpha': [0.1, 0.5, 0.9],
    'gamma': [0.1, 0.5, 0.9],
    'epsilon': [0.1, 0.5, 0.9],
    'num_iterations': [10000, 20000, 40000],
}

# Expand the grid into the full list of hyperparameter combinations
param_list = list(ParameterGrid(param_grid))

# Run one training/evaluation pass per combination and tag each result with
# the parameter dict that produced it
results = []
for params in param_list:
    result = train_eval_model(df=df, **params)
    result['params'] = params
    results.append(result)

# Report the metrics of every combination, separated by a blank line
for result in results:
    print("Parameters: ", result['params'])
    print("Mean Squared Error: ", result['mse'])
    print("R-squared: ", result['r2'], end="\n\n")

# The best combination is the one with the smallest test MSE
optimal_result = min(results, key=lambda entry: entry['mse'])
optimal_params = optimal_result['params']
print("Optimal combination of hyperparameters:")
print("Alpha:", optimal_params['alpha'])
print("Gamma:", optimal_params['gamma'])
print("Epsilon:", optimal_params['epsilon'])
print("Num Iterations:", optimal_params['num_iterations'])

# Collect all results in a dataframe for later inspection
hyperparameters_combinations = pd.DataFrame(data=results)

# Optional export of the results table (disabled)
# hyperparameters_combinations.to_csv('hyperparameters.csv', index=False)


  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action].astype(float)) # update the Q-Table with the new value
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action].astype(float)) # update the Q-Table with the new value
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action].astype(float)) # update the Q-Table with the new value
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action].astype(float)) # update the Q-Table with the new value
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float

  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)


Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 10000}
Mean Squared Error:  190.321723706958
R-squared:  -3.879125106303161

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 20000}
Mean Squared Error:  152.15157549367996
R-squared:  -2.9005876864473477

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 40000}
Mean Squared Error:  126.70637208471098
R-squared:  -2.2482694519884796

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 10000}
Mean Squared Error:  152.3136540337825
R-squared:  -2.9047427637491112

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 20000}
Mean Squared Error:  118.44677422150978
R-squared:  -2.036524778194105

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 40000}
Mean Squared Error:  46.67745519713268
R-squared:  -0.19663241334097292

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.9, 'num_iterat

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Read the measurement records for this garment dimension
df = pd.read_csv("wrapper_length.csv")

# MDP spaces: states are the distinct body measurements, actions are the
# distinct garment measurements found in the data
state_space, action_space = (
    np.array(df[column].unique())
    for column in ('anthropometric_measurement', 'garment_measurement')
)

# Define a function to train and evaluate the Q-learning model with a given set of hyperparameters
def train_eval_model(alpha, gamma, epsilon, num_iterations, df):
    """Train a tabular Q-learning model on ``df`` and score it on a held-out split.

    The frame is split 80/20 with a fixed ``random_state``; the training loop
    itself draws from NumPy's global random state (``np.random.uniform`` /
    ``np.random.choice`` and an unseeded ``Series.sample``), so repeated calls
    are not expected to give identical metrics.

    Parameters
    ----------
    alpha : float
        Learning rate applied to every Q-table update.
    gamma : float
        Weight of the best Q-value of the sampled state in the update target.
    epsilon : float
        Probability of picking a random action instead of the greedy one.
    num_iterations : int
        Number of sampling steps (steps without a matching training row are
        skipped without updating the table).
    df : pandas.DataFrame
        Must provide the columns 'anthropometric_measurement',
        'garment_measurement' and 'tolerance'.

    Returns
    -------
    dict
        The four hyperparameters plus 'mse' and 'r2' computed on the test split.
    """
    state_col = 'anthropometric_measurement'
    action_col = 'garment_measurement'

    # Derive the spaces from the frame that was actually passed in rather than
    # from module-level globals, so the Q-table always matches ``df``.
    state_space = np.array(df[state_col].unique())
    action_space = np.array(df[action_col].unique())

    # Split the data into training and test data
    X_train, X_test, y_train, y_test = train_test_split(
        df[[state_col, action_col]], df['tolerance'], test_size=0.2, random_state=42)

    # Initialize the Q-Table: one row per state, one column per action
    q_table = pd.DataFrame(0.0, index=state_space, columns=action_space, dtype=np.float64)

    # Loop-invariant work hoisted out of the training loop: the state column
    # and a (state, action) -> tolerance lookup that keeps the FIRST matching
    # training row, which is what indexing the filtered values with [0] did.
    train_states = X_train[state_col]
    first_tolerance = {}
    for seen_state, seen_action, seen_tolerance in zip(train_states, X_train[action_col], y_train):
        first_tolerance.setdefault((seen_state, seen_action), seen_tolerance)

    for _ in range(num_iterations):
        state = train_states.sample(1).iloc[0]  # random state from the training data
        if np.random.uniform(0, 1) < epsilon:
            action = np.random.choice(action_space)  # explore: random action
        else:
            action = q_table.loc[state, :].idxmax()  # exploit: best-known action

        tolerance = first_tolerance.get((state, action))
        if tolerance is None:
            # This (state, action) pair never occurs in the training split
            continue

        current_q = q_table.loc[state, action]
        # NOTE(review): the reward is the absolute gap between the observed
        # tolerance and the current Q-value, so it is recomputed from the value
        # being updated; confirm this is the intended learning signal, since
        # the Q-value is later used directly as the tolerance prediction.
        reward = np.abs(tolerance - current_q)
        best_q = q_table.loc[state, :].max()
        q_table.loc[state, action] = current_q + alpha * (reward + gamma * best_q - current_q)

    # Use the Q-Table to make predictions on the test data: the prediction for
    # a state is the Q-value of its greedy action
    y_pred = []
    for state in X_test[state_col]:
        q_row = q_table.loc[state, :]
        y_pred.append(q_row.loc[q_row.idxmax()])

    # Calculate the evaluation metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return {'alpha': alpha, 'gamma': gamma, 'epsilon': epsilon, 'num_iterations': num_iterations, 'mse': mse, 'r2': r2}

# Candidate values for every tunable hyperparameter
param_grid = {
    'alpha': [0.1, 0.5, 0.9],
    'gamma': [0.1, 0.5, 0.9],
    'epsilon': [0.1, 0.5, 0.9],
    'num_iterations': [10000, 20000, 40000],
}

# Expand the grid into the full list of hyperparameter combinations
param_list = list(ParameterGrid(param_grid))

# Run one training/evaluation pass per combination and tag each result with
# the parameter dict that produced it
results = []
for params in param_list:
    result = train_eval_model(df=df, **params)
    result['params'] = params
    results.append(result)

# Report the metrics of every combination, separated by a blank line
for result in results:
    print("Parameters: ", result['params'])
    print("Mean Squared Error: ", result['mse'])
    print("R-squared: ", result['r2'], end="\n\n")

# The best combination is the one with the smallest test MSE
optimal_result = min(results, key=lambda entry: entry['mse'])
optimal_params = optimal_result['params']
print("Optimal combination of hyperparameters:")
print("Alpha:", optimal_params['alpha'])
print("Gamma:", optimal_params['gamma'])
print("Epsilon:", optimal_params['epsilon'])
print("Num Iterations:", optimal_params['num_iterations'])

# Collect all results in a dataframe for later inspection
hyperparameters_combinations = pd.DataFrame(data=results)

# Optional export of the results table (disabled)
# hyperparameters_combinations.to_csv('hyperparameters.csv', index=False)


In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Read the measurement records for this garment dimension
df = pd.read_csv("armspan.csv")

# MDP spaces: states are the distinct body measurements, actions are the
# distinct garment measurements found in the data
state_space, action_space = (
    np.array(df[column].unique())
    for column in ('anthropometric_measurement', 'garment_measurement')
)

# Define a function to train and evaluate the Q-learning model with a given set of hyperparameters
def train_eval_model(alpha, gamma, epsilon, num_iterations, df):
    """Train a tabular Q-learning model on ``df`` and score it on a held-out split.

    The frame is split 80/20 with a fixed ``random_state``; the training loop
    itself draws from NumPy's global random state (``np.random.uniform`` /
    ``np.random.choice`` and an unseeded ``Series.sample``), so repeated calls
    are not expected to give identical metrics.

    Parameters
    ----------
    alpha : float
        Learning rate applied to every Q-table update.
    gamma : float
        Weight of the best Q-value of the sampled state in the update target.
    epsilon : float
        Probability of picking a random action instead of the greedy one.
    num_iterations : int
        Number of sampling steps (steps without a matching training row are
        skipped without updating the table).
    df : pandas.DataFrame
        Must provide the columns 'anthropometric_measurement',
        'garment_measurement' and 'tolerance'.

    Returns
    -------
    dict
        The four hyperparameters plus 'mse' and 'r2' computed on the test split.
    """
    state_col = 'anthropometric_measurement'
    action_col = 'garment_measurement'

    # Derive the spaces from the frame that was actually passed in rather than
    # from module-level globals, so the Q-table always matches ``df``.
    state_space = np.array(df[state_col].unique())
    action_space = np.array(df[action_col].unique())

    # Split the data into training and test data
    X_train, X_test, y_train, y_test = train_test_split(
        df[[state_col, action_col]], df['tolerance'], test_size=0.2, random_state=42)

    # Initialize the Q-Table: one row per state, one column per action
    q_table = pd.DataFrame(0.0, index=state_space, columns=action_space, dtype=np.float64)

    # Loop-invariant work hoisted out of the training loop: the state column
    # and a (state, action) -> tolerance lookup that keeps the FIRST matching
    # training row, which is what indexing the filtered values with [0] did.
    train_states = X_train[state_col]
    first_tolerance = {}
    for seen_state, seen_action, seen_tolerance in zip(train_states, X_train[action_col], y_train):
        first_tolerance.setdefault((seen_state, seen_action), seen_tolerance)

    for _ in range(num_iterations):
        state = train_states.sample(1).iloc[0]  # random state from the training data
        if np.random.uniform(0, 1) < epsilon:
            action = np.random.choice(action_space)  # explore: random action
        else:
            action = q_table.loc[state, :].idxmax()  # exploit: best-known action

        tolerance = first_tolerance.get((state, action))
        if tolerance is None:
            # This (state, action) pair never occurs in the training split
            continue

        current_q = q_table.loc[state, action]
        # NOTE(review): the reward is the absolute gap between the observed
        # tolerance and the current Q-value, so it is recomputed from the value
        # being updated; confirm this is the intended learning signal, since
        # the Q-value is later used directly as the tolerance prediction.
        reward = np.abs(tolerance - current_q)
        best_q = q_table.loc[state, :].max()
        q_table.loc[state, action] = current_q + alpha * (reward + gamma * best_q - current_q)

    # Use the Q-Table to make predictions on the test data: the prediction for
    # a state is the Q-value of its greedy action
    y_pred = []
    for state in X_test[state_col]:
        q_row = q_table.loc[state, :]
        y_pred.append(q_row.loc[q_row.idxmax()])

    # Calculate the evaluation metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return {'alpha': alpha, 'gamma': gamma, 'epsilon': epsilon, 'num_iterations': num_iterations, 'mse': mse, 'r2': r2}

# Candidate values for every tunable hyperparameter
param_grid = {
    'alpha': [0.1, 0.5, 0.9],
    'gamma': [0.1, 0.5, 0.9],
    'epsilon': [0.1, 0.5, 0.9],
    'num_iterations': [10000, 20000, 40000],
}

# Expand the grid into the full list of hyperparameter combinations
param_list = list(ParameterGrid(param_grid))

# Run one training/evaluation pass per combination and tag each result with
# the parameter dict that produced it
results = []
for params in param_list:
    result = train_eval_model(df=df, **params)
    result['params'] = params
    results.append(result)

# Report the metrics of every combination, separated by a blank line
for result in results:
    print("Parameters: ", result['params'])
    print("Mean Squared Error: ", result['mse'])
    print("R-squared: ", result['r2'], end="\n\n")

# The best combination is the one with the smallest test MSE
optimal_result = min(results, key=lambda entry: entry['mse'])
optimal_params = optimal_result['params']
print("Optimal combination of hyperparameters:")
print("Alpha:", optimal_params['alpha'])
print("Gamma:", optimal_params['gamma'])
print("Epsilon:", optimal_params['epsilon'])
print("Num Iterations:", optimal_params['num_iterations'])

# Collect all results in a dataframe for later inspection
hyperparameters_combinations = pd.DataFrame(data=results)

# Optional export of the results table (disabled)
# hyperparameters_combinations.to_csv('hyperparameters.csv', index=False)


  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action].astype(float)) # update the Q-Table with the new value
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action].astype(float)) # update the Q-Table with the new value
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  output_err

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 10000}
Mean Squared Error:  684.9596780855688
R-squared:  -2.103069204613337

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 20000}
Mean Squared Error:  787.0804560579019
R-squared:  -2.5657064246067676

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 40000}
Mean Squared Error:  388.69250558485976
R-squared:  -0.7608916009705586

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 10000}
Mean Squared Error:  588.532460633394
R-squared:  -1.6662254917123023

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 20000}
Mean Squared Error:  573.678100362433
R-squared:  -1.59893086199062

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 40000}
Mean Squared Error:  493.21114695340503
R-squared:  -1.2343918488176175

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.9, 'num_iterations

Epsilon: 0.9
Num Iterations: 10000


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Read the measurement records for this garment dimension
df = pd.read_csv("wrapper_width.csv")

# MDP spaces: states are the distinct body measurements, actions are the
# distinct garment measurements found in the data
state_space, action_space = (
    np.array(df[column].unique())
    for column in ('anthropometric_measurement', 'garment_measurement')
)

# Define a function to train and evaluate the Q-learning model with a given set of hyperparameters
def train_eval_model(alpha, gamma, epsilon, num_iterations, df):
    """Train a tabular Q-learning model on ``df`` and score it on a held-out split.

    The frame is split 80/20 with a fixed ``random_state``; the training loop
    itself draws from NumPy's global random state (``np.random.uniform`` /
    ``np.random.choice`` and an unseeded ``Series.sample``), so repeated calls
    are not expected to give identical metrics.

    Parameters
    ----------
    alpha : float
        Learning rate applied to every Q-table update.
    gamma : float
        Weight of the best Q-value of the sampled state in the update target.
    epsilon : float
        Probability of picking a random action instead of the greedy one.
    num_iterations : int
        Number of sampling steps (steps without a matching training row are
        skipped without updating the table).
    df : pandas.DataFrame
        Must provide the columns 'anthropometric_measurement',
        'garment_measurement' and 'tolerance'.

    Returns
    -------
    dict
        The four hyperparameters plus 'mse' and 'r2' computed on the test split.
    """
    state_col = 'anthropometric_measurement'
    action_col = 'garment_measurement'

    # Derive the spaces from the frame that was actually passed in rather than
    # from module-level globals, so the Q-table always matches ``df``.
    state_space = np.array(df[state_col].unique())
    action_space = np.array(df[action_col].unique())

    # Split the data into training and test data
    X_train, X_test, y_train, y_test = train_test_split(
        df[[state_col, action_col]], df['tolerance'], test_size=0.2, random_state=42)

    # Initialize the Q-Table: one row per state, one column per action
    # (explicit float64 dtype, matching the other copies of this function)
    q_table = pd.DataFrame(0.0, index=state_space, columns=action_space, dtype=np.float64)

    # Loop-invariant work hoisted out of the training loop: the state column
    # and a (state, action) -> tolerance lookup that keeps the FIRST matching
    # training row, which is what indexing the filtered values with [0] did.
    train_states = X_train[state_col]
    first_tolerance = {}
    for seen_state, seen_action, seen_tolerance in zip(train_states, X_train[action_col], y_train):
        first_tolerance.setdefault((seen_state, seen_action), seen_tolerance)

    for _ in range(num_iterations):
        state = train_states.sample(1).iloc[0]  # random state from the training data
        if np.random.uniform(0, 1) < epsilon:
            action = np.random.choice(action_space)  # explore: random action
        else:
            action = q_table.loc[state, :].idxmax()  # exploit: best-known action

        tolerance = first_tolerance.get((state, action))
        if tolerance is None:
            # This (state, action) pair never occurs in the training split
            continue

        current_q = q_table.loc[state, action]
        # NOTE(review): the reward is the absolute gap between the observed
        # tolerance and the current Q-value, so it is recomputed from the value
        # being updated; confirm this is the intended learning signal, since
        # the Q-value is later used directly as the tolerance prediction.
        reward = np.abs(tolerance - current_q)
        best_q = q_table.loc[state, :].max()
        q_table.loc[state, action] = current_q + alpha * (reward + gamma * best_q - current_q)

    # Use the Q-Table to make predictions on the test data: the prediction for
    # a state is the Q-value of its greedy action
    y_pred = []
    for state in X_test[state_col]:
        q_row = q_table.loc[state, :]
        y_pred.append(q_row.loc[q_row.idxmax()])

    # Calculate the evaluation metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return {'alpha': alpha, 'gamma': gamma, 'epsilon': epsilon, 'num_iterations': num_iterations, 'mse': mse, 'r2': r2}

# Candidate values for every tunable hyperparameter
param_grid = {
    'alpha': [0.1, 0.5, 0.9],
    'gamma': [0.1, 0.5, 0.9],
    'epsilon': [0.1, 0.5, 0.9],
    'num_iterations': [10000, 20000, 40000],
}

# Expand the grid into the full list of hyperparameter combinations
param_list = list(ParameterGrid(param_grid))

# Run one training/evaluation pass per combination and tag each result with
# the parameter dict that produced it
results = []
for params in param_list:
    result = train_eval_model(df=df, **params)
    result['params'] = params
    results.append(result)

# Report the metrics of every combination, separated by a blank line
for result in results:
    print("Parameters: ", result['params'])
    print("Mean Squared Error: ", result['mse'])
    print("R-squared: ", result['r2'], end="\n\n")

# The best combination is the one with the smallest test MSE
optimal_result = min(results, key=lambda entry: entry['mse'])
optimal_params = optimal_result['params']
print("Optimal combination of hyperparameters:")
print("Alpha:", optimal_params['alpha'])
print("Gamma:", optimal_params['gamma'])
print("Epsilon:", optimal_params['epsilon'])
print("Num Iterations:", optimal_params['num_iterations'])

# Collect all results in a dataframe for later inspection
hyperparameters_combinations = pd.DataFrame(data=results)

# Optional export of the results table (disabled)
# hyperparameters_combinations.to_csv('hyperparameters.csv', index=False)

  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action]) # update the Q-Table with the new value
  q_table.loc[state, action] += alpha * (reward + gamma * q_table.loc[state, :].max() - q_table.loc[state, action]) # update the Q-Table with the new value
  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64)
  q_table.loc[state, action] += alpha * 

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 10000}
Mean Squared Error:  5776.101676420497
R-squared:  -27.502026801315935

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 20000}
Mean Squared Error:  6588.231564173593
R-squared:  -31.509461075089497

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.1, 'num_iterations': 40000}
Mean Squared Error:  3976.8619427649983
R-squared:  -18.623724101073048

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 10000}
Mean Squared Error:  6063.941888706857
R-squared:  -28.92236700733641

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 20000}
Mean Squared Error:  3732.526463774496
R-squared:  -17.41805689491414

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.5, 'num_iterations': 40000}
Mean Squared Error:  2903.940592592595
R-squared:  -13.329420989485701

Parameters:  {'alpha': 0.1, 'epsilon': 0.1, 'gamma': 0.9, 'num_iteratio