Train linear regression model

In [10]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import joblib

# Training set for the shoe-sales regression
df = pd.read_csv('train2.csv')

# Two predictor columns and the sales column used as the target
feature_columns = [
    'Previous Day Foot Traffic (People)',
    'Previous Week Average Temperature (Celcius)',
]
X, y = df[feature_columns], df['Shoe Pairs Sold']

# Least-squares linear model; fit() hands back the fitted estimator itself
model = LinearRegression().fit(X, y)

# R² is computed on the same rows the model was fitted on (a training score)
y_pred = model.predict(X)
r2 = r2_score(y, y_pred)
print(f'R² score: {r2}')

# Persist the fitted estimator to disk
joblib_file = "multi_linear_regression_model.joblib"
joblib.dump(model, joblib_file)
print(f'Model saved as {joblib_file}')

R² score: 0.9166623298252717
Model saved as multi_linear_regression_model.joblib


Test multiple linear regression model

In [11]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import joblib

# Evaluation set for the shoe-sales regression
df = pd.read_csv('test2.csv')

# Same predictor and target columns as the training cell selects
feature_columns = [
    'Previous Day Foot Traffic (People)',
    'Previous Week Average Temperature (Celcius)',
]
X, y = df[feature_columns], df['Shoe Pairs Sold']

# Nothing is fitted here: the estimator is restored from the saved joblib file.
# NOTE: joblib.load unpickles the file, so only load files you trust.
model = joblib.load("multi_linear_regression_model.joblib")

# Score the restored model on the rows loaded above
y_pred = model.predict(X)
r2 = r2_score(y, y_pred)
print(f'R² score: {r2}')


R² score: 0.7397230240745114


Train Logistic Classifier

In [12]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# Training data for the store-membership classifier
file_path = 'train3.csv'
data = pd.read_csv(file_path)

# Quick look at the first rows
print(data.head())

# No preprocessing is applied: the data is assumed to have no missing values
# and no categorical columns that would need encoding.

# Five numeric predictors; "Store Member" is the label
feature_columns = ["Spend ($)", "Salary", "Weight (lb)", "Times Visited in Past Year", "Age"]
X, y = data[feature_columns], data["Store Member"]

# Logistic regression with scikit-learn defaults; fit() returns the estimator
model = LogisticRegression().fit(X, y)

# Predictions are made on the same rows used for fitting (training metrics)
y_pred = model.predict(X)

# Overall accuracy
accuracy = accuracy_score(y, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Per-class report followed by the raw confusion matrix
cm = confusion_matrix(y, y_pred)
print("Classification Report:")
print(classification_report(y, y_pred))
print("Confusion Matrix:")
print(cm)

# Cells of the confusion matrix: row = true class, column = predicted class
TN, FP, FN, TP = cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1]

# Rates derived from the four cells
TPR = TP / (TP + FN)  # sensitivity
FPR = FP / (FP + TN)
TNR = TN / (TN + FP)  # specificity
FNR = FN / (FN + TP)

print(f"True Positive Rate (Sensitivity): {TPR:.2f}")
print(f"False Positive Rate: {FPR:.2f}")
print(f"True Negative Rate (Specificity): {TNR:.2f}")
print(f"False Negative Rate: {FNR:.2f}")

# Persist the fitted classifier to disk
model_filename = 'logistic_regression_model.joblib'
joblib.dump(model, model_filename)
print(f"Model saved as {model_filename}")


   Store Member  Spend ($)  Salary  Weight (lb)  Times Visited in Past Year  \
0             0     130.65   10000          200                           9   
1             0     113.30    8440          181                          20   
2             0      44.51    2486          108                           1   
3             1     685.35   66785          184                          10   
4             0     493.08   47268          147                          10   

   Age  
0   22  
1   14  
2   38  
3   13  
4   11  
Accuracy: 0.93
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.89      0.92        19
           1       0.92      0.96      0.94        23

    accuracy                           0.93        42
   macro avg       0.93      0.93      0.93        42
weighted avg       0.93      0.93      0.93        42

Confusion Matrix:
[[17  2]
 [ 1 22]]
True Positive Rate (Sensitivity): 0.96
False Positive Rate: 0.11
True

Test Logistic Classifier

In [13]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# Evaluation data for the store-membership classifier
file_path = 'test3.csv'
data = pd.read_csv(file_path)

# Quick look at the first rows
print(data.head())

# No preprocessing is applied: the data is assumed to have no missing values
# and no categorical columns that would need encoding.

# Five numeric predictors; "Store Member" is the label
feature_columns = ["Spend ($)", "Salary", "Weight (lb)", "Times Visited in Past Year", "Age"]
X, y = data[feature_columns], data["Store Member"]

# No training in this cell: the classifier is restored from the joblib file.
# NOTE: joblib.load unpickles the file, so only load files you trust.
model = joblib.load('logistic_regression_model.joblib')

# Predict labels for every loaded row
y_pred = model.predict(X)

# Overall accuracy
accuracy = accuracy_score(y, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Per-class report followed by the raw confusion matrix
cm = confusion_matrix(y, y_pred)
print("Classification Report:")
print(classification_report(y, y_pred))
print("Confusion Matrix:")
print(cm)

# Cells of the confusion matrix: row = true class, column = predicted class
TN, FP, FN, TP = cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1]

# Rates derived from the four cells
TPR = TP / (TP + FN)  # sensitivity
FPR = FP / (FP + TN)
TNR = TN / (TN + FP)  # specificity
FNR = FN / (FN + TP)

print(f"True Positive Rate (Sensitivity): {TPR:.2f}")
print(f"False Positive Rate: {FPR:.2f}")
print(f"True Negative Rate (Specificity): {TNR:.2f}")
print(f"False Negative Rate: {FNR:.2f}")


   Store Member  Spend ($)  Salary  Weight (lb)  Times Visited in Past Year  \
0             0     299.97   26857          200                          12   
1             0     325.05   28755          185                           0   
2             0     220.67   20677          114                          10   
3             0     173.18   15073          166                           9   
4             1     695.23   67368          128                          15   

   Age  
0   22  
1   45  
2   35  
3   38  
4   20  
Accuracy: 0.90
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.95      0.91        22
           1       0.94      0.84      0.89        19

    accuracy                           0.90        41
   macro avg       0.91      0.90      0.90        41
weighted avg       0.91      0.90      0.90        41

Confusion Matrix:
[[21  1]
 [ 3 16]]
True Positive Rate (Sensitivity): 0.84
False Positive Rate: 0.05
True

Neural Network Classifier - TODO: fix to split the dataset beforehand

True Positive Rate (Sensitivity): 0.92
False Positive Rate: 0.10
True Negative Rate (Specificity): 0.90
False Negative Rate: 0.08
Model saved as mlp_classifier_model.joblib

In [14]:
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# Training data for the neural-network membership classifier
file_path = 'train4.csv'
data = pd.read_csv(file_path)

# No preprocessing is applied: the data is assumed to have no missing values
# and no categorical columns that would need encoding.

# Five numeric predictors; "Store Member" is the label
feature_columns = ["Spend ($)", "Salary", "Weight (lb)", "Times Visited in Past Year", "Age"]
X, y = data[feature_columns], data["Store Member"]

# Multi-layer perceptron with hidden layers of 5 and 7 units, capped at 500
# iterations; random_state is fixed so repeated runs give the same model
model = MLPClassifier(hidden_layer_sizes=(5, 7), max_iter=500, random_state=42).fit(X, y)

# Predictions are made on the same rows used for fitting (training metrics)
y_pred = model.predict(X)

# Overall accuracy
accuracy = accuracy_score(y, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Per-class report followed by the raw confusion matrix
cm = confusion_matrix(y, y_pred)
print("Classification Report:")
print(classification_report(y, y_pred))
print("Confusion Matrix:")
print(cm)

# Cells of the confusion matrix: row = true class, column = predicted class
TN, FP, FN, TP = cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1]

# Rates derived from the four cells
TPR = TP / (TP + FN)  # sensitivity
FPR = FP / (FP + TN)
TNR = TN / (TN + FP)  # specificity
FNR = FN / (FN + TP)

print(f"True Positive Rate (Sensitivity): {TPR:.2f}")
print(f"False Positive Rate: {FPR:.2f}")
print(f"True Negative Rate (Specificity): {TNR:.2f}")
print(f"False Negative Rate: {FNR:.2f}")

# Persist the fitted classifier to disk
model_filename = 'mlp_classifier_model2.joblib'
joblib.dump(model, model_filename)
print(f"Model saved as {model_filename}")


Accuracy: 0.89
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.91      0.90       187
           1       0.90      0.86      0.88       167

    accuracy                           0.89       354
   macro avg       0.89      0.89      0.89       354
weighted avg       0.89      0.89      0.89       354

Confusion Matrix:
[[171  16]
 [ 24 143]]
True Positive Rate (Sensitivity): 0.86
False Positive Rate: 0.09
True Negative Rate (Specificity): 0.91
False Negative Rate: 0.14
Model saved as mlp_classifier_model2.joblib


In [7]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# Evaluation data for the neural-network membership classifier
file_path = 'test4.csv'
data = pd.read_csv(file_path)

# Quick look at the first rows
print(data.head())

# No preprocessing is applied: the data is assumed to have no missing values
# and no categorical columns that would need encoding.

# Five numeric predictors; "Store Member" is the label
feature_columns = ["Spend ($)", "Salary", "Weight (lb)", "Times Visited in Past Year", "Age"]
X, y = data[feature_columns], data["Store Member"]

# No training in this cell: the saved MLP classifier is restored from disk.
# NOTE: joblib.load unpickles the file, so only load files you trust.
model = joblib.load('mlp_classifier_model2.joblib')

# Predict labels for every loaded row
y_pred = model.predict(X)

# Overall accuracy
accuracy = accuracy_score(y, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Per-class report followed by the raw confusion matrix
cm = confusion_matrix(y, y_pred)
print("Classification Report:")
print(classification_report(y, y_pred))
print("Confusion Matrix:")
print(cm)

# Cells of the confusion matrix: row = true class, column = predicted class
TN, FP, FN, TP = cm[0, 0], cm[0, 1], cm[1, 0], cm[1, 1]

# Rates derived from the four cells
TPR = TP / (TP + FN)  # sensitivity
FPR = FP / (FP + TN)
TNR = TN / (TN + FP)  # specificity
FNR = FN / (FN + TP)

print(f"True Positive Rate (Sensitivity): {TPR:.2f}")
print(f"False Positive Rate: {FPR:.2f}")
print(f"True Negative Rate (Specificity): {TNR:.2f}")
print(f"False Negative Rate: {FNR:.2f}")


   Store Member   Spend ($)  Salary  Weight (lb)  Times Visited in Past Year  \
0             1  740.708490   72198          118                           9   
1             0  495.725511   47006          152                          14   
2             0  160.113494   13297          158                          15   
3             0  334.352672   31512          116                           1   
4             1  665.097256   63987          134                          19   

   Age  
0   27  
1   38  
2   38  
3   29  
4   50  
Accuracy: 0.90
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.94      0.91       195
           1       0.92      0.84      0.88       159

    accuracy                           0.90       354
   macro avg       0.90      0.89      0.89       354
weighted avg       0.90      0.90      0.89       354

Confusion Matrix:
[[184  11]
 [ 26 133]]
True Positive Rate (Sensitivity): 0.84
False Positive Rate:

Value Prediction Neural Network -- gives a very similar result to linear regression on the smaller dataset example

In [11]:
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score
import joblib

# Training set for the shoe-sales regression
df = pd.read_csv('train2.csv')

# Two predictor columns and the sales column used as the target
feature_columns = [
    'Previous Day Foot Traffic (People)',
    'Previous Week Average Temperature (Celcius)',
]
X, y = df[feature_columns], df['Shoe Pairs Sold']

# Small neural-network regressor (two hidden layers of 2 units each), capped at
# 1000 iterations; random_state is fixed so repeated runs give the same model
model = MLPRegressor(hidden_layer_sizes=(2,2), max_iter=1000, random_state=42).fit(X, y)

# R² is computed on the same rows the model was fitted on (a training score)
y_pred = model.predict(X)
r2 = r2_score(y, y_pred)
print(f'R² score: {r2}')

# Persist the fitted estimator to disk
joblib_file = "MLP_regressor.joblib"
joblib.dump(model, joblib_file)
print(f'Model saved as {joblib_file}')

R² score: 0.9134578577537976
Model saved as MLP_regressor.joblib




In [12]:
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score
import joblib

# Evaluation set for the shoe-sales regression
df = pd.read_csv('test2.csv')

# Two predictor columns and the sales column used as the target
feature_columns = [
    'Previous Day Foot Traffic (People)',
    'Previous Week Average Temperature (Celcius)',
]
X, y = df[feature_columns], df['Shoe Pairs Sold']

# Nothing is fitted here: the saved MLP regressor is restored from disk.
# NOTE: joblib.load unpickles the file, so only load files you trust.
model = joblib.load("MLP_regressor.joblib")

# Score the restored model on the rows loaded above
y_pred = model.predict(X)
r2 = r2_score(y, y_pred)
print(f'R² score: {r2}')

R² score: 0.7584843569016629


Complex value Prediction

In [15]:
import pandas as pd
from sklearn.neural_network import MLPRegressor
# r2_score is the metric this cell actually uses; it was previously missing from
# the import list, so the cell only ran when an earlier cell had imported it.
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, r2_score
import joblib

# Load the training data
file_path = 'train4.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the dataset
print(data.head())

# Preprocess the data
# Assuming there are no missing values and no categorical variables to encode

# Regression task: predict "Spend ($)" from the four remaining numeric columns
# ("Store Member" is not used as a feature here)
X = data[["Salary", "Weight (lb)", "Times Visited in Past Year", "Age"]]
y = data["Spend ($)"]

# Create and train the MLP regressor: three hidden layers of 10 units each,
# capped at 1000 iterations, with a fixed random_state for repeatable results
model = MLPRegressor(hidden_layer_sizes=(10, 10, 10), max_iter=1000, random_state=42)

model.fit(X, y)

# Predict on the same rows used for fitting, so the score below is a training score
y_pred = model.predict(X)

# Calculate the R² score
r2 = r2_score(y, y_pred)

print(f'R² score: {r2}')

# Persist the fitted estimator to disk
joblib_file = "MLP_regressor_complex.joblib"
joblib.dump(model, joblib_file)

print(f'Model saved as {joblib_file}')

   Store Member  Spend ($)  Salary  Weight (lb)  Times Visited in Past Year  \
0             0     130.65   10000          200                           9   
1             0     113.30    8440          181                          20   
2             0      44.51    2486          108                           1   
3             1     685.35   66785          184                          10   
4             0     493.08   47268          147                          10   

   Age  
0   22  
1   14  
2   38  
3   13  
4   11  
R² score: 0.9986422844961218
Model saved as MLP_regressor_complex.joblib


In [16]:
import pandas as pd
from sklearn.neural_network import MLPRegressor
# r2_score is the metric this cell actually uses; it was previously missing from
# the import list, so the cell only ran when an earlier cell had imported it.
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, r2_score
import joblib

# Load the evaluation data
file_path = 'test4.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the dataset
print(data.head())

# Preprocess the data
# Assuming there are no missing values and no categorical variables to encode

# Regression task: predict "Spend ($)" from the four remaining numeric columns
# ("Store Member" is not used as a feature here)
X = data[["Salary", "Weight (lb)", "Times Visited in Past Year", "Age"]]
y = data["Spend ($)"]

# Load the previously saved MLP regressor; no training happens in this cell.
# NOTE: joblib.load unpickles the file, so only load files you trust.
model = joblib.load('MLP_regressor_complex.joblib')

# Predict the target variable for every loaded row
y_pred = model.predict(X)

# Calculate the R² score
r2 = r2_score(y, y_pred)

print(f'R² score: {r2}')

   Store Member   Spend ($)  Salary  Weight (lb)  Times Visited in Past Year  \
0             1  774.519174   74508          178                          13   
1             1  740.708490   72198          118                           9   
2             0  495.725511   47006          152                          14   
3             0  160.113494   13297          158                          15   
4             0  334.352672   31512          116                           1   

   Age  
0   44  
1   27  
2   38  
3   38  
4   29  
R² score: 0.9987667808080891
