# Logistic Regression Classifier Predictions

In [19]:
# Packages
import os

import joblib
import pandas as pd
import sklearn
from sklearn.preprocessing import StandardScaler

## Load Test Data

In [20]:
def combine_directory_parquets(directory_path):
    '''
    Combines all parquet files in a directory into a single dataframe.

    Parameters
    ----------
    directory_path : str
        Directory to scan (non-recursively). A trailing slash is optional.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of every file whose name ends in '.parquet',
        read in sorted filename order. Each file keeps its own index, so
        index labels may repeat in the result.

    Raises
    ------
    ValueError
        If the directory contains no '.parquet' files.
    '''
    # os.listdir returns entries in arbitrary order; sort so the row order of
    # the combined frame is reproducible across runs and machines.
    file_list = sorted(f for f in os.listdir(directory_path) if f.endswith('.parquet'))
    if not file_list:
        # pd.concat([]) would otherwise fail with a generic
        # "No objects to concatenate" message that hides the real cause.
        raise ValueError(f"No .parquet files found in '{directory_path}'")
    # os.path.join handles both a present and a missing trailing separator.
    return pd.concat([pd.read_parquet(os.path.join(directory_path, f)) for f in file_list])
# Read every parquet shard of the test split into one frame, then remove the
# 'test_80_20' and 'Class' columns. 'Image Path' is kept because the final
# export cell writes it next to the predictions.
test_data = combine_directory_parquets(
    '../../../Data/Features/All Features/test'
).drop(columns=['test_80_20', 'Class'])
print('all test data')
print(test_data)

all test data
                                            Image Path  \
0    ../../../Images/test/No Blur/Pickup_test_orig_...   
1    ../../../Images/test/No Blur/SUV_test_orig_tes...   
2    ../../../Images/test/No Blur/Convertible_test_...   
3    ../../../Images/test/No Blur/Convertible_test_...   
4    ../../../Images/test/No Blur/Sedan_test_orig_t...   
..                                                 ...   
99   ../../../Images/test/No Blur/SUV_test_orig_tes...   
100  ../../../Images/test/No Blur/Convertible_test_...   
101  ../../../Images/test/No Blur/Sedan_test_orig_t...   
102  ../../../Images/test/No Blur/SUV_test_orig_tra...   
103  ../../../Images/test/No Blur/Convertible_test_...   

     ViT_Embedding_Element_0  ViT_Embedding_Element_1  \
0                  -0.347766                 0.167595   
1                  -0.098150                 0.112476   
2                  -0.203125                 0.251158   
3                  -0.211059                 0.216949   
4   

## Load model from 'Best Logistic Regression Model.joblib'

In [21]:
# Restore the fitted classifier that was serialized with joblib and print its
# configuration. joblib.load unpickles the file, so only load trusted files.
best_model = joblib.load(filename='Best Logistic Regression Model.joblib')
print('best model')
print(best_model)

best model
LogisticRegression(C=1, class_weight='balanced', l1_ratio=0.75, max_iter=1000,
                   multi_class='ovr', penalty='elasticnet', solver='saga')


## Create X_test

In [22]:
# Feature matrix: every column of test_data except the image identifier.
X_test = test_data.drop(columns='Image Path')


## Preprocess with StandardScaler

In [23]:
# Standardize each feature column to zero mean and unit variance.
# StandardScaler is imported explicitly at the top of the notebook: reaching it
# through `sklearn.preprocessing` after a bare `import sklearn` only works when
# another import has already loaded that submodule.
#
# NOTE(review): the scaler is fitted on the test set itself, so the features
# are scaled with test-set statistics. The classifier was presumably trained on
# features scaled with training-set statistics -- confirm, and if the training
# scaler was persisted, load it and call `.transform(X_test)` instead.
scaler = StandardScaler()
X_test = scaler.fit_transform(X_test)

## Predictions

In [24]:
# Predict one class label per row of the scaled test features.
predictions = best_model.predict(X_test)

# Attach the labels to test_data so they sit next to each image path.
test_data['Logistic_Regression_Classification'] = predictions

# Sanity check: show the first five predicted labels.
print('predictions head')
print(predictions[:5])

predictions head
['SUV' 'Sedan' 'Convertible' 'Convertible' 'Convertible']


## Save Predictions to Excel

In [25]:
# Export only the image identifier and its predicted label, without the index.
prediction_columns = ['Image Path', 'Logistic_Regression_Classification']
test_data[prediction_columns].to_excel(
    '../../../Data/Predictions/Logistic Regression/Logistic_Regression_Classifier_Predictions.xlsx',
    index=False,
)