In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
import pandas as pd

In [2]:
class LogisticRegressionModel:
    """Logistic-regression workflow for predicting ``success_indicator``.

    The methods store their results as attributes that later methods read, so
    they are meant to be called in this order:
    ``load`` -> ``preprocess`` -> ``train`` -> ``test`` -> ``evaluate``.
    ``predict`` can be used any time after ``train``.
    """

    # Column names this workflow expects in the loaded data.
    DROPPED_COLUMNS = ('item_no',)  # removed before modelling, never a feature
    CATEGORICAL_COLUMNS = ('category', 'main_promotion', 'color')  # one-hot encoded
    TARGET_COLUMN = 'success_indicator'

    def __init__(self, random_state=42, test_size=0.2):
        """Configure the workflow.

        Args:
            random_state: Seed passed to both the train/test split and the
                estimator.
            test_size: Fraction (or absolute number) of rows held out for
                testing; forwarded to ``train_test_split``.
        """
        self.random_state = random_state
        self.test_size = test_size
        self.model = None

    def load(self, file_path):
        """Read the CSV at ``file_path`` into ``self.historic_data``."""
        self.historic_data = pd.read_csv(file_path)

    def preprocess(self):
        """Build the feature matrix ``self.X`` and target ``self.y``.

        Drops ``DROPPED_COLUMNS``, one-hot encodes ``CATEGORICAL_COLUMNS`` and
        stores the encoded frame back in ``self.historic_data``. The frame that
        was loaded is not modified in place, and calling this method again on
        already-encoded data is a no-op rather than an error.

        Raises:
            KeyError: If a categorical column is neither present nor already
                encoded, or if the target column is missing.
        """
        # Non-inplace drop leaves the caller's frame untouched; errors='ignore'
        # makes a repeat call safe once the column is already gone.
        frame = self.historic_data.drop(
            columns=list(self.DROPPED_COLUMNS), errors='ignore'
        )

        pending = [name for name in self.CATEGORICAL_COLUMNS if name in frame.columns]

        # A categorical column may only be absent if its dummy columns
        # ("<name>_<value>") already exist; otherwise the schema is wrong.
        missing = [
            name
            for name in self.CATEGORICAL_COLUMNS
            if name not in frame.columns
            and not any(str(col).startswith(name + '_') for col in frame.columns)
        ]
        if missing:
            raise KeyError("Missing expected column(s): {}".format(missing))

        if pending:
            frame = pd.get_dummies(frame, columns=pending)

        self.historic_data = frame
        self.X = frame.drop(columns=[self.TARGET_COLUMN])
        self.y = frame[self.TARGET_COLUMN]

    def train(self):
        """Split ``X``/``y`` into train and test sets and fit the estimator.

        Stores ``X_train``, ``X_test``, ``y_train``, ``y_test`` and the fitted
        ``model`` on the instance.
        """
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X,
            self.y,
            test_size=self.test_size,
            random_state=self.random_state,
        )
        self.model = LogisticRegression(random_state=self.random_state)
        self.model.fit(self.X_train, self.y_train)

    def test(self):
        """Score the fitted model on the held-out split.

        Stores the predictions in ``y_pred``, the accuracy in ``accuracy`` and
        the text report in ``classification_rep``.
        """
        self.y_pred = self.model.predict(self.X_test)
        self.accuracy = accuracy_score(self.y_test, self.y_pred)
        self.classification_rep = classification_report(self.y_test, self.y_pred)

    def predict(self, data):
        """Return the fitted model's predictions for ``data``.

        ``data`` is passed to the estimator as-is, so it must already have the
        same encoded columns as ``self.X``.
        """
        return self.model.predict(data)

    def evaluate(self):
        """Print the accuracy and classification report computed by ``test``."""
        print("Logistic Regression Accuracy:", self.accuracy)
        print("\nClassification Report:")
        print(self.classification_rep)

In [3]:
# Run the logistic-regression workflow end to end. Each call reads attributes
# stored by the previous one, so the order below matters.
HISTORIC_CSV = 'historic.csv'

logistic_model = LogisticRegressionModel()
logistic_model.load(HISTORIC_CSV)
logistic_model.preprocess()
logistic_model.train()
logistic_model.test()
logistic_model.evaluate()

Logistic Regression Accuracy: 0.818125

Classification Report:
              precision    recall  f1-score   support

        flop       0.80      0.66      0.72       571
         top       0.83      0.91      0.87      1029

    accuracy                           0.82      1600
   macro avg       0.81      0.78      0.79      1600
weighted avg       0.82      0.82      0.81      1600



Logistic regression reached an accuracy of approximately 81.8 percent on the test set. For comparison, the SVM reached 84 percent, the random forest 82 percent, the gradient boosting model 83.5 percent, and the artificial neural network between 85 and 87 percent.