<a href="https://colab.research.google.com/github/atharva2-0x/Calci_ts/blob/main/Final_AI_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

# Fix the legacy global NumPy seed so the synthetic table is reproducible.
np.random.seed(42)

num_records = 200
age_categories = ['18-25', '26-35', '36-45', '46-55', '55+']
promotion_types = ['Discount', 'Buy One Get One', 'Loyalty Program', 'Happy Hour']


def _percent_increase(before, during):
    """Return the percentage change from `before` to `during`."""
    return ((during - before) / before) * 100


# Keyword arguments are evaluated left to right, so the random draws happen
# in the order the columns are listed here.
data = dict(
    Campaign_ID=np.arange(1, num_records + 1),
    Previous_Sales=np.random.randint(500, 1500, size=num_records),
    During_Campaign_Sales=np.random.randint(600, 2000, size=num_records),
    Foot_Traffic_Before_Campaign=np.random.randint(100, 500, size=num_records),
    Foot_Traffic_During_Campaign=np.random.randint(150, 700, size=num_records),
    Customer_Age_Group=np.random.choice(age_categories, size=num_records),
    Customer_Gender=np.random.choice(['Male', 'Female'], size=num_records),
    Promotion_Type=np.random.choice(promotion_types, size=num_records),
    Campaign_Duration_Days=np.random.randint(5, 15, size=num_records),
    Day_of_Week=np.random.choice(
        ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
        size=num_records,
    ),
)

df = pd.DataFrame(data)

# A campaign is labelled successful only when BOTH sales and foot traffic
# rose by at least 20% relative to their pre-campaign values.
sales_increase = _percent_increase(df['Previous_Sales'], df['During_Campaign_Sales'])
rush_increase = _percent_increase(df['Foot_Traffic_Before_Campaign'], df['Foot_Traffic_During_Campaign'])
df['Campaign_Success'] = (sales_increase >= 20) & (rush_increase >= 20)

df.head()



Unnamed: 0,Campaign_ID,Previous_Sales,During_Campaign_Sales,Foot_Traffic_Before_Campaign,Foot_Traffic_During_Campaign,Customer_Age_Group,Customer_Gender,Promotion_Type,Campaign_Duration_Days,Day_of_Week,Campaign_Success
0,1,602,1084,157,569,46-55,Female,Loyalty Program,7,Thursday,True
1,2,935,830,422,426,26-35,Male,Buy One Get One,5,Thursday,False
2,3,1360,1348,459,543,26-35,Female,Buy One Get One,13,Wednesday,False
3,4,770,1794,273,606,26-35,Male,Loyalty Program,12,Tuesday,True
4,5,606,1140,379,341,26-35,Male,Buy One Get One,6,Friday,False


In [None]:
# Write the generated campaign table to disk; the row index is not stored.
transformed_csv_file_path = 'campaign_success_dataset.csv'
df.to_csv(path_or_buf=transformed_csv_file_path, index=False)

In [None]:
import pandas as pd
import numpy as np


In [None]:
# Reload the campaign table from the CSV file and preview its first rows.
df = pd.read_csv(filepath_or_buffer='campaign_success_dataset.csv')
df.head(n=5)

Unnamed: 0,Campaign_ID,Previous_Sales,During_Campaign_Sales,Foot_Traffic_Before_Campaign,Foot_Traffic_During_Campaign,Customer_Age_Group,Customer_Gender,Promotion_Type,Campaign_Duration_Days,Day_of_Week,Campaign_Success
0,1,602,1084,157,569,46-55,Female,Loyalty Program,7,Thursday,True
1,2,935,830,422,426,26-35,Male,Buy One Get One,5,Thursday,False
2,3,1360,1348,459,543,26-35,Female,Buy One Get One,13,Wednesday,False
3,4,770,1794,273,606,26-35,Male,Loyalty Program,12,Tuesday,True
4,5,606,1140,379,341,26-35,Male,Buy One Get One,6,Friday,False


In [None]:
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, LabelEncoder

# One-hot encode the nominal features Promotion_Type and Day_of_Week.
# Customer_Age_Group and Customer_Gender are NOT encoded in this cell; they are
# carried through unchanged into df_encoded.
categorical_features = ['Promotion_Type', 'Day_of_Week']

# drop='first' omits one category per feature so the indicator columns are not
# perfectly collinear; sparse_output=False returns a dense NumPy array.
one_hot_encoder = OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore')

encoded_features = one_hot_encoder.fit_transform(df[categorical_features])
encoded_feature_names = one_hot_encoder.get_feature_names_out(categorical_features)

# Build the indicator frame on df's own index: pd.concat(axis=1) aligns rows by
# index label, so a mismatched index would silently produce NaN-padded rows.
encoded_df = pd.DataFrame(encoded_features, columns=encoded_feature_names, index=df.index)

# NOTE(review): this LabelEncoder is instantiated but never fitted or applied
# anywhere in the visible notebook — confirm whether gender encoding was intended.
label_encoder = LabelEncoder()

# Replace the original categorical columns with their one-hot indicators.
df_encoded = pd.concat([df.drop(columns=categorical_features), encoded_df], axis=1)


In [None]:
# Min-max normalization of the numeric columns.
# NOTE(review): the scaler is fitted on every row of the table, i.e. before the
# train/test split that happens later in the notebook — confirm this is intended.
numerical_features = [
    'Previous_Sales',
    'During_Campaign_Sales',
    'Foot_Traffic_Before_Campaign',
    'Foot_Traffic_During_Campaign',
    'Campaign_Duration_Days',
]
scaler = MinMaxScaler()
numeric_block = df_encoded[numerical_features]
df_encoded[numerical_features] = scaler.fit(numeric_block).transform(numeric_block)

# The demographic columns are removed from the modelling table.
df_encoded = df_encoded.drop(['Customer_Age_Group', 'Customer_Gender'], axis=1)

In [None]:
# Preview the first rows of the encoded and scaled table.
df_encoded.head(n=5)

Unnamed: 0,Campaign_ID,Previous_Sales,During_Campaign_Sales,Foot_Traffic_Before_Campaign,Foot_Traffic_During_Campaign,Campaign_Duration_Days,Campaign_Success,Promotion_Type_Discount,Promotion_Type_Happy Hour,Promotion_Type_Loyalty Program,Day_of_Week_Monday,Day_of_Week_Saturday,Day_of_Week_Sunday,Day_of_Week_Thursday,Day_of_Week_Tuesday,Day_of_Week_Wednesday
0,1,0.10161,0.345741,0.142857,0.765996,0.222222,True,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1,2,0.43662,0.163923,0.807018,0.50457,0.0,False,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,3,0.864185,0.534717,0.899749,0.718464,0.888889,False,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,4,0.270624,0.853973,0.433584,0.833638,0.777778,True,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
4,5,0.105634,0.385827,0.699248,0.349177,0.111111,False,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Persist the encoded and scaled table (without the row index), then echo the
# output path as the cell result.
transformed_csv_file_path = 'campaign_success_transformed_dataset.csv'
df_encoded.to_csv(path_or_buf=transformed_csv_file_path, index=False)

transformed_csv_file_path


'campaign_success_transformed_dataset.csv'

In [None]:
pip install scikit-learn pandas numpy




In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import pandas as pd


In [None]:
# Load the encoded and scaled table written by the transformation step.
df = pd.read_csv('campaign_success_transformed_dataset.csv')

# Separate features (X) and target variable (y).
# Campaign_ID is excluded: it is a sequential row identifier, not a campaign
# attribute, and it is the only column left unscaled (values 1..200), which
# hampers solver convergence. The regression cell excludes it as well.
X = df.drop(columns=['Campaign_Success', 'Campaign_ID'])
y = df['Campaign_Success']

# Hold out 20% of the rows for evaluation; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Logistic Regression model.
# max_iter is raised above the default because the previous run stopped at the
# iteration limit before converging.
model = LogisticRegression(max_iter=1000)

# Train the model using the training data
model.fit(X_train, y_train)

# Make predictions on the test data
y_pred = model.predict(X_test)

# Evaluate the model's performance on the held-out rows
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Display the evaluation results
print("Model Evaluation Results:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")
print("Confusion Matrix:")
print(conf_matrix)


Model Evaluation Results:
Accuracy: 0.85
Precision: 0.82
Recall: 0.69
F1-score: 0.75
Confusion Matrix:
[[25  2]
 [ 4  9]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
import joblib

# Serialize the fitted logistic-regression classifier to a joblib file.
joblib.dump(value=model, filename='logistic_regression_model.joblib')


['logistic_regression_model.joblib']

In [None]:
# Importing necessary libraries for regression model training and evaluation
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# Load the transformed dataset
file_path = 'campaign_success_transformed_dataset.csv'
data = pd.read_csv(file_path)

# The regression target is 'During_Campaign_Sales_num'. The transformed CSV
# written earlier in this notebook does not contain that column (it only holds
# the min-max scaled 'During_Campaign_Sales'), so when it is missing the
# unscaled sales figures are recovered from the untransformed dataset and
# joined on Campaign_ID. This keeps the cell runnable from a fresh kernel.
if 'During_Campaign_Sales_num' not in data.columns:
    raw_sales = pd.read_csv(
        'campaign_success_dataset.csv',
        usecols=['Campaign_ID', 'During_Campaign_Sales'],
    ).rename(columns={'During_Campaign_Sales': 'During_Campaign_Sales_num'})
    # validate='one_to_one' raises if Campaign_ID is duplicated on either side,
    # which would otherwise silently multiply rows.
    data = data.merge(raw_sales, on='Campaign_ID', how='left', validate='one_to_one')

# Separate features (X) and target variable (y).
# Both forms of the target, the row identifier and the day-of-week indicators
# are excluded from the features.
# NOTE(review): 'Campaign_Success' stays in X although it is computed from
# During_Campaign_Sales when the dataset is generated, so it leaks target
# information — confirm whether it should be excluded as well.
excluded_columns = [
    'During_Campaign_Sales', 'During_Campaign_Sales_num', 'Campaign_ID',
    'Day_of_Week_Monday', 'Day_of_Week_Tuesday', 'Day_of_Week_Wednesday',
    'Day_of_Week_Saturday', 'Day_of_Week_Sunday', 'Day_of_Week_Thursday',
]
X = data.drop(columns=excluded_columns)
y = data['During_Campaign_Sales_num']

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the regression models (seeded where the estimator is stochastic)
linear_regressor = LinearRegression()
random_forest_regressor = RandomForestRegressor(random_state=42)
gradient_boosting_regressor = GradientBoostingRegressor(random_state=42)

# Train the models
linear_regressor.fit(X_train, y_train)
random_forest_regressor.fit(X_train, y_train)
gradient_boosting_regressor.fit(X_train, y_train)

# Make predictions with each model
y_pred_linear = linear_regressor.predict(X_test)
y_pred_rf = random_forest_regressor.predict(X_test)
y_pred_gb = gradient_boosting_regressor.predict(X_test)

# Evaluate the models using MAE, MSE, and R² score; the prediction list is in
# the same order as the 'Model' labels.
model_predictions = [y_pred_linear, y_pred_rf, y_pred_gb]
evaluation_metrics = {
    'Model': ['Linear Regression', 'Random Forest Regression', 'Gradient Boosting Regression'],
    'MAE': [mean_absolute_error(y_test, pred) for pred in model_predictions],
    'MSE': [mean_squared_error(y_test, pred) for pred in model_predictions],
    'R2 Score': [r2_score(y_test, pred) for pred in model_predictions],
}

# Convert the results to a DataFrame for better visualization
evaluation_df = pd.DataFrame(evaluation_metrics)
evaluation_df


Unnamed: 0,Model,MAE,MSE,R2 Score
0,Linear Regression,316.635368,154954.119153,0.077471
1,Random Forest Regression,321.99975,150272.007902,0.105347
2,Gradient Boosting Regression,308.159339,138749.459323,0.173947


In [None]:
import joblib

# Serialize the fitted gradient-boosting regressor to a joblib file.
joblib.dump(value=gradient_boosting_regressor, filename='GB_regression_model.joblib')


['GB_regression_model.joblib']