In [4]:
# Import necessary libraries
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split




In [5]:
# Load the dataset
file_path = 'C:/Users/ASUS/OneDrive/Desktop/Drug_Data_Featured.xlsx'
df = pd.read_excel(file_path)

In [6]:
# Define feature (X) and target (y) variables
X = df.drop(columns=['Sales'])  # Exclude target variable
y = df['Sales']


In [7]:
# Save 'Drug Name'& Disease Category before encoding
drug_names = X[['Drug Name']]
disease_categories = X[['Disease Category']]

In [8]:
# Convert 'Date' to numerical format
X['Date'] = pd.to_datetime(X['Date']).dt.month.astype(float)  # Convert date to month number


In [9]:
# One-Hot Encoding for categorical variables
X = pd.get_dummies(X, columns=['Disease Category', 'Drug Category', 'Drug Name', 'Dosage'])


In [10]:

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [11]:
# Convert to DMatrix format
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

In [12]:
# Define XGBoost parameters
params = {
    'objective': 'reg:squarederror',  # Regression task
    'eval_metric': 'rmse',            # Root Mean Squared Error
    'max_depth': 6,
    'learning_rate': 0.1,
    'colsample_bytree': 0.8,
    'subsample': 0.8
}

In [13]:
# Train the model
num_boost_round = 100
model = xgb.train(params, dtrain, num_boost_round=num_boost_round)

In [14]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

y_pred = model.predict(dtest)
print("RMSE:", mean_squared_error(y_test, y_pred, squared=False))
print("MAE:", mean_absolute_error(y_test, y_pred))
print("R²:", r2_score(y_test, y_pred))

TypeError: got an unexpected keyword argument 'squared'

In [15]:
# Save a copy of test data for future predictions
X_test_future = X_test.copy()

In [16]:
# Initialize DataFrame to store results
future_results = pd.DataFrame()


In [18]:
# # Loop for multi-month prediction (6 months ahead)
# for month in range(1, 7):
#     X_test_future['Date'] += 1  # Move forward one month
#     dtest_future = xgb.DMatrix(X_test_future)
#     predictions_future = model.predict(dtest_future)
#
#     # Store results
#     future_month_results = pd.DataFrame({
#         'Disease Category': disease_categories.iloc[X_test_future.index]['Disease Category'].values,
#         'Drug Name': drug_names.iloc[X_test_future.index]['Drug Name'].values,
#         'Month': [month] * len(predictions_future),
#         'Predicted Sales': predictions_future
#     })
#
#     future_results = pd.concat([future_results, future_month_results], ignore_index=True)
#
# Ensure 'Date' column is used correctly
X_test_future = X_test.copy()
original_indices = X_test_future.index  # Retain original indices for proper merging

# Loop for multi-month prediction (6 months ahead)
for month in range(1, 7):
    X_test_future['Date'] += 1  # Increment 'Date' to simulate next month
    dtest_future = xgb.DMatrix(X_test_future)
    predictions_future = model.predict(dtest_future)

    # Store results with original indices
    future_month_results = pd.DataFrame({
        'Index': original_indices,  # Retain original indices
        'Disease Category': disease_categories.iloc[X_test_future.index]['Disease Category'].values,
        'Drug Name': drug_names.iloc[X_test_future.index]['Drug Name'].values,
        'Month': [month] * len(predictions_future),
        'Predicted Sales': predictions_future
    })

    future_results = pd.concat([future_results, future_month_results], ignore_index=True)



In [19]:
# # Pivot results for better readability
# pivot_results = future_results.pivot_table(
#     index=['Disease Category', 'Drug Name'],
#     columns='Month',
#     values='Predicted Sales'
# ).reset_index()
#
# #Display predictions
# print(pivot_results)
#
#
# # Save to Excel
# pivot_results.to_excel("predicted_sales.xlsx", index=False)
#
# print("Excel file 'predicted_sales.xlsx' has been saved successfully!")
#
# # Merge with original input file to retain all columns
# merged_results = df.merge(pivot_results, on=['Disease Category', 'Drug Name'], how='left')
#
# # Save to Excel
# merged_results.to_excel("C:/Users/ASUS/OneDrive/Desktop/predicted_sales_full.xlsx", index=False)
#
# print("Excel file 'predicted_sales_full.xlsx' has been saved with all input columns!")

# Restore original indices for merging
future_results.set_index('Index', inplace=True)

# Pivot results for better readability
pivot_results = future_results.pivot_table(
    index=['Disease Category', 'Drug Name'],
    columns='Month',
    values='Predicted Sales'
).reset_index()

# Display predictions
print(pivot_results)

# Save to Excel
pivot_results.to_excel("xgb_predicted_sales.xlsx", index=False)
print("Excel file 'predicted_sales.xlsx' has been saved successfully!")

# Merge with original input file to retain all columns
merged_results = df.merge(pivot_results, on=['Disease Category', 'Drug Name'], how='left')

# Save to Excel
merged_results.to_excel("C:/Users/ASUS/OneDrive/Desktop/xgb_predicted_sales_full.xlsx", index=False)
print("Excel file 'predicted_sales_full.xlsx' has been saved with all input columns!")


Month Disease Category                      Drug Name             1  \
0       Cardiovascular       AMILODIPINE (CADLA) 10MG    153.194565   
1       Cardiovascular                    AMLODAC 5MG    377.132355   
2       Cardiovascular                    AMLONG 10MG   1381.081543   
3       Cardiovascular          AMLONG 2. 2. 2. 2.5MG   7265.875488   
4       Cardiovascular                  AMLOPRESS 5MG  12477.567383   
..                 ...                            ...           ...   
242           Diabetes                RECLIDE MR 30MG   1759.579102   
243           Diabetes                RECLIDE MR 60MG   1074.301025   
244           Diabetes  TOLBUTAMIDE TABS (INTPH) 80MG   1880.179565   
245           Diabetes                    VEXGID 80MG    123.993767   
246           Diabetes              VEXGID MR 30 80MG    949.623779   

Month             2             3             4             5             6  
0        153.194565    153.194565    153.194565    153.194565    153.