<a href="https://colab.research.google.com/github/YasiruMM/Medicine-Prediction-Grp-22/blob/Model_Training_for_Demand_Prediction/XGBooster.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import necessary libraries
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from google.colab import drive

In [2]:
# Mount Google Drive
drive.mount('/content/drive')

# Load the dataset
file_path = '/content/drive/My Drive/DSGP/MediTrackData.csv'
df = pd.read_csv(file_path)

Mounted at /content/drive


In [13]:
# Prepare feature (X) and target (Y) variables
X = df[['Disease Category', 'Drug Category', 'Drug Name', 'Dosage',
        'Retail Price', 'Purchase Price', 'Sales', 'Date',
        'Mean Sales', 'CV', 'Buffer Percentage', 'Buffer Stock']]
Y = df[['Sales']]

In [14]:
# Save 'Drug Name'& Disease Category before encoding
drug_names = X[['Drug Name']]
disease_categories = X[['Disease Category']]

In [15]:
# Convert 'Date' feature to numerical month values
X['Date'] = X['Date'].astype(float)

# Convert categorical variables into numerical labels (ONE-HOT-Encoding)
X = pd.get_dummies(X, columns=['Disease Category', 'Drug Category', 'Drug Name', 'Dosage'])

In [16]:
# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)


In [17]:
# Convert to DMatrix format
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)


In [18]:
# Set the model parameters
params = {
    'objective': 'reg:squarederror',  # Regression task
    'eval_metric': 'rmse',            # Evaluation metric: root mean square error
    'max_depth': 6,                   # Maximum tree depth
    'learning_rate': 0.1,             # Learning rate
    'colsample_bytree': 0.8,          # Column sampling rate
    'subsample': 0.8                  # Subsample rate
}

In [19]:
# Train the model
num_boost_round = 100  # Number of boosting rounds
model = xgb.train(params, dtrain, num_boost_round=num_boost_round)

In [20]:
# saving test dataset for future predictions
X_test_future=X_test.copy()


In [22]:
# loop XGBooster for 6 months predictions
future_results=pd.DataFrame()
for month in range(1,7):
  X_test_future['Date']=X_test_future['Date']+1
  dtest_future = xgb.DMatrix(X_test_future)
  predictions_future = model.predict(dtest_future) # prediction for next month
  future_Month_Results=pd.DataFrame({
          'Disease Category': disease_categories.iloc[X_test_future.index]['Disease Category'],
          'Drug Name': drug_names.iloc[X_test_future.index]['Drug Name'],
          'Month': [month] * len(predictions_future),
          'Predicted Sales': predictions_future

      })

     # Concatenate instead of append
  future_results = pd.concat([future_results, future_Month_Results], ignore_index=True)




In [23]:
print(future_results)

     Disease Category          Drug Name  Month  Predicted Sales
0         Cholesterol      GLIDABET 80MG      1        85.558144
1         Cholesterol     GLUCOZIDE 80MG      1      7875.267090
2         Cholesterol   DIAZIDE TAB 80MG      1        35.108910
3         Cholesterol        GLIVIC 40MG      1       116.454086
4      Cardiovascular       LOWPRES 50MG      1       948.305786
...               ...                ...    ...              ...
7015      Cholesterol    GLICLAZIDE 80MG      6       322.201630
7016      Cholesterol        GD-CARE 5MG      6       139.593445
7017      Cholesterol  GLIVIC MR 60 80MG      6        12.478703
7018      Cholesterol          PIOZ 15MG      6      8908.690430
7019      Cholesterol       RECLIDE 80MG      6      7542.361328

[7020 rows x 4 columns]


In [None]:
# # Make predictions
# predictions = model.predict(dtest)

# # Create a DataFrame with the predictions and drug names
# results = pd.DataFrame({
#     'Drug Name': drug_names.iloc[X_test.index]['Drug Name'],  # Map back to the original Drug Name
#     'Disease Category': disease_categories.iloc[X_test.index]['Disease Category'],  # Map back to the original Disease Category
#     'Predicted Sales': predictions
# })

In [None]:
# Pivot the DataFrame so each drug has predictions in a single row
pivot_results = future_results.pivot_table(
    index=['Disease Category', 'Drug Name'],
    columns='Month',
    values='Predicted Sales'
).reset_index()


             Drug Name  Predicted Sales
5110     GLIDABET 80MG        85.558144
3309    GLUCOZIDE 80MG      7882.855957
4233  DIAZIDE TAB 80MG        35.108910
5527       GLIVIC 40MG       116.454086
681       LOWPRES 50MG       948.305786
