<a href="https://colab.research.google.com/github/bhuvan-0078/web-scaping/blob/master/DataPlots.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import xgboost as xgb
import numpy as np
import openai

In [2]:
data = pd.read_csv('./sample_data/sample_ecommerce_sales.csv')
data['Date'] = pd.to_datetime(data['Date'])
data['Month'] = data['Date'].dt.month
data['DayOfWeek'] = data['Date'].dt.dayofweek

In [3]:
data

Unnamed: 0,Date,Product ID,ProductCategory,QuantitySold,Price,CustomerDemographics,Discounts or promotions used,Web traffic,TotalRevenue,Month,DayOfWeek
0,2022-09-03,4ba80ab1,Beauty,7,139.02,Adult,20% off,3778,973.14,9,5
1,2021-12-05,0a769199,Beauty,15,323.78,Young Adult,Buy 1 Get 1,1586,4856.70,12,6
2,2022-10-13,3dcccb12,Clothing,11,207.32,Senior,Free Shipping,4182,2280.52,10,3
3,2021-06-01,dd87c415,Home Decor,8,17.57,Teenager,Buy 1 Get 1,4283,140.56,6,1
4,2022-09-18,d8258472,Clothing,7,82.30,Teenager,Free Shipping,560,576.10,9,6
...,...,...,...,...,...,...,...,...,...,...,...
995,2021-10-20,92077b54,Beauty,1,26.37,Teenager,20% off,3675,26.37,10,2
996,2022-08-28,84abcb58,Electronics,8,86.58,Senior,Buy 1 Get 1,3216,692.64,8,6
997,2022-12-13,5fede88d,Books,7,70.21,Senior,10% off,3385,491.47,12,1
998,2023-04-22,d5aacc06,Books,3,364.36,Senior,,743,1093.08,4,5


In [4]:
# Encode categorical features
encoder = LabelEncoder()
data['ProductCategory'] = encoder.fit_transform(data['ProductCategory'])

In [6]:
# Normalize numerical features
scaler = MinMaxScaler()
numerical_features = ['QuantitySold', 'Price', 'TotalRevenue']
data[numerical_features] = scaler.fit_transform(data[numerical_features])

In [8]:
# Prepare features and labels
X = data[['QuantitySold', 'Price', 'Month', 'DayOfWeek', 'ProductCategory']]
y = data['TotalRevenue']

In [9]:
# Convert to DMatrix format for XGBoost
dtrain = xgb.DMatrix(X, label=y)


In [10]:
# Define XGBoost parameters
params = {
    'objective': 'reg:squarederror',
    'eval_metric': 'rmse',
    'max_depth': 6,
    'learning_rate': 0.1,
    'n_estimators': 100
}

# Train the model
model = xgb.train(params, dtrain, num_boost_round=100)

print("Training complete!")

Parameters: { "n_estimators" } are not used.



Training complete!


In [11]:
# Generate textual insights from predictions
def generate_sales_report(model, X, data):
    dtest = xgb.DMatrix(X)
    predictions = model.predict(dtest)
    reports = []

    for i, pred in enumerate(predictions):
        report = f"On {data.iloc[i]['Date'].date()}, {data.iloc[i]['QuantitySold']} units of product category {data.iloc[i]['ProductCategory']} were sold at a price of {data.iloc[i]['Price']:.2f}, generating an estimated revenue of {pred:.2f}."
        reports.append(report)

    return reports

In [12]:
# Generate reports
sales_reports = generate_sales_report(model, X, data)

# Print sample reports
for report in sales_reports[:5]:
    print(report)

On 2022-09-03, 0.33333333333333326 units of product category 0 were sold at a price of 0.27, generating an estimated revenue of 0.11.
On 2021-12-05, 0.7777777777777777 units of product category 0 were sold at a price of 0.64, generating an estimated revenue of 0.51.
On 2022-10-13, 0.5555555555555555 units of product category 2 were sold at a price of 0.41, generating an estimated revenue of 0.24.
On 2021-06-01, 0.38888888888888884 units of product category 4 were sold at a price of 0.02, generating an estimated revenue of 0.01.
On 2022-09-18, 0.33333333333333326 units of product category 2 were sold at a price of 0.15, generating an estimated revenue of 0.06.


In [13]:
# Predict next quarter sales
def predict_next_quarter_sales(model, data):
    future_dates = pd.date_range(start=data['Date'].max(), periods=90, freq='D')
    future_data = pd.DataFrame({'Date': future_dates})
    future_data['Month'] = future_data['Date'].dt.month
    future_data['DayOfWeek'] = future_data['Date'].dt.dayofweek

    # Assume average values for numerical and categorical features
    future_data['QuantitySold'] = data['QuantitySold'].mean()
    future_data['Price'] = data['Price'].mean()
    future_data['ProductCategory'] = data['ProductCategory'].mode()[0]

    X_future = future_data[['QuantitySold', 'Price', 'Month', 'DayOfWeek', 'ProductCategory']]
    dfuture = xgb.DMatrix(X_future)
    future_predictions = model.predict(dfuture)

    future_data['PredictedRevenue'] = future_predictions
    return future_data

In [14]:
# Predict and print next quarter sales
next_quarter_sales = predict_next_quarter_sales(model, data)
print(next_quarter_sales.head(10))

        Date  Month  DayOfWeek  QuantitySold     Price  ProductCategory  \
0 2025-02-04      2          1      0.485889  0.499609                3   
1 2025-02-05      2          2      0.485889  0.499609                3   
2 2025-02-06      2          3      0.485889  0.499609                3   
3 2025-02-07      2          4      0.485889  0.499609                3   
4 2025-02-08      2          5      0.485889  0.499609                3   
5 2025-02-09      2          6      0.485889  0.499609                3   
6 2025-02-10      2          0      0.485889  0.499609                3   
7 2025-02-11      2          1      0.485889  0.499609                3   
8 2025-02-12      2          2      0.485889  0.499609                3   
9 2025-02-13      2          3      0.485889  0.499609                3   

   PredictedRevenue  
0          0.243376  
1          0.243211  
2          0.243518  
3          0.243354  
4          0.243354  
5          0.242945  
6          0.243135 

In [22]:
# Generate sales chart
def plot_sales_forecast(next_quarter_sales):
    plt.figure(figsize=(12, 6))
    plt.plot(next_quarter_sales['Date'], next_quarter_sales['PredictedRevenue'], label='Predicted Revenue', color='blue')
    plt.xlabel('Date')
    plt.ylabel('Predicted Revenue')
    plt.title('Next Quarter Sales Forecast')
    plt.legend()
    plt.grid()
    plt.show()

In [26]:
# Process natural language prompts
def process_natural_language_prompt(prompt):
    prompt = prompt.lower()
    if "next quarter" in prompt and "sales" in prompt:
        plot_sales_forecast(next_quarter_sales)
        return f"Based on our predictions, the estimated revenue for the next quarter is:\n{next_quarter_sales.head(10).to_string()}"
    elif "sales report" in prompt:
        return "Here is a summary of recent sales:\n" + "\n".join(sales_reports[:5])
    elif "predict" in prompt or "forecast" in prompt:
        return "I can provide sales predictions and insights. Try asking 'What are the next quarter's sales?' or 'Generate a sales report.'"
    else:
        return "I didn't understand that request. Please ask about sales reports or forecasts."

In [None]:
user_prompt = input("Enter your query: ")
response = process_natural_language_prompt(user_prompt)
print(response)


In [18]:
import matplotlib.pyplot as plt