In [84]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import statsmodels.api as sm

In [39]:
# ----------------------------
# Step 1: Create Dummy Dataset
# ----------------------------
data = {
    "Sales": [520, 610, 430, 700, 640, 580, 720, 810],
    "Advertising": [20, 25, 18, 30, 28, 22, 35, 40],
    "Price": [50, 48, 52, 47, 49, 51, 46, 45],
    "CompetitorPrice": [55, 54, 53, 56, 55, 52, 57, 58]
}

df = pd.DataFrame(data)

In [40]:
df

Unnamed: 0,Sales,Advertising,Price,CompetitorPrice
0,520,20,50,55
1,610,25,48,54
2,430,18,52,53
3,700,30,47,56
4,640,28,49,55
5,580,22,51,52
6,720,35,46,57
7,810,40,45,58


In [41]:
X = df.drop(columns='Sales')
y = df['Sales']
X.shape,y.shape

((8, 3), (8,))

In [42]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=0)

In [43]:
X_train.shape,y_train.shape

((6, 3), (6,))

In [44]:
X_test.shape,y_test.shape

((2, 3), (2,))

In [45]:
le = LinearRegression()
le.fit(X_train,y_train)

In [46]:
bias = le.score(X_train,y_train)
bias

0.9871977532922775

In [47]:
variance = le.score(X_test,y_test)
variance

0.8663527280062078

In [48]:
le.coef_

array([ 14.1915468 ,  -8.23401426, -10.68737661])

In [54]:
b1 = le.intercept_
b1

np.float64(1238.848865788308)

In [50]:
y_pred = le.predict(X_test)

In [51]:
y_pred

array([747.60788106, 499.69700633])

In [53]:
comparsion = pd.DataFrame({'actaul':y_test,'predict':y_pred})
comparsion

Unnamed: 0,actaul,predict
6,720,747.607881
2,430,499.697006


In [56]:
sales = b1+14.19*20-8.23*50-10.68*55
sales

np.float64(523.7488657883081)

In [55]:
df

Unnamed: 0,Sales,Advertising,Price,CompetitorPrice
0,520,20,50,55
1,610,25,48,54
2,430,18,52,53
3,700,30,47,56
4,640,28,49,55
5,580,22,51,52
6,720,35,46,57
7,810,40,45,58


In [63]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# -----------------------------
# Step 1: Create synthetic PayU-like dataset (1000 records)
# -----------------------------
np.random.seed(42)

n = 1000

data = {
    "MarketingSpend": np.random.randint(50, 500, n),  # in lakhs
    "TransactionFee": np.random.uniform(1.0, 3.0, n),  # in %
    "CompetitorDiscount": np.random.uniform(0.5, 2.5, n),  # in %
    "ActiveMerchants": np.random.randint(1000, 10000, n),  # count of merchants
}

# Define true coefficients for simulation
beta_0 = 500
beta_m = 25      # MarketingSpend effect
beta_f = -40     # TransactionFee effect
beta_c = -30     # CompetitorDiscount effect
beta_a = 1.5     # ActiveMerchants effect

# Generate target variable with some noise
data["TransactionVolume"] = (
    beta_0
    + beta_m * data["MarketingSpend"]
    + beta_f * data["TransactionFee"]
    + beta_c * data["CompetitorDiscount"]
    + beta_a * data["ActiveMerchants"]
    + np.random.normal(0, 100, n)  # random noise
)

df = pd.DataFrame(data)


In [64]:
df.shape

(1000, 5)

In [65]:
df.head()

Unnamed: 0,MarketingSpend,TransactionFee,CompetitorDiscount,ActiveMerchants,TransactionVolume
0,152,2.899041,0.691429,6789,14409.748435
1,485,1.294147,0.547277,1436,14630.38423
2,398,2.853175,1.783943,3828,16113.890243
3,320,1.984233,1.714188,2556,12140.047281
4,156,1.516489,1.593395,8570,17171.855128


In [66]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   MarketingSpend      1000 non-null   int32  
 1   TransactionFee      1000 non-null   float64
 2   CompetitorDiscount  1000 non-null   float64
 3   ActiveMerchants     1000 non-null   int32  
 4   TransactionVolume   1000 non-null   float64
dtypes: float64(3), int32(2)
memory usage: 31.4 KB


In [67]:
X = df.drop(columns='TransactionVolume')
y = df['TransactionVolume']

In [69]:
X.shape,y.shape

((1000, 4), (1000,))

In [73]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.20,random_state=42)

In [74]:
X_train.shape,y_train.shape

((800, 4), (800,))

In [89]:
le = LinearRegression()
le.fit(X_train,y_train)

In [90]:
bias = le.score(X_train,y_train)
bias

0.9995763175257674

In [91]:
variance = le.score(X_test,y_test)
variance

0.9994691961293592

In [92]:
le.coef_

array([ 24.97554829, -30.45764548, -19.63282262,   1.50273877])

In [93]:
le.intercept_

np.float64(456.4589549035172)

In [94]:
df.columns

Index(['MarketingSpend', 'TransactionFee', 'CompetitorDiscount',
       'ActiveMerchants', 'TransactionVolume'],
      dtype='object')

In [95]:
y_pred = le.predict(X_test)

In [96]:
# Step 4: Evaluation metrics
# -----------------------------
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

In [97]:
mae,rmse,r2

(86.70398764770763, np.float64(107.68300148235771), 0.9994691961293592)