## You have been provided with a dataset that contains the costs of advertising on 
different media channels and the corresponding sales of XYZ firm. Evaluate the dataset to:

* find the features or media channels used by the firm

* find the sales figure for each channel

* create a model to predict the sales outcome

* split it into train and test datasets for the model

* calculate the mean squared error (MSE)

In [96]:
import pandas as pd
import requests
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [97]:
# Location of the advertising dataset (a CSV file hosted on GitHub).
url = 'https://raw.githubusercontent.com/jarif87/DataSets/main/Advertising.csv'

# pandas can read directly from a URL, so no separate requests.get() call is
# needed: the earlier one downloaded the file a second time and its response
# was never used.
# index_col=0 turns the first CSV column (the row number) into the index
# instead of keeping it as a feature column.
data = pd.read_csv(url, index_col=0)

# Preview the first rows to confirm the file parsed as expected.
data.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


In [98]:
# Dimensions of the loaded frame as (rows, columns).
data.shape

(200, 4)

In [99]:
# Column labels of the loaded frame: the candidate features plus the target.
data.columns

Index(['TV', 'Radio', 'Newspaper', 'Sales'], dtype='object')

In [100]:
# Feature matrix: advertising cost per media channel.
X_data=data[["TV","Radio","Newspaper"]]

In [101]:
# Display the feature matrix (pandas truncates long frames in the rendered output).
X_data

Unnamed: 0,TV,Radio,Newspaper
1,230.1,37.8,69.2
2,44.5,39.3,45.1
3,17.2,45.9,69.3
4,151.5,41.3,58.5
5,180.8,10.8,58.4
...,...,...,...
196,38.2,3.7,13.8
197,94.2,4.9,8.1
198,177.0,9.3,6.4
199,283.6,42.0,66.2


In [102]:
# Target vector: the sales figure the model will learn to predict.
Y_data=data["Sales"]

In [103]:
# Display the target series to check its length and dtype.
Y_data

1      22.1
2      10.4
3       9.3
4      18.5
5      12.9
       ... 
196     7.6
197     9.7
198    12.8
199    25.5
200    13.4
Name: Sales, Length: 200, dtype: float64

In [104]:
# Hold out part of the rows for evaluation. No test size is given, so the
# library default applies; the fixed seed makes the split reproducible.
(X_train, X_test, Y_train, Y_test) = train_test_split(
    X_data,
    Y_data,
    random_state=42,
)

In [105]:
# Report the shape of each piece of the split as a quick sanity check.
print(f"Train Data Shape :{X_train.shape}")
print(
    "Test Data shape {}\nTrain Label Data shape {}\nTest Label Data shape {}".format(
        *(part.shape for part in (X_test, Y_train, Y_test))
    )
)

Train Data Shape :(150, 3)
Test Data shape (50, 3)
Train Label Data shape (150,)
Test Label Data shape (50,)


In [106]:
# Linear regression estimator with default settings (not fitted yet).
model=LinearRegression()

In [107]:
# Fit on the training split only, so the test split stays unseen for evaluation.
model.fit(X_train,Y_train)

In [108]:
# Fitted intercept term of the linear model.
print("Intercept :",model.intercept_)

Intercept : 2.778303460245283


In [109]:
# Fitted weights, one per feature, in training column order (TV, Radio, Newspaper).
print("Coefficient :",model.coef_)

Coefficient : [0.04543356 0.19145654 0.00256809]


In [110]:
# Predict sales for the held-out test features; the bare name below displays the array.
pred=model.predict(X_test)
pred

array([16.38348211, 20.92434957, 21.61495426, 10.49069997, 22.17690456,
       13.02668085, 21.10309295,  7.31813008, 13.56732111, 15.12238649,
        8.92494113,  6.49924401, 14.30119928,  8.77233515,  9.58665483,
       12.09485291,  8.59621605, 16.25337881, 10.16948105, 18.85753401,
       19.5799036 , 13.15877029, 12.25103735, 21.35141984,  7.69607607,
        5.64686906, 20.79780073, 11.90951247,  9.06581044,  8.37295611,
       12.40815899,  9.89416076, 21.42707658, 12.14236853, 18.28776857,
       20.18114718, 13.99303029, 20.89987736, 10.9313953 ,  4.38721626,
        9.58213448, 12.6170249 ,  9.93851933,  8.06816257, 13.45497849,
        5.25769423,  9.15399537, 14.09552838,  8.71029827, 11.55102817])

In [111]:
# Score the held-out predictions against the true sales values
# (mean of the squared residuals; lower is better).
mse = mean_squared_error(y_true=Y_test, y_pred=pred)

print(f"Mean Squared Error: {mse}")

Mean Squared Error: 2.880023730094193


# First Method to predict Custom Data

In [112]:
# create a custom data point as a 2-D numpy array of shape (1, n_features);
# values must follow the training column order: TV, Radio, Newspaper
custom_data = np.array([[230.1, 37.8, 69.2]])

# The model was fitted on a DataFrame, so it remembers the feature names.
# Predicting on a bare ndarray makes scikit-learn warn about missing feature
# names and skips its column-order check. Wrapping the array with the
# training columns keeps the names attached; the predicted value is unchanged.
prediction = model.predict(pd.DataFrame(custom_data, columns=X_train.columns))

print(f"Prediction: {prediction}")

Prediction: [20.64733423]


# Second Method to predict Custom Data

In [113]:
# assume model is a trained linear regression model

# create a custom data point as a flat numpy array (order: TV, Radio, Newspaper)
# and reshape it to the (1, n_features) layout that predict() expects
custom_data = np.array([3.2, 4.5, 1.9]).reshape(1, -1)

# The model was fitted on a DataFrame, so it remembers the feature names.
# Predicting on a bare ndarray makes scikit-learn warn about missing feature
# names and skips its column-order check. Wrapping the array with the
# training columns keeps the names attached; the predicted value is unchanged.
prediction = model.predict(pd.DataFrame(custom_data, columns=X_train.columns))

print(f"Prediction: {prediction}")


Prediction: [3.79012463]


# Third Method to predict Custom Data

In [114]:


# columns the model was trained on, in training order
feature_names = ["TV", "Radio", "Newspaper"]

# describe the custom data point by feature name
custom_data_dict = {'TV': 44.5, 'Radio': 39.3, 'Newspaper': 45.1}

# build a one-row frame from the dictionary, then keep only the training
# features in training order
custom_data_df = pd.DataFrame([custom_data_dict])
custom_data_df = custom_data_df.loc[:, feature_names]

# predict sales for the custom data point
prediction = model.predict(custom_data_df)

print(f"Prediction: {prediction}")


Prediction: [12.44015956]
