<a href="https://colab.research.google.com/github/ashish7493/IIITN/blob/main/roi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read while streaming a download.
CHUNK_SIZE = 40960
# Comma-separated list of "<directory>:<URL-encoded download URL>" entries; the
# download loop splits on ',' and ':' and URL-decodes the second part.
# NOTE(review): the URL is signed with X-Goog-Date=20240427T230052Z and
# X-Goog-Expires=259200, so it has presumably expired by now -- confirm and
# regenerate before relying on this cell.
DATA_SOURCE_MAPPING = 'advertising-dataset:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F133357%2F317184%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240427%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240427T230052Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D2cb29b817c2d059563cfc6ec406b7d222efde0791f5f8353494ef8622a5583b4aa1cdf2632ee0c92480f111c7cbc80d394092ef9de8930cd08c6ee7a5f59064bbf31895732531d7ae4edc8ce1ca5592284f3b990a573b244c211d81bbbadcedef0968d997d8300f95fc33f11fc2b5a2c735351814005e3bcf206de0a1b43fcfd6accab7b43d73b8d97813183b21ea0e32966810ae72ea6bcfd75b4cc6c7ff6e185725b89671cf7a410d753a0b75590ddf6e471acdfa024813e39d87953772791ee26b93f8d8997c42a8d45d457c9d0ad5a72a3be0be37b072130d4d4eaf78b570358964cbb53a919c00834e0b3057406e5784b6a760a204098fc608825773b5c'

# Directory that receives the downloaded and extracted data sources.
KAGGLE_INPUT_PATH='/kaggle/input'
# Directory created alongside the input directory for notebook output.
KAGGLE_WORKING_PATH='/kaggle/working'
# Not referenced anywhere in the visible part of this notebook.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every "<directory>:<URL-encoded URL>" entry of DATA_SOURCE_MAPPING
# into a temporary file and unpack it under KAGGLE_INPUT_PATH/<directory>.
# A source that fails with an HTTP or OS error is reported and skipped.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only; everything after it belongs to the URL.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # The Content-Length header can be missing (the lookup then yields
            # None); in that case only the byte counter is shown, no bar.
            total_bytes = int(total_length) if total_length else 0
            dl = 0
            # Stream in CHUNK_SIZE pieces until read() returns an empty chunk.
            for data in iter(lambda: fileres.read(CHUNK_SIZE), b''):
                dl += len(data)
                tfile.write(data)
                if total_bytes > 0:
                    done = min(50, int(50 * dl / total_bytes))
                    progress = f"[{'=' * done}{' ' * (50 - done)}] "
                else:
                    progress = ''
                sys.stdout.write(f"\r{progress}{dl} bytes downloaded")
                sys.stdout.flush()
            # The tar branch reopens the file by name, so buffered bytes must
            # reach the disk before extraction starts.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name: reusing 'tarfile' here
                # would replace the module and break the next tar source.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below /kaggle/input.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the advertising CSV via the '../input' path and preview the first
# five rows (five is the default row count of head()).
advertising = pd.read_csv('../input/advertising-dataset/advertising.csv')
advertising.head()

In [None]:
# Preview the last five rows (five is the default row count of tail()).
advertising.tail()

In [None]:
# Show the summary statistics table of the loaded data.
advertising.describe()

In [None]:
# IPython magic (not plain Python): selects the inline matplotlib backend so
# figures are shown in the notebook output.
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Pairwise plots across the columns of the data set.
sns.pairplot(data=advertising)

In [None]:
# One regression panel per advertising channel, each plotted against Sales.
sns.pairplot(
    data=advertising,
    x_vars=['TV','Radio','Newspaper'],
    y_vars='Sales',
    aspect=1,
    kind='reg',
)

In [None]:
# Predictors: the three advertising channels. Target: the Sales column.
feature_columns = ['TV','Radio','Newspaper']
X = advertising[feature_columns]
Y = advertising['Sales']

In [None]:
from sklearn.model_selection import train_test_split

# 70% of the rows go to training, the rest to testing; the fixed
# random_state makes the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    train_size=0.7,
    random_state=100,
)

In [None]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training split.
model = LinearRegression()
model.fit(X=X_train, y=Y_train)

In [None]:
# Fitted coefficients on the first line, intercept on the second.
print(model.coef_, model.intercept_, sep='\n')

# **Model Evaluation**

In [None]:
# Tabulate the fitted coefficients, one row per feature column.
model_coef = pd.DataFrame(
    data=model.coef_,
    index=X_test.columns,
    columns=['Coeffiecients'],
)
model_coef

In [None]:
# Predictions of the fitted model for the held-out test rows.
Y_pred = model.predict(X=X_test)

Calculating Error

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Score the test-set predictions: mean squared error and the coefficient of
# determination (R squared).
mse = mean_squared_error(Y_test,Y_pred)
r_squared = r2_score(Y_test,Y_pred)
# Each label names the metric actually printed; the second value is the
# r2_score result, not a "predicted square".
print('Mean squared error', mse)
print('R squared',r_squared)

In [None]:
import statsmodels.api as sm

# Add a constant column to the training features, fit an ordinary
# least-squares model on them, and show the fitted parameters.
X_train_sm = sm.add_constant(X_train)
model_l = sm.OLS(endog=Y_train, exog=X_train_sm).fit()
model_l.params

In [None]:
# Print the statsmodels summary table of the fitted OLS model.
ols_summary = model_l.summary()
print(ols_summary)

In [None]:
# Plotting libraries plus the inline-figure magic for the cells that follow.
# NOTE(review): the same two imports and the same magic already appear in
# earlier cells of this notebook, so this cell looks redundant when the
# notebook is run top to bottom.
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Annotated heatmap of the column correlations on a 5x5 figure.
plt.figure(figsize=(5, 5))
correlations = advertising.corr()
sns.heatmap(data=correlations, annot=True)

In [None]:
# Drop the Newspaper column from both splits and fit the same estimator
# object again on the remaining two features.
reduced_features = ['TV','Radio']
X_train_new = X_train[reduced_features]
X_test_new = X_test[reduced_features]
model.fit(X=X_train_new, y=Y_train)



In [None]:
# Test-set predictions from the model refitted on TV and Radio only.
Y_pred_new = model.predict(X=X_test_new)

In [None]:

# Actual Sales value VS Predicted Sales
# x positions 1..N, one per test observation; derived from the test set so the
# plot still works when the split size changes (it was hard-coded to 60).
C = list(range(1, len(Y_test) + 1))
fig = plt.figure()
# NOTE(review): this plots Y_pred (predictions of the three-feature model);
# Y_pred_new from the TV/Radio refit is computed earlier but never plotted --
# confirm which series is intended.
plt.plot(C,Y_test, color="green", linewidth=2.5, linestyle="-", label="Actual")
plt.plot(C,Y_pred, color="purple", linewidth=2.5, linestyle="-", label="Predicted")
plt.suptitle('Actual and Predicted Y(Sales) Value', fontsize=20)
# The legend tells the two lines apart; without it only the colours differ.
plt.legend()
plt.xlabel('Index',fontsize=18)
plt.ylabel('Sales',fontsize=17)