# Sales Forecasting

### Importing the libraries

In [1]:
import pandas as pd 
import numpy as np 

### Loading the dataset

In [2]:
filename = 'sales.csv'
data = pd.read_csv(filename)
data.head()

Unnamed: 0,rate,sales_in_first_month,sales_in_second_month,sales_in_third_month
0,7.0,,150,200
1,8.0,120.0,180,240
2,,80.0,120,160
3,4.0,150.0,225,300
4,5.0,,135,180


### Checking for the data Types of the columns

In [5]:
data.info

<bound method DataFrame.info of    rate  sales_in_first_month  sales_in_second_month  sales_in_third_month
0   7.0                   NaN                    150                   200
1   8.0                 120.0                    180                   240
2   NaN                  80.0                    120                   160
3   4.0                 150.0                    225                   300
4   5.0                   NaN                    135                   180
5   6.0                 110.0                    165                   220
6   NaN                  95.0                    142                   190
7   3.0                 130.0                    195                   260
8   9.0                   NaN                    127                   170
9   4.0                 140.0                    210                   280>

### finding the mean of the data

In [6]:
data.mean()

rate                       5.750000
sales_in_first_month     117.857143
sales_in_second_month    164.900000
sales_in_third_month     220.000000
dtype: float64

### Handling the missing values

In [8]:

data['rate'].fillna(0, inplace=True)
data.head()

Unnamed: 0,rate,sales_in_first_month,sales_in_second_month,sales_in_third_month
0,7.0,,150,200
1,8.0,120.0,180,240
2,0.0,80.0,120,160
3,4.0,150.0,225,300
4,5.0,,135,180


In [9]:
data['sales_in_first_month'].fillna(118, inplace=True)
data.head()

Unnamed: 0,rate,sales_in_first_month,sales_in_second_month,sales_in_third_month
0,7.0,118.0,150,200
1,8.0,120.0,180,240
2,0.0,80.0,120,160
3,4.0,150.0,225,300
4,5.0,118.0,135,180


### Converting the data type 

In [10]:

data['sales_in_first_month'] = data['sales_in_first_month'].astype('int64')
data['rate'] = data['rate'].astype('int64')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
 #   Column                 Non-Null Count  Dtype
---  ------                 --------------  -----
 0   rate                   10 non-null     int64
 1   sales_in_first_month   10 non-null     int64
 2   sales_in_second_month  10 non-null     int64
 3   sales_in_third_month   10 non-null     int64
dtypes: int64(4)
memory usage: 452.0 bytes


In [11]:
data.head()

Unnamed: 0,rate,sales_in_first_month,sales_in_second_month,sales_in_third_month
0,7,118,150,200
1,8,120,180,240
2,0,80,120,160
3,4,150,225,300
4,5,118,135,180


### Feature Selection

In [12]:
X = data.iloc[:, :3]
Y = data.iloc[:, -1]

In [15]:
X.head()

Unnamed: 0,rate,sales_in_first_month,sales_in_second_month
0,7,118,150
1,8,120,180
2,0,80,120
3,4,150,225
4,5,118,135


In [16]:
Y.head()

0    200
1    240
2    160
3    300
4    180
Name: sales_in_third_month, dtype: int64

### Concatenating X and Y

In [17]:
df = pd.concat([X, Y], axis=1)
df.head()

Unnamed: 0,rate,sales_in_first_month,sales_in_second_month,sales_in_third_month
0,7,118,150,200
1,8,120,180,240
2,0,80,120,160
3,4,150,225,300
4,5,118,135,180


In [18]:
df.dtypes

rate                     int64
sales_in_first_month     int64
sales_in_second_month    int64
sales_in_third_month     int64
dtype: object

### Saving the Clean Data

In [19]:
df.to_csv('cleaned_sales.csv')

### Fitting the model

In [20]:
from sklearn.linear_model import LinearRegression

In [21]:
clf = LinearRegression()
clf.fit(X, Y)
clf.score(X, Y)

0.9999738258921149

### Saving the model

In [22]:
import pickle

In [23]:
pickle.dump(clf, open('model.pkl', 'wb'))

### Making the prediction

In [26]:
model = pickle.load(open('model.pkl', 'rb'))
model.predict([[4, 300, 500]])

array([665.84118388])