In [109]:
from sklearn.datasets import make_blobs          # This library will generate datapoints 
import numpy as np                               # It is used for linear algebra operations
import matplotlib.pyplot as plt                  # Library used for plotting
import pandas as pd                              # Library used for data manipulation
import math                                      # Library used for mathematical operations
import zipfile                                   # This library will deal with extracting zipfiles

### Downloading the Facebook Metrics Dataset

In [110]:
!gdown --id 1A7tXUvZD4mOcwhiHhsYDtzInAc2_KmNL          #Importing the data set

Downloading...
From: https://drive.google.com/uc?id=1A7tXUvZD4mOcwhiHhsYDtzInAc2_KmNL
To: /content/dataset_Facebook.zip
  0% 0.00/15.2k [00:00<?, ?B/s]100% 15.2k/15.2k [00:00<00:00, 11.4MB/s]


In [111]:
# Unpack every member of the downloaded archive into /content
with zipfile.ZipFile("/content/dataset_Facebook.zip", mode="r") as archive:
    archive.extractall(path="/content")

In [112]:
# Load the semicolon-separated CSV into a pandas DataFrame
data_frame = pd.read_csv('dataset_Facebook.csv', sep=';')

In [113]:
data_frame                                                                 # Displaying the data frame

Unnamed: 0,Page total likes,Type,Category,Post Month,Post Weekday,Post Hour,Paid,Lifetime Post Total Reach,Lifetime Post Total Impressions,Lifetime Engaged Users,Lifetime Post Consumers,Lifetime Post Consumptions,Lifetime Post Impressions by people who have liked your Page,Lifetime Post reach by people who like your Page,Lifetime People who have liked your Page and engaged with your post,comment,like,share,Total Interactions
0,139441,Photo,2,12,4,3,0.0,2752,5091,178,109,159,3078,1640,119,4,79.0,17.0,100
1,139441,Status,2,12,3,10,0.0,10460,19057,1457,1361,1674,11710,6112,1108,5,130.0,29.0,164
2,139441,Photo,3,12,3,3,0.0,2413,4373,177,113,154,2812,1503,132,0,66.0,14.0,80
3,139441,Photo,2,12,2,10,1.0,50128,87991,2211,790,1119,61027,32048,1386,58,1572.0,147.0,1777
4,139441,Photo,2,12,2,3,0.0,7244,13594,671,410,580,6228,3200,396,19,325.0,49.0,393
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,85093,Photo,3,1,7,2,0.0,4684,7536,733,708,985,4750,2876,392,5,53.0,26.0,84
496,81370,Photo,2,1,5,8,0.0,3480,6229,537,508,687,3961,2104,301,0,53.0,22.0,75
497,81370,Photo,1,1,5,2,0.0,3778,7216,625,572,795,4742,2388,363,4,93.0,18.0,115
498,81370,Photo,3,1,4,11,0.0,4156,7564,626,574,832,4534,2452,370,7,91.0,38.0,136


## Data Preprocessing

**Converting the columns to one-hot vectors**

In [114]:
# One-hot encode 'Post Month': one Month_<value> indicator column per distinct month.
# dtype is pinned to uint8 (the default before pandas 2.0) because pandas >= 2.0 returns
# boolean columns by default, which would not reproduce the 0/1 indicators shown below.
months = pd.get_dummies(data_frame['Post Month'], prefix="Month", dtype=np.uint8)
data_frame = pd.concat([data_frame, months], axis=1)   # Append the indicator columns to data_frame
data_frame = data_frame.drop(columns='Post Month')     # Drop the original column; it is now encoded

In [115]:
data_frame

Unnamed: 0,Page total likes,Type,Category,Post Weekday,Post Hour,Paid,Lifetime Post Total Reach,Lifetime Post Total Impressions,Lifetime Engaged Users,Lifetime Post Consumers,Lifetime Post Consumptions,Lifetime Post Impressions by people who have liked your Page,Lifetime Post reach by people who like your Page,Lifetime People who have liked your Page and engaged with your post,comment,like,share,Total Interactions,Month_1,Month_2,Month_3,Month_4,Month_5,Month_6,Month_7,Month_8,Month_9,Month_10,Month_11,Month_12
0,139441,Photo,2,4,3,0.0,2752,5091,178,109,159,3078,1640,119,4,79.0,17.0,100,0,0,0,0,0,0,0,0,0,0,0,1
1,139441,Status,2,3,10,0.0,10460,19057,1457,1361,1674,11710,6112,1108,5,130.0,29.0,164,0,0,0,0,0,0,0,0,0,0,0,1
2,139441,Photo,3,3,3,0.0,2413,4373,177,113,154,2812,1503,132,0,66.0,14.0,80,0,0,0,0,0,0,0,0,0,0,0,1
3,139441,Photo,2,2,10,1.0,50128,87991,2211,790,1119,61027,32048,1386,58,1572.0,147.0,1777,0,0,0,0,0,0,0,0,0,0,0,1
4,139441,Photo,2,2,3,0.0,7244,13594,671,410,580,6228,3200,396,19,325.0,49.0,393,0,0,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,85093,Photo,3,7,2,0.0,4684,7536,733,708,985,4750,2876,392,5,53.0,26.0,84,1,0,0,0,0,0,0,0,0,0,0,0
496,81370,Photo,2,5,8,0.0,3480,6229,537,508,687,3961,2104,301,0,53.0,22.0,75,1,0,0,0,0,0,0,0,0,0,0,0
497,81370,Photo,1,5,2,0.0,3778,7216,625,572,795,4742,2388,363,4,93.0,18.0,115,1,0,0,0,0,0,0,0,0,0,0,0
498,81370,Photo,3,4,11,0.0,4156,7564,626,574,832,4534,2452,370,7,91.0,38.0,136,1,0,0,0,0,0,0,0,0,0,0,0


In [116]:
# One-hot encode 'Post Weekday': one Day_<value> indicator column per distinct weekday.
# dtype is pinned to uint8 (the default before pandas 2.0) because pandas >= 2.0 returns
# boolean columns by default, which would not reproduce the 0/1 indicators shown below.
weekdays = pd.get_dummies(data_frame['Post Weekday'], prefix="Day", dtype=np.uint8)
data_frame = pd.concat([data_frame, weekdays], axis=1)   # Append the indicator columns to data_frame
data_frame = data_frame.drop(columns='Post Weekday')     # Drop the original column; it is now encoded

In [117]:
data_frame

Unnamed: 0,Page total likes,Type,Category,Post Hour,Paid,Lifetime Post Total Reach,Lifetime Post Total Impressions,Lifetime Engaged Users,Lifetime Post Consumers,Lifetime Post Consumptions,Lifetime Post Impressions by people who have liked your Page,Lifetime Post reach by people who like your Page,Lifetime People who have liked your Page and engaged with your post,comment,like,share,Total Interactions,Month_1,Month_2,Month_3,Month_4,Month_5,Month_6,Month_7,Month_8,Month_9,Month_10,Month_11,Month_12,Day_1,Day_2,Day_3,Day_4,Day_5,Day_6,Day_7
0,139441,Photo,2,3,0.0,2752,5091,178,109,159,3078,1640,119,4,79.0,17.0,100,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0
1,139441,Status,2,10,0.0,10460,19057,1457,1361,1674,11710,6112,1108,5,130.0,29.0,164,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0
2,139441,Photo,3,3,0.0,2413,4373,177,113,154,2812,1503,132,0,66.0,14.0,80,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0
3,139441,Photo,2,10,1.0,50128,87991,2211,790,1119,61027,32048,1386,58,1572.0,147.0,1777,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0
4,139441,Photo,2,3,0.0,7244,13594,671,410,580,6228,3200,396,19,325.0,49.0,393,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,85093,Photo,3,2,0.0,4684,7536,733,708,985,4750,2876,392,5,53.0,26.0,84,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
496,81370,Photo,2,8,0.0,3480,6229,537,508,687,3961,2104,301,0,53.0,22.0,75,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
497,81370,Photo,1,2,0.0,3778,7216,625,572,795,4742,2388,363,4,93.0,18.0,115,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
498,81370,Photo,3,11,0.0,4156,7564,626,574,832,4534,2452,370,7,91.0,38.0,136,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0


In [118]:
# One-hot encode 'Post Hour': one Time_<value> indicator column per hour value present in the data.
# dtype is pinned to uint8 (the default before pandas 2.0) because pandas >= 2.0 returns
# boolean columns by default, which would not reproduce the 0/1 indicators shown below.
hours = pd.get_dummies(data_frame['Post Hour'], prefix="Time", dtype=np.uint8)
data_frame = pd.concat([data_frame, hours], axis=1)   # Append the indicator columns to data_frame
data_frame = data_frame.drop(columns='Post Hour')     # Drop the original column; it is now encoded

In [119]:
data_frame

Unnamed: 0,Page total likes,Type,Category,Paid,Lifetime Post Total Reach,Lifetime Post Total Impressions,Lifetime Engaged Users,Lifetime Post Consumers,Lifetime Post Consumptions,Lifetime Post Impressions by people who have liked your Page,Lifetime Post reach by people who like your Page,Lifetime People who have liked your Page and engaged with your post,comment,like,share,Total Interactions,Month_1,Month_2,Month_3,Month_4,Month_5,Month_6,Month_7,Month_8,Month_9,Month_10,Month_11,Month_12,Day_1,Day_2,Day_3,Day_4,Day_5,Day_6,Day_7,Time_1,Time_2,Time_3,Time_4,Time_5,Time_6,Time_7,Time_8,Time_9,Time_10,Time_11,Time_12,Time_13,Time_14,Time_15,Time_16,Time_17,Time_18,Time_19,Time_20,Time_22,Time_23
0,139441,Photo,2,0.0,2752,5091,178,109,159,3078,1640,119,4,79.0,17.0,100,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,139441,Status,2,0.0,10460,19057,1457,1361,1674,11710,6112,1108,5,130.0,29.0,164,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
2,139441,Photo,3,0.0,2413,4373,177,113,154,2812,1503,132,0,66.0,14.0,80,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,139441,Photo,2,1.0,50128,87991,2211,790,1119,61027,32048,1386,58,1572.0,147.0,1777,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
4,139441,Photo,2,0.0,7244,13594,671,410,580,6228,3200,396,19,325.0,49.0,393,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,85093,Photo,3,0.0,4684,7536,733,708,985,4750,2876,392,5,53.0,26.0,84,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
496,81370,Photo,2,0.0,3480,6229,537,508,687,3961,2104,301,0,53.0,22.0,75,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
497,81370,Photo,1,0.0,3778,7216,625,572,795,4742,2388,363,4,93.0,18.0,115,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
498,81370,Photo,3,0.0,4156,7564,626,574,832,4534,2452,370,7,91.0,38.0,136,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


In [120]:
# One-hot encode 'Category': one Category_<value> indicator column per distinct category.
# dtype is pinned to uint8 (the default before pandas 2.0) because pandas >= 2.0 returns
# boolean columns by default, which would not reproduce the 0/1 indicators shown below.
categories = pd.get_dummies(data_frame['Category'], prefix="Category", dtype=np.uint8)
data_frame = pd.concat([data_frame, categories], axis=1)   # Append the indicator columns to data_frame
data_frame = data_frame.drop(columns='Category')           # Drop the original column; it is now encoded

In [121]:
data_frame

Unnamed: 0,Page total likes,Type,Paid,Lifetime Post Total Reach,Lifetime Post Total Impressions,Lifetime Engaged Users,Lifetime Post Consumers,Lifetime Post Consumptions,Lifetime Post Impressions by people who have liked your Page,Lifetime Post reach by people who like your Page,Lifetime People who have liked your Page and engaged with your post,comment,like,share,Total Interactions,Month_1,Month_2,Month_3,Month_4,Month_5,Month_6,Month_7,Month_8,Month_9,Month_10,Month_11,Month_12,Day_1,Day_2,Day_3,Day_4,Day_5,Day_6,Day_7,Time_1,Time_2,Time_3,Time_4,Time_5,Time_6,Time_7,Time_8,Time_9,Time_10,Time_11,Time_12,Time_13,Time_14,Time_15,Time_16,Time_17,Time_18,Time_19,Time_20,Time_22,Time_23,Category_1,Category_2,Category_3
0,139441,Photo,0.0,2752,5091,178,109,159,3078,1640,119,4,79.0,17.0,100,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
1,139441,Status,0.0,10460,19057,1457,1361,1674,11710,6112,1108,5,130.0,29.0,164,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
2,139441,Photo,0.0,2413,4373,177,113,154,2812,1503,132,0,66.0,14.0,80,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,139441,Photo,1.0,50128,87991,2211,790,1119,61027,32048,1386,58,1572.0,147.0,1777,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
4,139441,Photo,0.0,7244,13594,671,410,580,6228,3200,396,19,325.0,49.0,393,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,85093,Photo,0.0,4684,7536,733,708,985,4750,2876,392,5,53.0,26.0,84,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
496,81370,Photo,0.0,3480,6229,537,508,687,3961,2104,301,0,53.0,22.0,75,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
497,81370,Photo,0.0,3778,7216,625,572,795,4742,2388,363,4,93.0,18.0,115,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
498,81370,Photo,0.0,4156,7564,626,574,832,4534,2452,370,7,91.0,38.0,136,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [122]:
# One-hot encode 'Type': one Type_<value> indicator column per distinct post type.
# dtype is pinned to uint8 (the default before pandas 2.0) because pandas >= 2.0 returns
# boolean columns by default, which would not reproduce the 0/1 indicators shown below.
types = pd.get_dummies(data_frame['Type'], prefix="Type", dtype=np.uint8)
data_frame = pd.concat([data_frame, types], axis=1)   # Append the indicator columns to data_frame
data_frame = data_frame.drop(columns='Type')          # Drop the original column; it is now encoded

In [123]:
data_frame

Unnamed: 0,Page total likes,Paid,Lifetime Post Total Reach,Lifetime Post Total Impressions,Lifetime Engaged Users,Lifetime Post Consumers,Lifetime Post Consumptions,Lifetime Post Impressions by people who have liked your Page,Lifetime Post reach by people who like your Page,Lifetime People who have liked your Page and engaged with your post,comment,like,share,Total Interactions,Month_1,Month_2,Month_3,Month_4,Month_5,Month_6,Month_7,Month_8,Month_9,Month_10,Month_11,Month_12,Day_1,Day_2,Day_3,Day_4,Day_5,Day_6,Day_7,Time_1,Time_2,Time_3,Time_4,Time_5,Time_6,Time_7,Time_8,Time_9,Time_10,Time_11,Time_12,Time_13,Time_14,Time_15,Time_16,Time_17,Time_18,Time_19,Time_20,Time_22,Time_23,Category_1,Category_2,Category_3,Type_Link,Type_Photo,Type_Status,Type_Video
0,139441,0.0,2752,5091,178,109,159,3078,1640,119,4,79.0,17.0,100,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
1,139441,0.0,10460,19057,1457,1361,1674,11710,6112,1108,5,130.0,29.0,164,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0
2,139441,0.0,2413,4373,177,113,154,2812,1503,132,0,66.0,14.0,80,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0
3,139441,1.0,50128,87991,2211,790,1119,61027,32048,1386,58,1572.0,147.0,1777,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
4,139441,0.0,7244,13594,671,410,580,6228,3200,396,19,325.0,49.0,393,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,85093,0.0,4684,7536,733,708,985,4750,2876,392,5,53.0,26.0,84,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0
496,81370,0.0,3480,6229,537,508,687,3961,2104,301,0,53.0,22.0,75,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
497,81370,0.0,3778,7216,625,572,795,4742,2388,363,4,93.0,18.0,115,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
498,81370,0.0,4156,7564,626,574,832,4534,2452,370,7,91.0,38.0,136,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0


In [124]:
list(data_frame.columns)

['Page total likes',
 'Paid',
 'Lifetime Post Total Reach',
 'Lifetime Post Total Impressions',
 'Lifetime Engaged Users',
 'Lifetime Post Consumers',
 'Lifetime Post Consumptions',
 'Lifetime Post Impressions by people who have liked your Page',
 'Lifetime Post reach by people who like your Page',
 'Lifetime People who have liked your Page and engaged with your post',
 'comment',
 'like',
 'share',
 'Total Interactions',
 'Month_1',
 'Month_2',
 'Month_3',
 'Month_4',
 'Month_5',
 'Month_6',
 'Month_7',
 'Month_8',
 'Month_9',
 'Month_10',
 'Month_11',
 'Month_12',
 'Day_1',
 'Day_2',
 'Day_3',
 'Day_4',
 'Day_5',
 'Day_6',
 'Day_7',
 'Time_1',
 'Time_2',
 'Time_3',
 'Time_4',
 'Time_5',
 'Time_6',
 'Time_7',
 'Time_8',
 'Time_9',
 'Time_10',
 'Time_11',
 'Time_12',
 'Time_13',
 'Time_14',
 'Time_15',
 'Time_16',
 'Time_17',
 'Time_18',
 'Time_19',
 'Time_20',
 'Time_22',
 'Time_23',
 'Category_1',
 'Category_2',
 'Category_3',
 'Type_Link',
 'Type_Photo',
 'Type_Status',
 'Type_Video']

 **Function to find columns having null values**

In [125]:
def check_for_columns_with_null_values(data_frame):
  """Return the labels of the columns that contain at least one null value.

  Args:
    data_frame: pandas DataFrame to inspect; it is not modified.

  Returns:
    list: column labels, in the frame's column order, for every column that
    has at least one null entry. Empty when no column contains a null.
  """
  # isnull().any() reduces every column to a single flag in one vectorised pass,
  # replacing the per-column Python loop and the redundant `== True` comparison.
  has_null = data_frame.isnull().any()
  return [column for column, flag in zip(data_frame.columns, has_null) if flag]

In [126]:
columns_with_null_values = check_for_columns_with_null_values(data_frame)          # Collect the labels of all columns in data_frame that contain null values

In [127]:
columns_with_null_values                         # Display all columns with null values

['Paid', 'like', 'share']

**Replacing the null values with 0**

In [128]:
# Number of rows whose 'Paid' value is null
data_frame['Paid'].isna().sum()

1

In [129]:
# Substitute 0 for every null entry in 'Paid'
data_frame['Paid'] = data_frame['Paid'].fillna(value=0)

In [130]:
# Re-count the nulls in 'Paid'; the fill above should leave none
data_frame['Paid'].isna().sum()

0

In [131]:
# Number of rows whose 'like' value is null
data_frame['like'].isna().sum()

1

In [132]:
# Substitute 0 for every null entry in 'like'
data_frame['like'] = data_frame['like'].fillna(value=0)

In [133]:
# Re-count the nulls in 'like'; the fill above should leave none
data_frame['like'].isna().sum()

0

In [134]:
# Number of rows whose 'share' value is null
data_frame['share'].isna().sum()

4

In [135]:
# Substitute 0 for every null entry in 'share'
data_frame['share'] = data_frame['share'].fillna(value=0)

In [136]:
# Re-count the nulls in 'share'; the fill above should leave none
data_frame['share'].isna().sum()

0

**Standardizing the data**

In [137]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()                      # Default-configured scaler: standardizes a feature by removing its mean and scaling to unit variance

In [138]:
# Standardize 'Page total likes'; the column is reshaped to (n_rows, 1) because the scaler expects 2-D input
data_frame['Page total likes'] = scaler.fit_transform(data_frame['Page total likes'].to_numpy().reshape(-1, 1))

In [139]:
data_frame['Page total likes']

0      0.999403
1      0.999403
2      0.999403
3      0.999403
4      0.999403
         ...   
495   -2.343746
496   -2.572761
497   -2.572761
498   -2.572761
499   -2.572761
Name: Page total likes, Length: 500, dtype: float64

In [140]:
# Standardize each of the remaining numeric columns with StandardScaler().
# The scaler is re-fitted on every column in turn, so each column is scaled independently
# and `scaler` is left fitted on the last column in the list.

columns_to_standardize = [
    'Paid',
    'Lifetime Post Total Reach',
    'Lifetime Post Total Impressions',
    'Lifetime Engaged Users',
    'Lifetime Post Consumers',
    'Lifetime Post Consumptions',
    'Lifetime Post Impressions by people who have liked your Page',
    'Lifetime Post reach by people who like your Page',
    'Lifetime People who have liked your Page and engaged with your post',
    'comment',
    'like',
    'share',
    'Total Interactions',
]

for column in columns_to_standardize:
    # reshape(-1, 1) turns the column into the 2-D (n_rows, 1) array the scaler expects
    data_frame[column] = scaler.fit_transform(np.array(data_frame[column]).reshape(-1, 1))

In [141]:
data_frame

Unnamed: 0,Page total likes,Paid,Lifetime Post Total Reach,Lifetime Post Total Impressions,Lifetime Engaged Users,Lifetime Post Consumers,Lifetime Post Consumptions,Lifetime Post Impressions by people who have liked your Page,Lifetime Post reach by people who like your Page,Lifetime People who have liked your Page and engaged with your post,comment,like,share,Total Interactions,Month_1,Month_2,Month_3,Month_4,Month_5,Month_6,Month_7,Month_8,Month_9,Month_10,Month_11,Month_12,Day_1,Day_2,Day_3,Day_4,Day_5,Day_6,Day_7,Time_1,Time_2,Time_3,Time_4,Time_5,Time_6,Time_7,Time_8,Time_9,Time_10,Time_11,Time_12,Time_13,Time_14,Time_15,Time_16,Time_17,Time_18,Time_19,Time_20,Time_22,Time_23,Category_1,Category_2,Category_3,Type_Link,Type_Photo,Type_Status,Type_Video
0,0.999403,-0.620517,-0.490859,-0.319251,-0.754391,-0.782390,-0.628507,-0.229166,-0.644420,-0.802117,-0.164558,-0.305375,-0.236595,-0.295167,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
1,0.999403,-0.620517,-0.151569,-0.137227,0.545365,0.637720,0.129526,-0.084652,-0.061698,0.813599,-0.117298,-0.147406,0.045963,-0.126681,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0
2,0.999403,-0.620517,-0.505781,-0.328608,-0.755407,-0.777853,-0.631009,-0.233620,-0.662272,-0.780879,-0.353596,-0.345641,-0.307234,-0.347819,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0
3,0.999403,1.611559,1.594532,0.761212,1.311601,-0.009950,-0.148169,0.740997,3.317884,1.267764,2.387461,4.319075,2.824446,4.119702,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
4,0.999403,-0.620517,-0.293131,-0.208428,-0.253390,-0.440974,-0.417859,-0.176430,-0.441145,-0.349586,0.544336,0.456591,0.516892,0.476185,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,-2.343746,-0.620517,-0.405817,-0.287384,-0.190384,-0.102960,-0.215216,-0.201174,-0.483364,-0.356121,-0.117298,-0.385908,-0.024677,-0.337289,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0
496,-2.572761,-0.620517,-0.458814,-0.304419,-0.389565,-0.329815,-0.364321,-0.214383,-0.583959,-0.504786,-0.353596,-0.385908,-0.118863,-0.360982,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
497,-2.572761,-0.620517,-0.445697,-0.291555,-0.300137,-0.257221,-0.310283,-0.201308,-0.546952,-0.403498,-0.164558,-0.262011,-0.213048,-0.255678,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
498,-2.572761,-0.620517,-0.429058,-0.287019,-0.299121,-0.254953,-0.291770,-0.204790,-0.538613,-0.392062,-0.022779,-0.268206,0.257881,-0.200393,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0
