In [1]:
# Loading all important Libraries
import numpy as np
import pickle
from keras.models import model_from_json
import pandas as pd
import datetime
import re
from prettytable import PrettyTable
import warnings
warnings.filterwarnings('ignore')

In [10]:
class final:
    """Load the saved traffic model and print predictions for pages of the traffic table.

    The class reads a Keras model (JSON architecture + weights), three pickled
    encoders (access, language, agent) and the traffic CSV, then derives the
    model inputs for a page from its name and from the five days of traffic
    preceding the requested date.

    The file locations are class attributes so they can be overridden on a
    subclass or an instance without editing the methods.
    """

    # Locations of the artefacts read by load_files().
    # NOTE(review): MODEL_JSON_PATH is absolute while the others are relative to the
    # working directory; kept as in the original notebook — confirm the intended layout.
    MODEL_JSON_PATH = "/content/model.json"
    MODEL_WEIGHTS_PATH = "model.h5"
    ACCESS_ENCODER_PATH = 'access_enc.pkl'
    LANG_ENCODER_PATH = 'lang_enc.pkl'
    AGENT_ENCODER_PATH = 'spider_enc.pkl'
    DATA_PATH = 'final_data.csv'

    # Number of preceding days fed to the model (find_data reshapes to (1, WINDOW, 1)).
    WINDOW = 5

    def load_files(self): # This function loads all important files
        """Load the model, its weights, the three encoders and the traffic table.

        Every call re-reads everything from disk and overwrites ``self.model``,
        ``self.access_enc``, ``self.lang_enc``, ``self.agent_enc`` and
        ``self.new_data``.
        """
        #https://machinelearningmastery.com/save-load-keras-deep-learning-models/
        with open(self.MODEL_JSON_PATH, 'r') as json_file: # Loads model architecture
            loaded_model_json = json_file.read()
        self.model = model_from_json(loaded_model_json)
        self.model.load_weights(self.MODEL_WEIGHTS_PATH) # Loads weights of model

        # NOTE(security): pickle.load can execute arbitrary code; only use encoder
        # files that come from a trusted source.
        with open(self.ACCESS_ENCODER_PATH, 'rb') as file:
            self.access_enc = pickle.load(file) #Loads label encoder for access

        with open(self.LANG_ENCODER_PATH, 'rb') as file:
            self.lang_enc = pickle.load(file) #Loads label encoder for language

        with open(self.AGENT_ENCODER_PATH, 'rb') as file:
            self.agent_enc = pickle.load(file) #Loads label encoder for agent

        self.new_data = pd.read_csv(self.DATA_PATH) #Loads data which is required to make prediction

    def _ensure_loaded(self):
        """Call load_files() only if some artefact has not been loaded on this instance yet.

        Avoids re-reading the model and the CSV on every prediction call; call
        load_files() directly to force a reload after the files change on disk.
        """
        required = ('model', 'access_enc', 'lang_enc', 'agent_enc', 'new_data')
        if not all(hasattr(self, name) for name in required):
            self.load_files()

    def find_access(self, page): #This function finds the access type of the page for which we are making prediction
        """Return ``(encoded_access, access_label)`` for a page name.

        Only the part of the name starting at the last ``'org_'`` is inspected,
        so words appearing in the article title are ignored.

        Raises:
            ValueError: if ``'org_'`` does not occur in ``page`` or if none of
                ``'mobile'``, ``'desktop'``, ``'all-access'`` follows it.
        """
        start = page.rfind('org_')
        if start == -1:
            raise ValueError("page name %r does not contain 'org_'" % (page,))
        suffix = page[start:]
        # Checked in this order so the precedence matches the original chain of
        # independent `if`s, where the last matching test won.
        if 'mobile' in suffix:
            access = 'mobile'
        elif 'desktop' in suffix:
            access = 'desktop'
        elif 'all-access' in suffix:
            access = 'all_access'
        else:
            raise ValueError("no access type found in page name %r" % (page,))
        encoded = self.access_enc.transform([access]).reshape(1, 1)
        return encoded, access

    def find_lang(self, page): # This function finds language of the page for which we are making prediction
        """Return ``(encoded_language, language_name)`` for a page name.

        The language code is the two characters immediately before
        ``'.wikipedia'``.

        Raises:
            KeyError: if the extracted code is not one of the known codes.
        """
        index = page.find('.wikipedia')
        if index == -1:
            # NOTE(review): when '.wikipedia' is absent the original slicing fell through
            # to the 3rd- and 2nd-last characters of the name. That yields 'nt' (mapped to
            # 'Media') for a name ending in 'all-agents', but 'de' for a name ending in
            # 'spider'. Kept unchanged so the codes still match the fitted encoder —
            # TODO confirm against the preprocessing used for training.
            lang = page[-3:-1]
        else:
            lang = page[max(index - 2, 0):index]
        lang_dict = {'de':'German','en':'English', 'es':'Spanish', 'fr':'French', 'ja':'Japanese', 'nt':'Media', 'ru':'Russian', 'zh':'Chinese'}
        language = lang_dict[lang]
        encoded = self.lang_enc.transform([lang]).reshape(1, 1)
        return encoded, language

    def find_agent(self, page): #This function finds if the page was accessed by a spider or not.
        """Return ``(encoded_agent, agent_label)``; the label is 'spider' when that word occurs in ``page``."""
        spider = 'spider' if 'spider' in page else 'non-spider'
        encoded = self.agent_enc.transform([spider]).reshape(1, 1)
        return encoded, spider

    def find_data(self, ind, date):
        ''' This function returns the traffic on last 5 days on the page on which we are making prediction,
         this data is necessary in order to make prediction and this data will be fed to loaded model.

         ind is the positional row index in self.new_data and date is a 'YYYY-MM-DD' string.
         The five columns are looked up by their date label, so the result does not depend on
         how many non-date columns precede them in the CSV. The values are cast to int,
         passed through log1p and returned with shape (1, 5, 1).

         Raises ValueError when any of the five preceding dates has no column in the table.'''
        parts = date.split('-')
        target = datetime.date(int(parts[0]), int(parts[1]), int(parts[2]))
        # Oldest day first, ending the day before `target`.
        wanted = [(target - datetime.timedelta(days=back)).isoformat()
                  for back in range(self.WINDOW, 0, -1)]
        missing = [label for label in wanted if label not in self.new_data.columns]
        if missing:
            raise ValueError("no traffic columns for %s; cannot build the %d-day window before %s"
                             % (', '.join(missing), self.WINDOW, date))
        window = self.new_data.iloc[ind][wanted].values
        data = np.log1p(window.astype(int))
        return np.array(data).reshape(1, self.WINDOW, 1)

    def _predict_page(self, ind, date):
        """Build the model inputs for row ``ind`` / ``date`` and return
        ``(agent_label, access_label, language, predicted)``.

        Also stores the page name in ``self.page``. ``predicted`` is the model
        output mapped back with expm1 and rounded to an int.
        """
        self.page = self.new_data['Page'].values[ind]
        access, access_label = self.find_access(self.page)
        lang, language = self.find_lang(self.page)
        agent, agent_label = self.find_agent(self.page)
        data = self.find_data(ind, date)
        raw = self.model.predict([data, access, lang, agent])
        predicted = int(np.round(np.expm1(raw[0])[0]))
        return agent_label, access_label, language, predicted

    @staticmethod
    def _smape(actual, predicted):
        """Return |actual - predicted| / ((actual + predicted) / 2), defined as 0.0 when both are equal.

        The equality guard avoids the 0/0 case (nan) for pages whose actual and
        predicted traffic are both zero.
        """
        if actual == predicted:
            return 0.0
        return np.abs(actual - predicted) / ((actual + predicted) / 2)

    def final_fun_1(self, ind, date): # This function takes index of the page and date as input for which we want to make prediction
        """Print a one-row table with client, access, language, predicted traffic and time taken."""
        self._ensure_loaded()
        start = datetime.datetime.now()
        agent1, access1, language, predicted = self._predict_page(ind, date)

        x = PrettyTable(["Client","Access", "Language",'Predicted','Time Taken'])
        x.add_row([agent1, access1, language, predicted, datetime.datetime.now()-start])

        print(x)
        #Prints the client,access,language,time taken and predicted traffic on the page

    def final_fun_2(self, set): #This function takes index,date and actual traffic on the page as input, can also take a list of multiple inputs
        """Print a table comparing predicted and actual traffic for several pages.

        ``set`` is an iterable of ``(index, date, actual)`` triples; one row with the
        prediction, the actual value, the SMAPE and the time taken is printed per triple.
        (The parameter name shadows the builtin but is kept for keyword callers.)
        """
        self._ensure_loaded()
        x = PrettyTable(["Client","Access", "Language",'Predicted','Actual','SMAPE','Time Taken'])
        for val in set:
            start = datetime.datetime.now()
            agent1, access1, language, predicted = self._predict_page(val[0], val[1])
            actual = int(val[2])
            smape = self._smape(actual, predicted)

            x.add_row([agent1, access1, language, predicted, actual, np.round(smape, 3), datetime.datetime.now()-start])
            #Row holds client,access,language,prediction,actual,SMAPE and time taken for the page
        print(x)

In [11]:
final_object = final() # Create the predictor object; model, encoders and data are read from disk when a prediction method is called

In [14]:
final_object.final_fun_1(8,'2015-07-07') # Predict traffic for the page at row index 8 on 2015-07-07 and print the summary table

+--------+------------+----------+-----------+----------------+
| Client |   Access   | Language | Predicted |   Time Taken   |
+--------+------------+----------+-----------+----------------+
| spider | all_access | Chinese  |     5     | 0:00:00.767551 |
+--------+------------+----------+-----------+----------------+


In [8]:
# Read the traffic table so that actual values can be looked up and passed to final_fun_2
data=pd.read_csv('final_data.csv')
data.head()

Unnamed: 0,Page,2015-07-01,2015-07-02,2015-07-03,2015-07-04,2015-07-05,2015-07-06,2015-07-07,2015-07-08,2015-07-09,...,2017-09-01,2017-09-02,2017-09-03,2017-09-04,2017-09-05,2017-09-06,2017-09-07,2017-09-08,2017-09-09,2017-09-10
0,5566_zh.wikipedia.org_all-access_spider,12.0,7.0,4.0,5.0,20.0,8.0,5.0,17.0,24.0,...,13.0,13.0,45.0,4.0,13.0,20.0,18.0,17.0,14.0,11.0
1,Intel_80386_zh.wikipedia.org_all-access_spider,7.0,5.0,9.0,3.0,5.0,1.0,6.0,6.0,3.0,...,7.0,7.0,10.0,9.0,2.0,6.0,7.0,9.0,6.0,5.0
2,Kara_zh.wikipedia.org_all-access_spider,56.0,25.0,7.0,50.0,90.0,30.0,19.0,16.0,10.0,...,13.0,13.0,11.0,9.0,9.0,14.0,19.0,36.0,10.0,8.0
3,MAMAMOO_zh.wikipedia.org_all-access_spider,8.0,7.0,17.0,11.0,8.0,1.0,1.0,10.0,6.0,...,9.0,19.0,17.0,19.0,8.0,4.0,10.0,10.0,10.0,11.0
4,Netflix_zh.wikipedia.org_all-access_spider,7.0,10.0,21.0,2.0,9.0,13.0,7.0,6.0,7.0,...,23.0,37.0,38.0,21.0,35.0,29.0,35.0,25.0,24.0,22.0


In [15]:
# Four (row index, date) samples; the actual traffic of each one is read from `data`
samples=[(5,'2015-09-07'),(265,'2016-01-09'),(1125,'2016-04-23'),(20,'2015-10-07')]
final_object.final_fun_2([(row,day,data.at[row,day]) for row,day in samples])

+------------+------------+----------+-----------+--------+-------+----------------+
|   Client   |   Access   | Language | Predicted | Actual | SMAPE |   Time Taken   |
+------------+------------+----------+-----------+--------+-------+----------------+
|   spider   | all_access | Chinese  |     7     |   10   | 0.353 | 0:00:00.548164 |
| non-spider |  desktop   |  French  |    115    |  109   | 0.054 | 0:00:00.095094 |
| non-spider |   mobile   | Russian  |    308    |  227   | 0.303 | 0:00:00.157086 |
|   spider   | all_access | Chinese  |     3     |   4    | 0.286 | 0:00:00.145276 |
+------------+------------+----------+-----------+--------+-------+----------------+
