# Inference Latency

## 1 Introduction
This notebook is used to measure inference latencies of (i) social and image feature extraction, (ii) dimensionality reduction and (iii) popularity score prediction.

## 2 Workflow
The workflow of this activity is as follows.
- Latency of feature extraction
- Latency of dimensionality reduction
- Latency of popularity score prediction


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Importing libraries
import numpy as np
import pandas as pd
import time

## 3 Latency of Feature Extraction

### 3.1 Social Features

In [10]:
# Sampling 8 social features
df = pd.read_json('/content/drive/MyDrive/M.Sc. Dissertation/Data & Code/train_data.json')
df = df.sample(8,random_state=1)

In [11]:
# Writing a function to impute post time

def post_time(t):
    if t.hour >= 6 and t.hour < 12:
        return 'Morning'
    elif t.hour >= 12 and t.hour < 18:
        return 'Afternoon'
    elif t.hour >= 18 and t.hour <24:
        return 'Evening'
    else:
        return 'Night'

In [12]:
# Wriring a function to fetch city and country from latitude and longitude

from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent='smpp')

def get_cc(row):
    latitude = row['Latitude']
    longitude = row['Longitude']
    if np.isnan(latitude) or np.isnan(longitude):
        return ('Unknown','Unknown')
    else:
        try:
            location = geolocator.reverse(f"{latitude},{longitude}")
            address = location.raw['address']
            city = address.get('city', '')
            country = address.get('country', '')
            return city, country
        except:
            return ('Unknown','Unknown')

In [13]:
# Writing a function to perform feature extraction

def feature_extraction(df):
    df = df.drop(columns='photo_firstdatetaken')
    df = df.drop(columns=['Mediastatus','Pathalias','Pid','Mediatype','location_description','user_description','Ispublic'])
    df['photo_firstdate'] = df['photo_firstdate'].map(lambda x: np.NaN if (x=='' or x=='None') else int(x))
    df['photo_firstdate'] = pd.to_datetime(df['photo_firstdate'],unit='s')

    df['Postdate'] = pd.to_datetime(df['Postdate'],unit='s')

    df['Longitude'] = df['Longitude'].map(lambda x: np.NaN if x=='' else float(x))
    df['Latitude'] = df[['Latitude','Geoaccuracy']].apply(lambda row: np.NaN if row['Geoaccuracy']==0 else row['Latitude'],axis=1)
    df['Longitude'] = df[['Longitude','Geoaccuracy']].apply(lambda row: np.NaN if row['Geoaccuracy']==0 else row['Longitude'],axis=1)

    p = pd.to_datetime(np.NaN)
    q = pd.to_datetime(365,unit='D')
    df['photo_firstdate'] = df['photo_firstdate'].map(lambda x: p if x<q else x)
    df['TitleLen'] = df['Title'].map(lambda x: len(x))
    df['TagNum'] = df['Alltags'].map(lambda x: len(x.split()))
    df['DaysSinceFirstPhoto'] = (df['Postdate'] - df['photo_firstdate']).apply(lambda x: x.days)

    df['DaysSinceFirstPhoto'][df['DaysSinceFirstPhoto']<0]=1425.0
    df = df.drop(columns=['photo_firstdate','Title','Alltags'])
    df['DayOfWeek'] = df['Postdate'].apply(lambda x: x.day_name())
    df['Month'] = df['Postdate'].apply(lambda x: x.month_name())

    df['Time'] = df['Postdate'].apply(post_time)
    df = df.drop(columns='Postdate')

    df[['City','Country']] = df[['Latitude','Longitude']].apply(get_cc, axis=1,result_type='expand')
    df = df.drop(columns=['Latitude','Longitude']).reset_index(drop=True)
    df['DaysSinceFirstPhoto'] = df['DaysSinceFirstPhoto'].fillna(1425.0)

    df = df.drop(columns=['timezone_timezone_id','img_path'])

    df['DayOfWeek'] = df['DayOfWeek'].map({
        'Monday': 0,
        'Tuesday': 1,
        'Wednesday': 2,
        'Thursday': 3,
        'Friday': 4,
        'Saturday': 5,
        'Sunday': 6
    })

    df['Month'] = df['Month'].map({
        'January': 0,
        'February': 1,
        'March': 2,
        'April': 3,
        'May': 4,
        'June': 5,
        'July': 6,
        'August': 7,
        'September': 8,
        'October': 9,
        'November': 10,
        'December': 11
    })

    df['Time'] = df['Time'].map({
        'Morning': 0,
        'Afternoon': 1,
        'Evening': 2,
        'Night': 3
    })

    return df

In [14]:
# Measuring the latency of social feature extraction
t = time.time()
df = feature_extraction(df)
time.time() - t

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['DaysSinceFirstPhoto'][df['DaysSinceFirstPhoto']<0]=1425.0


1.4558217525482178