In [None]:
import ipaddress
from ip2geotools.databases.noncommercial import DbIpCity
import geoip2.database

def ip_str_to_int(ip: str) -> int:
    """Return the integer value of a textual IP address.

    Args:
        ip (str): IP address in its string form, e.g. "1.2.3.4".
    Returns:
        int: The same address expressed as an integer.
    Raises:
        ValueError: If ``ip`` is not a valid IPv4 or IPv6 address.
    """
    address = ipaddress.ip_address(ip)
    return int(address)


def ip_int_to_str(ip: int)  -> str:
    """ Converts IP address from int to string
    Args:
        ip (int): IP address as an int
    Returns:
        ip (str): IP address as a string (e.g. "1.2.3.4")
    Raises:
        ValueError: If ``ip`` does not represent a valid IP address.
    """
    # ip_address() yields the bare address, so there is no "/prefix" suffix
    # to strip off. Integers below 2**32 are interpreted as IPv4, larger
    # values as IPv6.
    return str(ipaddress.ip_address(ip))


def ip_str_to_city(ip: str) -> str:
    """Resolve an IP address to a city name via DbIpCity.

    Args:
        ip (str): IP address as a string
    Returns:
        city-name (str): The ``city`` attribute of the DbIpCity response
    """
    # The lookup is made with the 'free' API key.
    return DbIpCity.get(ip, api_key='free').city



# ip_str_to_city2 reads the GeoLite2 City database from ./GeoLite2-City.mmdb.
# Create a (free) MaxMind account and download that file from here:
# https://dev.maxmind.com/geoip/docs/databases/city-and-country?lang=en
def ip_str_to_city2(ip: str) -> str:
    """Resolve an IP address to a city name using a local GeoLite2 database.

    Args:
        ip (str): IP address as a string
    Returns:
        city-name (str): City name recorded for the address, or "Dublin"
            when the address is not present in the database
    """
    # The database file must exist at ./GeoLite2-City.mmdb. It can be
    # downloaded (for free, after creating an account) from:
    # https://dev.maxmind.com/geoip/docs/databases/city-and-country?lang=en
    with geoip2.database.Reader('./GeoLite2-City.mmdb') as mmdb:
        try:
            record = mmdb.city(ip)
        except geoip2.errors.AddressNotFoundError:
            # Unknown addresses fall back to a fixed default city.
            return "Dublin"
        return record.city.name


In [None]:
from datetime import datetime
 
def date_string_to_timestamp(input_date : str) -> int:
    """Parse a date string and return its Unix time in milliseconds.

    Args:
        input_date (str): Date formatted as "MM-DD-YYYY HH:MM SS"
            (strptime pattern "%m-%d-%Y %H:%M %S").
    Returns:
        timestamp (int): Milliseconds since the Unix epoch. The parsed
            datetime is naive, so it is interpreted in the local time zone.
    Raises:
        ValueError: If ``input_date`` does not match the expected format.
    """
    parsed = datetime.strptime(input_date, "%m-%d-%Y %H:%M %S")
    epoch_seconds = parsed.timestamp()
    # Scale seconds to milliseconds, then truncate to an integer.
    return int(epoch_seconds * 1000)



In [None]:
import hsfs
import pandas as pd
from features import ip_features as ipf


def connect(featurestore : str) -> hsfs.feature_store.FeatureStore:
    """Open an hsfs connection and return the project's feature store.

    Args:
        featurestore (str): Name passed to hsfs as the ``project`` to connect to.
    Returns:
        hsfs.feature_store.FeatureStore: Handle obtained from
            ``connection.get_feature_store()``.
    """
    print("Connecting....")
    settings = {
        # hostname for your Hopsworks cluster
        "host": "791bb4a0-bb1c-11ec-8721-7bd8cdac0b54.cloud.hopsworks.ai",
        "project": featurestore,
        "engine": "hive",
        "secrets_store": "local",
        # The API key is read from this local file.
        "api_key_file": "./api-key.txt",
    }
    return hsfs.connection(**settings).get_feature_store()


def engineer_features(df: pd.DataFrame) -> pd.DataFrame:
    """Add ``ip_str`` and ``city`` columns derived from the ``ip`` column.

    Args:
        df (pd.DataFrame): Frame with an ``ip`` column. It is modified in place.
    Returns:
        pd.DataFrame: The same frame, with the two new columns added.
    """
    print("Creating features....")
    # Turn each ip value into its string form first ...
    df['ip_str'] = df['ip'].apply(ipf.ip_int_to_str)
    # ... then look up a city for every stringified address.
    df['city'] = df['ip_str'].apply(ipf.ip_str_to_city2)
    return df


def read_data(path: str) -> pd.DataFrame: 
    """Load the raw click log CSV into a DataFrame.

    Args:
        path (str): Location of the CSV file.
    Returns:
        pd.DataFrame: Parsed data, with ``is_attributed`` read as bool and
            ``click_time`` parsed as dates.
    """
    print("Reading raw data....")
    column_types = {'is_attributed': 'bool'}
    date_columns = ['click_time']
    frame = pd.read_csv(path, dtype=column_types, parse_dates=date_columns)
    return frame


def run(project: str) : 
    """Run the pipeline: read the click logs, add features, store them.

    Connects to the feature store of ``project``, reads
    "sample-click-logs.csv", engineers the IP-based features and writes the
    result to version 1 of the "clicks" feature group. The data is inserted
    into the feature group if it can be retrieved; otherwise the feature
    group is created and the data saved to it.

    Args:
        project (str): Name of the Hopsworks project to connect to.
    """
    # First, you have to create a project on Hopsworks called 'prod'
    fs = connect(project)
    df = read_data("sample-click-logs.csv")
    df = engineer_features(df)

    print("Writing features to feature store....")
    fg_name = "clicks"
    version = 1

    # Guard only the lookup. Wrapping the insert as well (as a bare
    # ``except:`` did) would swallow genuine insert errors and
    # KeyboardInterrupt, and misreport them as "feature group missing".
    try:
        fg = fs.get_feature_group(fg_name, version=version)
    except Exception:
        fg = None

    # A failed lookup and a None result are both treated as "does not exist
    # yet" (NOTE(review): whether hsfs raises or returns None for a missing
    # group may depend on the installed version -- confirm).
    if fg is None:
        print("Creating feature group...")
        fg = fs.create_feature_group(fg_name,
                        version=version,
                        description="User clicks on our website",
                        primary_key=['id'],
                        online_enabled=True)
        fg.save(df)
    else:
        fg.insert(df)

# Execute the pipeline, passing "prod" as the project name to connect to.
run("prod")
