In [None]:
#| default_exp matchdb

# MatchDB Core

> Suggesting people to connect with based on shared interests

**PyMatch is a database of users and their interests. Given a user from its database, it will search and return others with matching/mutual interests**

### What data will be stored?

*Note: This project is using a serverless NoSQL database called Base by deta.sh. It's free, has unlimited storage and is easy to use!*

- Dates are UTC in the format `dd-mm-yyyy hh:00`, in code this is %d-%m-%Y %H:00 (see [Python strftime cheatsheet](https://strftime.org/) for more formats).
- `user_id` is a unique identifier for each user and `group_id` is for the group/server they belong to.
- [ULIDs (Universally Unique Lexicographically Sortable Identifiers)](https://github.com/ahawker/ulid) are used as keys, and insertion order is preserved since data in Bases is ordered by key; `key` is required by Deta and needs to be unique.
- Interests are a list of strings.
<br>


**Users**

Info stored for each user

```
[{'date': '02-10-2022 17:00',
  'key': '01BJQMF54D093DXEAWZ6JYRPAQ',
  'user_id': '1111',
  'group_id': '2222',
  'username': 'cameronson the 69th',
  'interests': ['MMA', 'memes', 'Uk', 'tea']}]
```

In [None]:
#|export

import os
import time
from datetime import datetime, timezone
from typing import Dict, List, Optional

import pandas as pd
import ulid
from deta import Deta

**A Deta project key is needed: create one on https://deta.space and save it as an environment variable.**

Note: it'll be convenient if you save the env variable as "PROJECT_KEY" since that's the default argument for the project_key parameter in all the functions that use it

## Users

### Add

In [None]:
#|export

def add_someone(username: str, 
                user_id: str, # unique identifier to authenticate users
                database_name: str, # create or connect to an existing database
                interests: List[str],
                group_id: Optional[str] = None, # id for the group/server the user is from
                project_key: str = "PROJECT_KEY" # the environment variable name where your Deta project key is stored
                ):

    """
    Create a new user and add their interests. Will be used by `add_interests`.

    All interests are lowercased and de-duplicated (first occurrence wins).
    The record is stamped with the current UTC hour in `dd-mm-yyyy hh:00` format
    and stored under a freshly generated ULID key. Returns nothing.
    """

    # `project_key` is the *name* of the environment variable, not the key itself
    deta = Deta(os.environ[project_key])
    db = deta.Base(database_name)

    # dict.fromkeys removes duplicates while keeping the caller's order
    lowered = list(dict.fromkeys(interest.lower() for interest in interests))

    db.put(
        {
            "key": ulid.new().str,
            "date": datetime.now(timezone.utc).strftime("%d-%m-%Y %H:00"),
            "username": username,  # read back by `match_interests`
            "user_id": user_id,
            "group_id": group_id,
            "interests": lowered,
        }
    )

### Find

In [None]:
#|export

def find_by_userid(user_id: str,
                   database_name: str,
                   project_key: str = "PROJECT_KEY" # the environment variable name where your Deta project key is stored
                  ) -> dict:
    
    """
    Find a user from their user_id.

    Returns the first stored record whose `user_id` matches.
    Raises `ValueError` when no record matches.
    """

    # `project_key` is the *name* of the environment variable, not the key itself
    deta = Deta(os.environ[project_key])
    db = deta.Base(database_name)

    found = db.fetch({"user_id": user_id}).items

    if not found:
        raise ValueError("user doesn't exist")

    return found[0]

### Delete

In [None]:
#|export

def delete_user(user_id: str,
                database_name: str,
                project_key: str = "PROJECT_KEY" # the environment variable name where your Deta project key is stored
               ):
    
    """
    Deletes an entry using their user_id if they exist.

    Only the first record returned for `user_id` is removed. A message saying
    whether anything was deleted is printed either way; nothing is returned.
    """

    # `project_key` is the *name* of the environment variable, not the key itself
    deta = Deta(os.environ[project_key])
    db = deta.Base(database_name)

    matches = db.fetch({"user_id": user_id}).items

    # nothing stored for this user: report and stop
    if not matches:
        print(f"user {user_id} not in {database_name}")
        return

    db.delete(matches[0]["key"])
    print(f"user {user_id} deleted from {database_name}")

## Interests

### Add

In [None]:
#|export

def add_interests(username: str, 
                  user_id: str, # unique identifier to authenticate users
                  interests: List[str], 
                  database_name: str, # create or connect to an existing database
                  group_id: Optional[str] = None, # id for the group/server the user is from
                  project_key: str = "PROJECT_KEY" # the environment variable name where your Deta project key is stored
                 ):

    """
    Add new interests to a user if they exist or creates a new user using `add_someone` if they don't.

    The user is looked up by `user_id`, or by the `user_id` + `group_id`
    combination when `group_id` is given. New interests are lowercased and
    merged into the matched record without duplicates, keeping existing
    interests first. Returns nothing.
    """

    # `project_key` is the *name* of the environment variable, not the key itself
    deta = Deta(os.environ[project_key])
    db = deta.Base(database_name)

    # check if user exists; with a group id, both fields must match
    query = {"user_id": user_id}
    if group_id is not None:
        query["group_id"] = group_id
    existing = db.fetch(query).items

    if not existing:
        # create new user if they don't exist
        add_someone(username = username,
                    user_id = user_id,
                    group_id = group_id,
                    interests = [interest.lower() for interest in interests],
                    database_name = database_name,
                    project_key = project_key
                   )
        return

    # merge into the record that actually matched the query, so a user who
    # is in several groups never gets another group's interests mixed in
    record = existing[0]
    current = record.get("interests") or []

    # lowercase first, then de-duplicate, so 'MMA' and 'mma' collapse to one
    merged = list(dict.fromkeys(interest.lower() for interest in [*current, *interests]))

    db.update({'interests': merged}, key=record["key"])

### Show a user's interests

In [None]:
#|export

def show_interests(user_id: str,
                   database_name: str,
                   project_key: str = "PROJECT_KEY" # the environment variable name where your Deta project key is stored
                  ) -> List[str]:
    
    """
    Return the `interests` list stored on a user's record.

    The record is looked up with `find_by_userid`, so whatever that helper
    raises for an unknown user propagates unchanged.
    """

    record = find_by_userid(user_id, database_name, project_key)
    return record["interests"]

### Delete interests

In [None]:
#|export

def delete_interests(user_id: str, # unique identifier to authenticate users
                     remove_interests: List[str], 
                     database_name: str, # create or connect to an existing database
                     project_key: str = "PROJECT_KEY" # the environment variable name where your Deta project key is stored
                    ):

    """
    Delete interest(s).

    Matching is case-insensitive, and the remaining interests are written back
    lowercased in their original order. Entries of `remove_interests` that the
    user does not have are ignored.
    Raises `ValueError` when no record exists for `user_id`. Returns nothing.
    """

    # `project_key` is the *name* of the environment variable, not the key itself
    deta = Deta(os.environ[project_key])
    db = deta.Base(database_name)

    # one fetch gives both the record key and its current interests
    found = db.fetch({"user_id": user_id}).items
    if not found:
        raise ValueError("user doesn't exist")
    record = found[0]

    unwanted = {interest.lower() for interest in remove_interests}
    current = dict.fromkeys(interest.lower() for interest in (record.get("interests") or []))
    kept = [interest for interest in current if interest not in unwanted]

    # update interests
    db.update({'interests': kept}, key=record["key"])

## Match

In [None]:
#|export

def match_interests(user_id: str,
                    database_name: str,
                    project_key: str = "PROJECT_KEY" # the environment variable name where your Deta project key is stored
                   ) -> List[dict]:
    
    """
    Match users to a given user_id and return names and common/shared interests.

    Returns one dict per *other* user sharing at least one interest, with the
    keys `username`, `user_id`, `common interests` (ordered like the given
    user's own interests) and `common interests count`. Records carrying the
    given `user_id` are never reported as matches. A user without interests
    gets an empty list.
    Raises `ValueError` when no record exists for `user_id`.
    """

    # `project_key` is the *name* of the environment variable, not the key itself
    deta = Deta(os.environ[project_key])
    users = deta.Base(database_name)

    found = users.fetch({"user_id": user_id}).items
    if not found:
        raise ValueError("user doesn't exist")

    # the fetched record already carries the interests; no second lookup needed
    interests = list(dict.fromkeys(found[0].get("interests") or []))
    if not interests:
        # an empty query list would not filter anything, so stop here
        return []

    # a list of queries is OR-ed: any record containing any of the interests
    query = [{'interests?contains': item} for item in interests]
    res = users.fetch(query)
    candidates = list(res.items)

    # keep following `last` until the result set is exhausted
    while res.last:
        res = users.fetch(query, last=res.last)
        candidates.extend(res.items)

    matches = []
    for item in candidates:
        # a user is not a match for themselves
        if item.get("user_id") == user_id:
            continue

        theirs = set(item.get("interests") or [])
        common = [interest for interest in interests if interest in theirs]

        matches.append(
            {
                # NOTE(review): falls back to 'name' for records stored without 'username'
                'username': item.get('username', item.get('name')),
                'user_id': item['user_id'],
                'common interests': common,
                'common interests count': len(common)
            }
        )

    return matches

## Stats

### Get whole database

In [None]:
#|export

def database_exists(database_name: str,
                    project_key: str = "PROJECT_KEY" # the environment variable name where your Deta project key is stored
                   ) -> bool:
    
    """
    Check if db exists by checking if there's at least one item.

    Returns `True` when the Base holds at least one item.
    Raises `NameError` otherwise, so an existing but empty Base is reported
    as missing too.
    """

    # `project_key` is the *name* of the environment variable, not the key itself
    deta = Deta(os.environ[project_key])
    db = deta.Base(database_name)

    # a single item is enough to prove there is data
    if not db.fetch(limit=1).items:
        raise NameError(f"{database_name} doesn't exist")

    return True

In [None]:
#|export

def fetch_all(database_name: str,
              project_key: str = "PROJECT_KEY" # the environment variable name where your Deta project key is stored
             ) -> List[dict]:
    """
    fetches the whole database

    this is from deta's docs: https://docs.deta.sh/docs/base/sdk/#fetch-all-items-1

    uses `database_exists`, so the error it raises for a missing/empty
    database propagates from here. Returns every item as one list.
    """

    database_exists(database_name, project_key) # will create error if db doesn't exist

    # `project_key` is the *name* of the environment variable, not the key itself
    deta = Deta(os.environ[project_key])
    db = deta.Base(database_name)

    res = db.fetch()
    # copy, so the response object's own list is not extended in place
    all_items = list(res.items)

    # fetch until last is 'None'
    while res.last:
        res = db.fetch(last=res.last)
        all_items.extend(res.items)

    return all_items

In [None]:
#|export

def database_to_dataframe(database_name: str,
                          project_key: str = "PROJECT_KEY" # the environment variable name where your Deta project key is stored
                         ) -> pd.DataFrame:
    """
    fetches the whole database and converts it to a pandas dataframe

    uses `fetch_all`; each stored item becomes one row and each field a column
    """

    # pandas is already imported at module level, no local import needed
    records = fetch_all(database_name, project_key)

    return pd.DataFrame(records)

### Count interests

In [None]:
#|export

def count_interests(database_name: str ='users',
                    project_key: str = "PROJECT_KEY" # the environment variable name where your Deta project key is stored
                   ) -> List[dict]:
    """
    Shows each interest and how many times they occur. If needed, this can work for any column that contains a list of strings.

    Returns a list of `{'interests': <interest>, 'count': <occurrences>}`
    dicts, most frequent first.

    Uses `database_to_dataframe`
    """

    # explode() gives one row per interest; value_counts() then tallies them
    counts = database_to_dataframe(database_name, project_key)['interests'].explode().value_counts()

    # iterate label/count pairs; iterating the Series directly yields only the counts
    return [{'interests': interest, 'count': int(total)} for interest, total in counts.items()]

In [None]:
#|export

def interestcount_to_dataframe(database_name: str ='users',
                               project_key: str = "PROJECT_KEY" # the environment variable name where your Deta project key is stored
                              ) -> pd.DataFrame:
    """
    Get interest counts as a pandas dataframe

    The frame has two columns, `interests` and `count`, most frequent first.

    Uses `database_to_dataframe`
    """

    # https://re-thought.com/pandas-value_counts/

    counts = database_to_dataframe(database_name, project_key)['interests'].explode().value_counts()

    # name the index and the values explicitly so the two column names do not
    # depend on how the installed pandas version labels value_counts() output
    return counts.rename_axis('interests').reset_index(name='count')

### User stats

In [None]:
#|export

def total_users(database_name: str,
                project_key: str = "PROJECT_KEY" # the environment variable name where your Deta project key is stored
               ) -> int:
    """
    Count total users. Uses `fetch_all`

    Returns the number of items `fetch_all` returns for the database; any
    error `fetch_all` raises propagates unchanged.
    """

    return len(fetch_all(database_name, project_key))

In [None]:
#| hide
# NOTE(review): presumably regenerates the exported module from the `#|export` cells — confirm against the nbdev docs
import nbdev; nbdev.nbdev_export()